Merge "blueprint nova-image-cache-management phase1"

This commit is contained in:
Jenkins
2012-02-03 06:10:30 +00:00
committed by Gerrit Code Review
7 changed files with 673 additions and 1 deletions
+21
View File
@@ -109,6 +109,13 @@ compute_opts = [
help="Action to take if a running deleted instance is detected."
"Valid options are 'noop', 'log' and 'reap'. "
"Set to 'noop' to disable."),
cfg.BoolOpt("use_image_cache_manager",
default=False,
help="Whether to manage images in the local cache."),
cfg.IntOpt("image_cache_manager_interval",
default=3600,
help="Number of periodic scheduler ticks to wait between "
"runs of the image cache manager."),
]
FLAGS = flags.FLAGS
@@ -195,6 +202,7 @@ class ComputeManager(manager.SchedulerDependentManager):
self.network_manager = utils.import_object(FLAGS.network_manager)
self._last_host_check = 0
self._last_bw_usage_poll = 0
super(ComputeManager, self).__init__(service_name="compute",
*args, **kwargs)
@@ -2266,3 +2274,16 @@ class ComputeManager(manager.SchedulerDependentManager):
def remove_aggregate_host(self, context, aggregate_id, host):
"""Removes a host from a physical hypervisor pool."""
raise NotImplementedError()
@manager.periodic_task(
ticks_between_runs=FLAGS.image_cache_manager_interval)
def _run_image_cache_manager_pass(self, context):
"""Run a single pass of the image cache manager."""
if not FLAGS.use_image_cache_manager:
return
try:
self.driver.manage_image_cache(context)
except NotImplementedError:
pass
+241
View File
@@ -0,0 +1,241 @@
#!/usr/bin/python
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2012 Michael Still and Canonical Inc
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import hashlib
import os
import shutil
import tempfile
import time
from nova import test
from nova import db
from nova import flags
from nova import log as logging
from nova.virt.libvirt import imagecache
from nova.virt.libvirt import utils as virtutils
FLAGS = flags.FLAGS
LOG = logging.getLogger('nova.tests.test_imagecache')
class ImageCacheManagerTestCase(test.TestCase):
def test_read_stored_checksum_missing(self):
self.stubs.Set(os.path, 'exists', lambda x: False)
csum = imagecache.read_stored_checksum('/tmp/foo')
self.assertEquals(csum, None)
def test_read_stored_checksum(self):
try:
dirname = tempfile.mkdtemp()
fname = os.path.join(dirname, 'aaa')
csum_input = 'fdghkfhkgjjksfdgjksjkghsdf'
f = open('%s.sha1' % fname, 'w')
f.write('%s\n' % csum_input)
f.close()
csum_output = imagecache.read_stored_checksum(fname)
self.assertEquals(csum_input, csum_output)
finally:
shutil.rmtree(dirname)
def test_list_base_images(self):
listing = ['00000001',
'ephemeral_0_20_None',
'e97222e91fc4241f49a7f520d1dcf446751129b3_sm',
'e09c675c2d1cfac32dae3c2d83689c8c94bc693b_sm',
'e97222e91fc4241f49a7f520d1dcf446751129b3',
'00000004']
self.stubs.Set(os, 'listdir', lambda x: listing)
self.stubs.Set(os.path, 'isfile', lambda x: True)
base_dir = '/var/lib/nova/instances/_base'
image_cache_manager = imagecache.ImageCacheManager()
image_cache_manager._list_base_images(base_dir)
self.assertEquals(len(image_cache_manager.unexplained_images), 3)
expected = os.path.join(base_dir,
'e97222e91fc4241f49a7f520d1dcf446751129b3')
self.assertTrue(expected in image_cache_manager.unexplained_images)
unexpected = os.path.join(base_dir, '00000004')
self.assertFalse(unexpected in image_cache_manager.unexplained_images)
for ent in image_cache_manager.unexplained_images:
self.assertTrue(ent.startswith(base_dir))
def test_list_running_instances(self):
self.stubs.Set(db, 'instance_get_all',
lambda x: [{'image_ref': 'image-1',
'host': FLAGS.host,
'name': 'inst-1'},
{'image_ref': 'image-2',
'host': FLAGS.host,
'name': 'inst-2'},
{'image_ref': 'image-2',
'host': 'remotehost',
'name': 'inst-3'}])
image_cache_manager = imagecache.ImageCacheManager()
# The argument here should be a context, but its mocked out
image_cache_manager._list_running_instances(None)
self.assertEqual(len(image_cache_manager.used_images), 2)
self.assertTrue(image_cache_manager.used_images['image-1'] ==
(1, 0, ['inst-1']))
self.assertTrue(image_cache_manager.used_images['image-2'] ==
(1, 1, ['inst-2', 'inst-3']))
self.assertEqual(len(image_cache_manager.image_popularity), 2)
self.assertEqual(image_cache_manager.image_popularity['image-1'], 1)
self.assertEqual(image_cache_manager.image_popularity['image-2'], 2)
def test_list_backing_images(self):
self.stubs.Set(os, 'listdir',
lambda x: ['_base', 'instance-00000001',
'instance-00000002', 'instance-00000003'])
self.stubs.Set(os.path, 'exists',
lambda x: x.find('instance-') != -1)
self.stubs.Set(virtutils, 'get_disk_backing_file',
lambda x: 'e97222e91fc4241f49a7f520d1dcf446751129b3_sm')
found = os.path.join(FLAGS.instances_path, '_base',
'e97222e91fc4241f49a7f520d1dcf446751129b3_sm')
image_cache_manager = imagecache.ImageCacheManager()
image_cache_manager.unexplained_images = [found]
inuse_images = image_cache_manager._list_backing_images()
self.assertEquals(inuse_images, [found])
self.assertEquals(len(image_cache_manager.unexplained_images), 0)
def test_find_base_file_nothing(self):
self.stubs.Set(os.path, 'exists', lambda x: False)
base_dir = '/var/lib/nova/instances/_base'
fingerprint = '549867354867'
image_cache_manager = imagecache.ImageCacheManager()
res = list(image_cache_manager._find_base_file(base_dir, fingerprint))
self.assertEqual(0, len(res))
def test_find_base_file_small(self):
self.stubs.Set(os.path, 'exists',
lambda x: x.endswith('549867354867_sm'))
base_dir = '/var/lib/nova/instances/_base'
fingerprint = '549867354867'
image_cache_manager = imagecache.ImageCacheManager()
res = list(image_cache_manager._find_base_file(base_dir, fingerprint))
base_file = os.path.join(base_dir, fingerprint + '_sm')
self.assertTrue(res == [(base_file, True)])
def test_find_base_file_both(self):
self.stubs.Set(os.path, 'exists', lambda x: True)
base_dir = '/var/lib/nova/instances/_base'
fingerprint = '549867354867'
image_cache_manager = imagecache.ImageCacheManager()
res = list(image_cache_manager._find_base_file(base_dir, fingerprint))
base_file1 = os.path.join(base_dir, fingerprint)
base_file2 = os.path.join(base_dir, fingerprint + '_sm')
self.assertTrue(res == [(base_file1, False), (base_file2, True)])
def test_verify_checksum(self):
testdata = ('OpenStack Software delivers a massively scalable cloud '
'operating system.')
img = {'container_format': 'ami', 'id': '42'}
try:
dirname = tempfile.mkdtemp()
fname = os.path.join(dirname, 'aaa')
f = open(fname, 'w')
f.write(testdata)
f.close()
# Checksum is valid
f = open('%s.sha1' % fname, 'w')
csum = hashlib.sha1()
csum.update(testdata)
f.write(csum.hexdigest())
f.close()
image_cache_manager = imagecache.ImageCacheManager()
res = image_cache_manager._verify_checksum(img, fname)
self.assertTrue(res)
# Checksum is invalid
f = open('%s.sha1' % fname, 'w')
f.write('banana')
f.close()
image_cache_manager = imagecache.ImageCacheManager()
res = image_cache_manager._verify_checksum(img, fname)
self.assertFalse(res)
# Checksum file missing
os.remove('%s.sha1' % fname)
image_cache_manager = imagecache.ImageCacheManager()
res = image_cache_manager._verify_checksum(img, fname)
self.assertEquals(res, None)
finally:
shutil.rmtree(dirname)
def test_remove_base_file(self):
try:
dirname = tempfile.mkdtemp()
fname = os.path.join(dirname, 'aaa')
f = open(fname, 'w')
f.write('data')
f.close()
f = open('%s.sha1' % fname, 'w')
f.close()
image_cache_manager = imagecache.ImageCacheManager()
image_cache_manager._remove_base_file(fname)
# Files are initially too new to delete
self.assertTrue(os.path.exists(fname))
self.assertTrue(os.path.exists('%s.sha1' % fname))
# Old files get cleaned up though
os.utime(fname, (-1, time.time() - 100000))
image_cache_manager._remove_base_file(fname)
self.assertFalse(os.path.exists(fname))
self.assertFalse(os.path.exists('%s.sha1' % fname))
finally:
shutil.rmtree(dirname)
+9
View File
@@ -17,7 +17,9 @@
import __builtin__
import mox
import datetime
import hashlib
import os
import StringIO
import tempfile
import nova
@@ -679,3 +681,10 @@ class DeprecationTest(test.TestCase):
self.mox.ReplayAll()
result = utils.service_is_up(service)
self.assertFalse(result)
def test_hash_file(self):
data = 'Mary had a little lamb, its fleece as white as snow'
flo = StringIO.StringIO(data)
h1 = utils.hash_file(flo)
h2 = hashlib.sha1(data).hexdigest()
self.assertEquals(h1, h2)
+8
View File
@@ -22,6 +22,7 @@
import contextlib
import datetime
import functools
import hashlib
import inspect
import json
import lockfile
@@ -1194,6 +1195,13 @@ def read_cached_file(filename, cache_info, reload_func=None):
return cache_info['data']
def hash_file(file_like_object):
"""Generate a hash for the contents of a file."""
checksum = hashlib.sha1()
any(map(checksum.update, iter(lambda: file_like_object.read(32768), '')))
return checksum.hexdigest()
@contextlib.contextmanager
def temporary_mutation(obj, **kwargs):
"""Temporarily set the attr on a particular object to a given value then
+11
View File
@@ -612,3 +612,14 @@ class ComputeDriver(object):
"""
# TODO(tr3buchet): update all subclasses and remove this
return True
def manage_image_cache(self, context):
"""
Manage the driver's local image cache.
Some drivers chose to cache images for instances on disk. This method
is an opportunity to do management of that cache which isn't directly
related to other calls into the driver. The prime example is to clean
the cache and remove images which are no longer of interest.
"""
raise NotImplementedError()
+17 -1
View File
@@ -67,6 +67,7 @@ from nova import utils
from nova.virt.disk import api as disk
from nova.virt import driver
from nova.virt import images
from nova.virt.libvirt import imagecache
from nova.virt.libvirt import utils as libvirt_utils
@@ -209,6 +210,8 @@ class LibvirtConnection(driver.ComputeDriver):
self.default_ephemeral_device = self._disk_prefix + 'b'
self.default_swap_device = self._disk_prefix + 'c'
self.image_cache_manager = imagecache.ImageCacheManager()
@property
def host_state(self):
if not self._host_state:
@@ -823,7 +826,6 @@ class LibvirtConnection(driver.ComputeDriver):
"""
generating = 'image_id' not in kwargs
if not os.path.exists(target):
base_dir = os.path.join(FLAGS.instances_path, '_base')
if not os.path.exists(base_dir):
@@ -866,6 +868,16 @@ class LibvirtConnection(driver.ComputeDriver):
"""Grab image to raw format"""
images.fetch_to_raw(context, image_id, target, user_id, project_id)
if FLAGS.checksum_base_images:
f = open(target, 'r')
checksum = utils.hash_file(f)
f.close()
checksum_fname = '%s.sha1' % target
fd = os.open(checksum_filename, os.O_WRONLY, mode=0444)
os.write(fd, checksum)
os.close(fd)
@staticmethod
def _create_local(target, local_size, unit='G', fs_format=None):
"""Create a blank image of specified size"""
@@ -1996,6 +2008,10 @@ class LibvirtConnection(driver.ComputeDriver):
"""Sets the specified host's ability to accept new instances."""
pass
def manage_image_cache(self, context):
"""Manage the local cache of images."""
self.image_cache_manager.verify_base_images(context)
class HostState(object):
"""Manages information about the compute node through libvirt"""
+366
View File
@@ -0,0 +1,366 @@
#!/usr/bin/python
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2012 Michael Still and Canonical Inc
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Image cache manager.
The cache manager implements the specification at
http://wiki.openstack.org/nova-image-cache-management.
"""
import datetime
import hashlib
import os
import sys
import time
from nova.common import cfg
from nova import compute
from nova import context as db_context
from nova import db
from nova import flags
from nova import image
from nova import log as logging
from nova import utils
from nova.virt.libvirt import utils as virtutils
LOG = logging.getLogger('nova.compute.imagecache')
imagecache_opts = [
cfg.StrOpt('instances_path',
default='$state_path/instances',
help='Where instances are stored on disk'),
cfg.BoolOpt('remove_unused_base_images',
default=False,
help='Should unused base images be removed?'),
cfg.IntOpt('remove_unused_minimum_age_seconds',
default=3600,
help='Unused base images younger than this will not be '
'removed'),
]
FLAGS = flags.FLAGS
FLAGS.add_options(imagecache_opts)
def read_stored_checksum(base_file):
"""Read the checksum which was created at image fetch time.
Returns the checksum (as hex) or None.
"""
checksum_file = '%s.sha1' % base_file
if not os.path.exists(checksum_file):
return None
f = open(checksum_file, 'r')
stored_checksum = f.read().rstrip()
f.close()
return stored_checksum
class ImageCacheManager(object):
def __init__(self):
self.unexplained_images = []
def _list_base_images(self, base_dir):
"""Return a list of the images present in _base.
Note that this does not return a value. It instead populates a class
variable with a list of images that we need to try and explain.
"""
# Determine what images we have on disk. There will be other files in
# this directory (for example kernels) so we only grab the ones which
# are the right length to be disk images.
self.unexplained_images = []
digest_size = hashlib.sha1().digestsize * 2
for ent in os.listdir(base_dir):
if len(ent) == digest_size or len(ent) == digest_size + 3:
entpath = os.path.join(base_dir, ent)
if os.path.isfile(entpath):
self.unexplained_images.append(entpath)
def _list_running_instances(self, context):
"""List running instances (on all compute nodes)."""
self.used_images = {}
self.image_popularity = {}
instances = db.instance_get_all(context)
for instance in instances:
image_ref_str = str(instance['image_ref'])
local, remote, insts = self.used_images.get(image_ref_str,
(0, 0, []))
if instance['host'] == FLAGS.host:
local += 1
else:
remote += 1
insts.append(instance['name'])
self.used_images[image_ref_str] = (local, remote, insts)
self.image_popularity.setdefault(image_ref_str, 0)
self.image_popularity[image_ref_str] += 1
def _list_backing_images(self):
"""List the backing images currently in use."""
inuse_images = []
for ent in os.listdir(FLAGS.instances_path):
if ent.startswith('instance-'):
disk_path = os.path.join(FLAGS.instances_path, ent, 'disk')
if os.path.exists(disk_path):
backing_file = virtutils.get_disk_backing_file(disk_path)
LOG.debug(_('Instance %(instance)s is backed by '
'%(backing)s'),
{'instance': ent,
'backing': backing_file})
backing_path = os.path.join(FLAGS.instances_path,
'_base', backing_file)
if not backing_path in inuse_images:
inuse_images.append(backing_path)
if backing_path in self.unexplained_images:
LOG.warning(_('Instance %(instance)s is using a '
'backing file %(backing)s which does '
'not appear in the image service'),
{'instance': ent,
'backing': backing_file})
self.unexplained_images.remove(backing_path)
return inuse_images
def _find_base_file(self, base_dir, fingerprint):
"""Find the base file matching this fingerprint.
Yields the name of the base file, and a boolean which is True if
the image is "small". Note that is is possible for more than one
yield to result from this check.
If no base file is found, then nothing is yielded.
"""
base_file = os.path.join(base_dir, fingerprint)
if os.path.exists(base_file):
yield base_file, False
base_file = os.path.join(base_dir, fingerprint + '_sm')
if os.path.exists(base_file):
yield base_file, True
def _verify_checksum(self, img, base_file):
"""Compare the checksum stored on disk with the current file.
Note that if the checksum fails to verify this is logged, but no actual
action occurs. This is something sysadmins should monitor for and
handle manually when it occurs.
"""
f = open(base_file, 'r')
current_checksum = utils.hash_file(f)
f.close()
stored_checksum = read_stored_checksum(base_file)
if stored_checksum:
if current_checksum != stored_checksum:
LOG.error(_('%(container_format)s-%(id)s '
'(%(base_file)s): '
'image verification failed'),
{'container_format': img['container_format'],
'id': img['id'],
'base_file': base_file})
return False
else:
return True
else:
LOG.debug(_('%(container_format)s-%(id)s (%(base_file)s): '
'image verification skipped, no hash stored'),
{'container_format': img['container_format'],
'id': img['id'],
'base_file': base_file})
return None
def _remove_base_file(self, base_file):
"""Remove a single base file if it is old enough.
Returns nothing.
"""
mtime = os.path.getmtime(base_file)
age = time.time() - mtime
if age < FLAGS.remove_unused_minimum_age_seconds:
LOG.info(_('Base file too young to remove: %s'),
base_file)
else:
LOG.info(_('Removing base file: %s'), base_file)
try:
os.remove(base_file)
signature = base_file + '.sha1'
if os.path.exists(signature):
os.remove(signature)
except OSError, e:
LOG.error(_('Failed to remove %(base_file)s, '
'error was %(error)s'),
{'base_file': base_file,
'error': e})
def _handle_base_image(self, img, base_file, image_small):
"""Handle the checks for a single base image."""
# TODO(mikal): Write a unit test for this method
image_bad = False
image_in_use = False
if base_file:
LOG.debug(_('%(container_format)s-%(id)s (%(base_file)s): '
'checking'),
{'container_format': img['container_format'],
'id': img['id'],
'base_file': base_file})
if base_file in self.unexplained_images:
self.unexplained_images.remove(base_file)
self._verify_checksum(img, base_file)
else:
LOG.debug(_('%(container_format)s-%(id)s (%(base_file)s): '
'base file absent'),
{'container_format': img['container_format'],
'id': img['id'],
'base_file': base_file})
base_file = None
instances = []
if str(img['id']) in self.used_images:
local, remote, instances = self.used_images[str(img['id'])]
if local > 0:
LOG.debug(_('%(container_format)s-%(id)s (%(base_file)s): '
'in use: on this node %(local)d local, '
'%(remote)d on other nodes'),
{'container_format': img['container_format'],
'id': img['id'],
'base_file': base_file,
'local': local,
'remote': remote})
image_in_use = True
self.active_base_files.append(base_file)
if not base_file:
LOG.warning(_('%(container_format)s-%(id)s '
'(%(base_file)s): warning -- an absent '
'base file is in use! instances: '
'%(instance_list)s'),
{'container_format':
img['container_format'],
'id': img['id'],
'base_file': base_file,
'instance_list': ' '.join(instances)})
else:
LOG.debug(_('%(container_format)s-%(id)s (%(base_file)s): '
'in: on other nodes (%(remote)d on other '
'nodes)'),
{'container_format': img['container_format'],
'id': img['id'],
'base_file': base_file,
'remote': remote})
if image_bad:
self.corrupt_base_files.append(base_file)
if base_file:
if not image_in_use:
LOG.debug(_('%(container_format)s-%(id)s (%(base_file)s): '
'image is not in use'),
{'container_format': img['container_format'],
'id': img['id'],
'base_file': base_file})
self.removable_base_files.append(base_file)
else:
LOG.debug(_('%(container_format)s-%(id)s (%(base_file)s): '
'image is in use'),
{'container_format': img['container_format'],
'id': img['id'],
'base_file': base_file})
def verify_base_images(self, context):
"""Verify that base images are in a reasonable state."""
# TODO(mikal): Write a unit test for this method
base_dir = os.path.join(FLAGS.instances_path, '_base')
if not os.path.exists(base_dir):
LOG.debug(_('Skipping verification, no base directory at %s'),
base_dir)
return
LOG.debug(_('Verify base images'))
self._list_base_images(base_dir)
self._list_running_instances(context)
# Determine what images are in glance
image_service = image.get_default_image_service()
self.active_base_files = []
self.corrupt_base_files = []
self.removable_base_files = []
# GlanceImageService.detail uses _fetch_images which handles pagination
# for us
for img in image_service.detail(context):
if img['container_format'] != 'ami':
continue
fingerprint = hashlib.sha1(str(img['id'])).hexdigest()
for base_file, image_small in self._find_base_file(base_dir,
fingerprint):
self._handle_base_image(img, base_file, image_small)
# Elements remaining in unexplained_images are not currently in
# glance. That doesn't mean that they're really not in use though
# (consider images which have been removed from glance but are still
# used by instances). So, we check the backing file for any running
# instances as well.
if self.unexplained_images:
inuse_backing_images = self._list_backing_images()
if inuse_backing_images:
for backing_path in inuse_backing_images:
self.active_base_files.append(backing_path)
# Anything left is an unknown base image
for img in self.unexplained_images:
LOG.warning(_('Unknown base file: %s'), img)
self.removable_base_files.append(img)
# Dump these lists
if self.active_base_files:
LOG.info(_('Active base files: %s'),
' '.join(self.active_base_files))
if self.corrupt_base_files:
LOG.info(_('Corrupt base files: %s'),
' '.join(self.corrupt_base_files))
if self.removable_base_files:
LOG.info(_('Removable base files: %s'),
' '.join(self.removable_base_files))
if FLAGS.remove_unused_base_images:
for base_file in self.removable_base_files:
self._remove_base_file(base_file)
# That's it
LOG.debug(_('Verification complete'))