From ec9d5e375e208686d33b9259b039cc009bded42e Mon Sep 17 00:00:00 2001 From: Ankit Agrawal Date: Mon, 10 Aug 2015 16:27:57 +1000 Subject: [PATCH] libvirt: Race condition leads to instance in error ImageCacheManager deletes base image while image backend is copying image to the instance path leading instance to go in the error state. Acquired lock before removing image from cache. If libvirt is copying image to the instance path, image cache manager won't be able to remove it until libvirt finishes copying image completely. Closes-Bug: 1256838 Closes-Bug: 1470437 Co-Authored-By: Michael Still Depends-On: I337ce28e2fc516c91bec61ca3639ebff0029ad49 Change-Id: I376cc951922c338669fdf3f83da83e0d3cea1532 --- etc/nova/rootwrap.d/compute.filters | 3 ++ .../unit/virt/libvirt/fake_libvirt_utils.py | 4 ++ .../unit/virt/libvirt/test_imagecache.py | 49 ++++++++++++------- nova/tests/unit/virt/test_images.py | 2 +- nova/virt/images.py | 3 +- nova/virt/libvirt/imagebackend.py | 9 ++++ nova/virt/libvirt/imagecache.py | 28 +++++++---- nova/virt/libvirt/utils.py | 8 +++ ...wrap_compute_filters-428ca239f2e4e63d.yaml | 13 +++++ 9 files changed, 88 insertions(+), 31 deletions(-) create mode 100644 releasenotes/notes/upgrade_rootwrap_compute_filters-428ca239f2e4e63d.yaml diff --git a/etc/nova/rootwrap.d/compute.filters b/etc/nova/rootwrap.d/compute.filters index a731178aa3..c846b89ecd 100644 --- a/etc/nova/rootwrap.d/compute.filters +++ b/etc/nova/rootwrap.d/compute.filters @@ -246,3 +246,6 @@ ploop: CommandFilter, ploop, root # nova/virt/libvirt/utils.py: 'xend', 'status' xend: CommandFilter, xend, root + +# nova/virt/libvirt/utils.py: +touch: CommandFilter, touch, root diff --git a/nova/tests/unit/virt/libvirt/fake_libvirt_utils.py b/nova/tests/unit/virt/libvirt/fake_libvirt_utils.py index b0e1ee2f58..ebd2f73580 100644 --- a/nova/tests/unit/virt/libvirt/fake_libvirt_utils.py +++ b/nova/tests/unit/virt/libvirt/fake_libvirt_utils.py @@ -77,6 +77,10 @@ def chown(path, owner): pass +def update_mtime(path): + pass + + def extract_snapshot(disk_path, source_fmt, out_path, dest_fmt): files[out_path] = '' diff --git a/nova/tests/unit/virt/libvirt/test_imagecache.py b/nova/tests/unit/virt/libvirt/test_imagecache.py index 9c6685c1bb..62c193be54 100644 --- a/nova/tests/unit/virt/libvirt/test_imagecache.py +++ b/nova/tests/unit/virt/libvirt/test_imagecache.py @@ -20,6 +20,7 @@ import os import time import mock +from oslo_concurrency import lockutils from oslo_concurrency import processutils from oslo_config import cfg from oslo_log import formatters @@ -461,34 +462,32 @@ class ImageCacheManagerTestCase(test.NoDBTestCase): [fname]) self.assertEqual(image_cache_manager.corrupt_base_files, []) - def test_handle_base_image_used(self): - self.stubs.Set(libvirt_utils, 'chown', lambda x, y: None) + @mock.patch.object(libvirt_utils, 'update_mtime') + def test_handle_base_image_used(self, mock_mtime): img = '123' with self._make_base_file() as fname: - os.utime(fname, (-1, time.time() - 3601)) - image_cache_manager = imagecache.ImageCacheManager() image_cache_manager.unexplained_images = [fname] image_cache_manager.used_images = {'123': (1, 0, ['banana-42'])} image_cache_manager._handle_base_image(img, fname) + mock_mtime.assert_called_once_with(fname) self.assertEqual(image_cache_manager.unexplained_images, []) self.assertEqual(image_cache_manager.removable_base_files, []) self.assertEqual(image_cache_manager.corrupt_base_files, []) - def test_handle_base_image_used_remotely(self): - self.stubs.Set(libvirt_utils, 'chown', lambda x, y: None) + @mock.patch.object(libvirt_utils, 'update_mtime') + def test_handle_base_image_used_remotely(self, mock_mtime): img = '123' with self._make_base_file() as fname: - os.utime(fname, (-1, time.time() - 3601)) - image_cache_manager = imagecache.ImageCacheManager() image_cache_manager.unexplained_images = [fname] image_cache_manager.used_images = {'123': (0, 1, ['banana-42'])} image_cache_manager._handle_base_image(img, fname) + mock_mtime.assert_called_once_with(fname) self.assertEqual(image_cache_manager.unexplained_images, []) self.assertEqual(image_cache_manager.removable_base_files, []) self.assertEqual(image_cache_manager.corrupt_base_files, []) @@ -527,9 +526,9 @@ class ImageCacheManagerTestCase(test.NoDBTestCase): self.assertEqual(image_cache_manager.removable_base_files, []) self.assertEqual(image_cache_manager.corrupt_base_files, []) - def test_handle_base_image_checksum_fails(self): + @mock.patch.object(libvirt_utils, 'update_mtime') + def test_handle_base_image_checksum_fails(self, mock_mtime): self.flags(checksum_base_images=True, group='libvirt') - self.stubs.Set(libvirt_utils, 'chown', lambda x, y: None) img = '123' @@ -546,12 +545,15 @@ class ImageCacheManagerTestCase(test.NoDBTestCase): image_cache_manager.used_images = {'123': (1, 0, ['banana-42'])} image_cache_manager._handle_base_image(img, fname) + mock_mtime.assert_called_once_with(fname) self.assertEqual(image_cache_manager.unexplained_images, []) self.assertEqual(image_cache_manager.removable_base_files, []) self.assertEqual(image_cache_manager.corrupt_base_files, [fname]) - def test_verify_base_images(self): + @mock.patch.object(libvirt_utils, 'update_mtime') + @mock.patch.object(lockutils, 'external_lock') + def test_verify_base_images(self, mock_lock, mock_mtime): hashed_1 = '356a192b7913b04c54574d18c28d46e6395428ab' hashed_21 = '472b07b9fcf2c2451e8781e944bf5f77cd8457c8' hashed_22 = '12c6fc06c99a462375eeb3f43dfd832b08ca9e17' @@ -607,11 +609,6 @@ class ImageCacheManagerTestCase(test.NoDBTestCase): self.stub_out('os.path.exists', lambda x: exists(x)) - self.stubs.Set(libvirt_utils, 'chown', lambda x, y: None) - - # We need to stub utime as well - self.stub_out('os.utime', lambda x, y: None) - # Fake up some instances in the instances directory orig_listdir = os.listdir @@ -857,13 +854,13 @@ class ImageCacheManagerTestCase(test.NoDBTestCase): expect_set = set(['swap_123', 'swap_456']) self.assertEqual(image_cache_manager.back_swap_images, expect_set) - @mock.patch.object(libvirt_utils, 'chown') + @mock.patch.object(lockutils, 'external_lock') + @mock.patch.object(libvirt_utils, 'update_mtime') @mock.patch('os.path.exists', return_value=True) - @mock.patch('os.utime') @mock.patch('os.path.getmtime') @mock.patch('os.remove') def test_age_and_verify_swap_images(self, mock_remove, mock_getmtime, - mock_utime, mock_exist, mock_chown): + mock_exist, mock_mtime, mock_lock): image_cache_manager = imagecache.ImageCacheManager() expected_remove = set() expected_exist = set(['swap_128', 'swap_256']) @@ -894,6 +891,20 @@ class ImageCacheManagerTestCase(test.NoDBTestCase): self.assertIn('swap_128', expected_exist) self.assertIn('swap_256', expected_remove) + @mock.patch.object(utils, 'synchronized') + @mock.patch.object(imagecache.ImageCacheManager, '_get_age_of_file', + return_value=(True, 100)) + def test_lock_acquired_on_removing_old_enough_files(self, mock_get_age, + mock_synchronized): + base_file = '/tmp_age_test' + lock_path = os.path.join(CONF.instances_path, 'locks') + lock_file = os.path.split(base_file)[-1] + image_cache_manager = imagecache.ImageCacheManager() + image_cache_manager._remove_old_enough_file( + base_file, 60, remove_sig=False, remove_lock=False) + mock_synchronized.assert_called_once_with(lock_file, external=True, + lock_path=lock_path) + class VerifyChecksumTestCase(test.NoDBTestCase): diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py index 46857ac09d..b0fc20b01b 100644 --- a/nova/tests/unit/virt/test_images.py +++ b/nova/tests/unit/virt/test_images.py @@ -25,7 +25,7 @@ from nova.virt import images class QemuTestCase(test.NoDBTestCase): def test_qemu_info_with_bad_path(self): - self.assertRaises(exception.InvalidDiskInfo, + self.assertRaises(exception.DiskNotFound, images.qemu_img_info, '/path/that/does/not/exist') diff --git a/nova/virt/images.py b/nova/virt/images.py index adaa009754..395866d911 100644 --- a/nova/virt/images.py +++ b/nova/virt/images.py @@ -54,8 +54,7 @@ def qemu_img_info(path, format=None): CONF.import_opt('images_type', 'nova.virt.libvirt.imagebackend', group='libvirt') if not os.path.exists(path) and CONF.libvirt.images_type != 'rbd': - msg = (_("Path does not exist %(path)s") % {'path': path}) - raise exception.InvalidDiskInfo(reason=msg) + raise exception.DiskNotFound(location=path) try: cmd = ('env', 'LC_ALL=C', 'LANG=C', 'qemu-img', 'info', path) diff --git a/nova/virt/libvirt/imagebackend.py b/nova/virt/libvirt/imagebackend.py index e9d516c8cf..fd52b17e93 100644 --- a/nova/virt/libvirt/imagebackend.py +++ b/nova/virt/libvirt/imagebackend.py @@ -531,10 +531,15 @@ class Raw(Image): else: if not os.path.exists(base): prepare_template(target=base, max_size=size, *args, **kwargs) + + # NOTE(mikal): Update the mtime of the base file so the image + # cache manager knows it is in use. + libvirt_utils.update_mtime(base) self.verify_base_size(base, size) if not os.path.exists(self.path): with fileutils.remove_path_on_error(self.path): copy_raw_image(base, self.path, size) + self.correct_format() def resize_image(self, size): @@ -584,6 +589,10 @@ class Qcow2(Image): # Download the unmodified base image unless we already have a copy. if not os.path.exists(base): prepare_template(target=base, max_size=size, *args, **kwargs) + + # NOTE(ankit): Update the mtime of the base file so the image + # cache manager knows it is in use. + libvirt_utils.update_mtime(base) self.verify_base_size(base, size) legacy_backing_size = None diff --git a/nova/virt/libvirt/imagecache.py b/nova/virt/libvirt/imagecache.py index 0553bd00b3..efdd5483c6 100644 --- a/nova/virt/libvirt/imagecache.py +++ b/nova/virt/libvirt/imagecache.py @@ -449,10 +449,18 @@ class ImageCacheManager(imagecache.ImageCacheManager): if not exists: return - if age < maxage: - LOG.info(_LI('Base or swap file too young to remove: %s'), - base_file) - else: + lock_file = os.path.split(base_file)[-1] + + @utils.synchronized(lock_file, external=True, + lock_path=self.lock_path) + def _inner_remove_old_enough_file(): + # NOTE(mikal): recheck that the file is old enough, as a new + # user of the file might have come along while we were waiting + # for the lock + exists, age = self._get_age_of_file(base_file) + if not exists or age < maxage: + return + LOG.info(_LI('Removing base or swap file: %s'), base_file) try: os.remove(base_file) @@ -466,6 +474,11 @@ class ImageCacheManager(imagecache.ImageCacheManager): {'base_file': base_file, 'error': e}) + if age < maxage: + LOG.info(_LI('Base or swap file too young to remove: %s'), + base_file) + else: + _inner_remove_old_enough_file() if remove_lock: try: # NOTE(jichenjc) The lock file will be constructed first @@ -473,7 +486,6 @@ class ImageCacheManager(imagecache.ImageCacheManager): # like nova-9e881789030568a317fad9daae82c5b1c65e0d4a # or nova-03d8e206-6500-4d91-b47d-ee74897f9b4e # according to the original file name - lock_file = os.path.split(base_file)[-1] lockutils.remove_external_lock_file(lock_file, lock_file_prefix='nova-', lock_path=self.lock_path) except OSError as e: @@ -562,8 +574,7 @@ class ImageCacheManager(imagecache.ImageCacheManager): {'id': img_id, 'base_file': base_file}) if os.path.exists(base_file): - libvirt_utils.chown(base_file, os.getuid()) - os.utime(base_file, None) + libvirt_utils.update_mtime(base_file) def _age_and_verify_swap_images(self, context, base_dir): LOG.debug('Verify swap images') @@ -571,8 +582,7 @@ class ImageCacheManager(imagecache.ImageCacheManager): for ent in self.back_swap_images: base_file = os.path.join(base_dir, ent) if ent in self.used_swap_images and os.path.exists(base_file): - libvirt_utils.chown(base_file, os.getuid()) - os.utime(base_file, None) + libvirt_utils.update_mtime(base_file) elif self.remove_unused_base_images: self._remove_swap_file(base_file) diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py index e410bd2a31..fabf8fea3b 100644 --- a/nova/virt/libvirt/utils.py +++ b/nova/virt/libvirt/utils.py @@ -248,6 +248,14 @@ def chown(path, owner): execute('chown', owner, path, run_as_root=True) +def update_mtime(path): + """Touch a file without being the owner. + + :param path: File bump the mtime on + """ + execute('touch', '-c', path, run_as_root=True) + + def _id_map_to_config(id_map): return "%s:%s:%s" % (id_map.start, id_map.target, id_map.count) diff --git a/releasenotes/notes/upgrade_rootwrap_compute_filters-428ca239f2e4e63d.yaml b/releasenotes/notes/upgrade_rootwrap_compute_filters-428ca239f2e4e63d.yaml new file mode 100644 index 0000000000..652a56f531 --- /dev/null +++ b/releasenotes/notes/upgrade_rootwrap_compute_filters-428ca239f2e4e63d.yaml @@ -0,0 +1,13 @@ +--- +upgrade: + - Upgrade the rootwrap configuration for the compute service, + so that patches requiring new rootwrap configuration can be + tested with grenade. +fixes: + - In a race condition if base image is deleted by ImageCacheManager + while imagebackend is copying the image to instance path, then the + instance goes in to error state. In this case when libvirt has + changed the base file ownership to libvirt-qemu while imagebackend + is copying the image, then we get permission denied error on updating + the file access time using os.utime. Fixed this issue by updating the + base file access time with root user privileges using 'touch' command. \ No newline at end of file