diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 692044a603..834e2889c2 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -22,7 +22,6 @@ handles RPC calls relating to creating instances.
 It is responsible for building a disk image, launching it via the underlying
 virtualization driver, responding to calls to check its state, attaching
 persistent storage, and terminating it.
-
 """
 
 import base64
@@ -3328,6 +3327,21 @@ class ComputeManager(manager.Manager):
         if original_exception is not None and raise_exc:
             raise original_exception
 
+    def _get_multiattach_volume_lock_names_bdms(
+        self, bdms: objects.BlockDeviceMappingList) -> ty.List[str]:
+        """Get the lock names for multiattach volumes.
+
+        :param bdms: BlockDeviceMappingList object
+        :return: List of lock names
+        """
+        if not bdms:
+            return []
+        return [
+            f"multi_attach_volume_{bdm.volume_id}"
+            for bdm in bdms
+            if bdm.is_multiattach
+        ]
+
     def _delete_instance(self, context, instance, bdms):
         """Delete an instance on this host.
 
@@ -3345,35 +3359,38 @@ class ComputeManager(manager.Manager):
         compute_utils.notify_about_instance_action(context, instance,
                 self.host, action=fields.NotificationAction.DELETE,
                 phase=fields.NotificationPhase.START, bdms=bdms)
 
+        with utils.FairLockGuard(
+            self._get_multiattach_volume_lock_names_bdms(bdms)
+        ):
+            self._shutdown_instance(context, instance, bdms)
-        self._shutdown_instance(context, instance, bdms)
 
+            # NOTE(vish): We have already deleted the instance, so we have
+            #             to ignore problems cleaning up the volumes. It
+            #             would be nice to let the user know somehow that
+            #             the volume deletion failed, but it is not
+            #             acceptable to have an instance that can not be
+            #             deleted. Perhaps this could be reworked in the
+            #             future to set an instance fault the first time
+            #             and to only ignore the failure if the instance
+            #             is already in ERROR.
-        # NOTE(vish): We have already deleted the instance, so we have
-        #             to ignore problems cleaning up the volumes. It
-        #             would be nice to let the user know somehow that
-        #             the volume deletion failed, but it is not
-        #             acceptable to have an instance that can not be
-        #             deleted. Perhaps this could be reworked in the
-        #             future to set an instance fault the first time
-        #             and to only ignore the failure if the instance
-        #             is already in ERROR.
 
+            # NOTE(ameeda): The volumes have already been detached during
+            #               the above _shutdown_instance() call and this is
+            #               why detach is not requested from
+            #               _cleanup_volumes() in this case
-        # NOTE(ameeda): The volumes have already been detached during
-        #               the above _shutdown_instance() call and this is
-        #               why detach is not requested from
-        #               _cleanup_volumes() in this case
 
+            self._cleanup_volumes(context, instance, bdms,
+                                  raise_exc=False, detach=False)
+        # if a delete task succeeded, always update vm state and task
+        # state without expecting task state to be DELETING
+        instance.vm_state = vm_states.DELETED
+        instance.task_state = None
+        instance.power_state = power_state.NOSTATE
+        instance.terminated_at = timeutils.utcnow()
+        instance.save()
-        self._cleanup_volumes(context, instance, bdms,
-                              raise_exc=False, detach=False)
 
         # Delete Cyborg ARQs if the instance has a device profile.
         compute_utils.delete_arqs_if_needed(context, instance)
-        # if a delete task succeeded, always update vm state and task
-        # state without expecting task state to be DELETING
-        instance.vm_state = vm_states.DELETED
-        instance.task_state = None
-        instance.power_state = power_state.NOSTATE
-        instance.terminated_at = timeutils.utcnow()
-        instance.save()
 
         self._complete_deletion(context, instance)
 
         # only destroy the instance in the db if the _complete_deletion
diff --git a/nova/objects/block_device.py b/nova/objects/block_device.py
index 813fc33069..22d2a222fb 100644
--- a/nova/objects/block_device.py
+++ b/nova/objects/block_device.py
@@ -15,6 +15,7 @@
 from oslo_db import api as oslo_db_api
 from oslo_db.sqlalchemy import update_match
 from oslo_log import log as logging
+from oslo_serialization import jsonutils
 from oslo_utils import uuidutils
 from oslo_utils import versionutils
 
@@ -316,6 +317,26 @@ class BlockDeviceMapping(base.NovaPersistentObject, base.NovaObject,
         return (self.destination_type ==
                 fields.BlockDeviceDestinationType.VOLUME)
 
+    @property
+    def is_multiattach(self):
+        """Return whether the volume is multiattach.
+
+        Note that this property is only valid for volumes that are attached to
+        an instance. If the volume is not attached to an instance, this
+        property will return False.
+
+        :returns: True if the volume is multiattach, False otherwise
+        """
+
+        if not self.is_volume:
+            return False
+        if 'connection_info' not in self:
+            return False
+        if not self.connection_info:
+            return False
+        info = jsonutils.loads(self.connection_info)
+        return info.get("multiattach", False)
+
     @property
     def is_image(self):
         return self.source_type == fields.BlockDeviceSourceType.IMAGE
diff --git a/nova/tests/functional/regressions/test_bug_2048837.py b/nova/tests/functional/regressions/test_bug_2048837.py
index 09c75ae398..f282087526 100644
--- a/nova/tests/functional/regressions/test_bug_2048837.py
+++ b/nova/tests/functional/regressions/test_bug_2048837.py
@@ -11,13 +11,11 @@
 # under the License.
 
 import fixtures
-import threading
-import time
+
+from oslo_concurrency import lockutils
 
 from unittest import mock
 
-from nova import context as nova_context
-from nova import objects
 from nova.tests.functional.libvirt import base
 from nova.virt import libvirt
 
@@ -66,15 +64,22 @@
         self.server_b = self._create_server(networks='none')
         self.notifier.wait_for_versioned_notifications('instance.create.end')
         self._attach_volume(self.server_b, self.volume_id)
-        self.lock = threading.Lock()
+        # we use a reader writer lock because it allows any number of
+        # readers to progress as long as no one holds the write lock.
+        self.lock = lockutils.ReaderWriterLock()
        # run periodics to allow async tasks to complete
         self._run_periodics()
 
     def _should_disconnect(self, *args, **kwargs):
-        with self.lock:
+        # use a read lock to not block concurrent requests
+        # when the write lock is not held.
+        self.lock.acquire_read_lock()
+        try:
             result = self._orgi_should_disconnect(
                 self.compute_manager.driver, *args, **kwargs)
             return result
+        finally:
+            self.lock.release_read_lock()
 
     def test_serial_server_delete(self):
         # Now that we have 2 vms both using the same multi attach volume
@@ -83,10 +88,12 @@
         self.disconnect_volume_mock.assert_not_called()
 
         self._delete_server(self.server_a)
-        self.should_disconnect_mock.assert_called()
+        self.should_disconnect_mock.assert_called_once()
+        self.should_disconnect_mock.reset_mock()
         self.disconnect_volume_mock.assert_not_called()
         self._delete_server(self.server_b)
         self.disconnect_volume_mock.assert_called()
+        self.should_disconnect_mock.assert_called_once()
 
     def test_concurrent_server_delete(self):
         # Now that we have 2 vms both using the same multi attach volume
@@ -94,33 +101,20 @@
         # cleaning up
         self.should_disconnect_mock.assert_not_called()
         self.disconnect_volume_mock.assert_not_called()
-        # emulate concurrent delete
-        context = nova_context.get_admin_context()
-        servers_this_host = objects.InstanceList.get_uuids_by_host(
-            context, self.hostname)
-        with mock.patch('nova.objects.InstanceList.get_uuids_by_host',
-                        return_value=servers_this_host):
-            self.lock.acquire()
-            self.api.delete_server(self.server_a['id'])
-            self.api.delete_server(self.server_b['id'])
-            self.disconnect_volume_mock.assert_not_called()
-            # this mostly stabilizes the test but it may not be 100% reliable.
-            # locally with time.sleep(1) it passed 42 back to back executions
-            # im not sure why this is required given the lock but it is likely
-            # due to a the lock being released before a background task is
-            # completed. i.e. the conductor or resource trakcer updating state.
-            time.sleep(1)
-            self.lock.release()
+        # emulate concurrent delete by acquiring the lock to prevent
+        # the delete from progressing too far. We want to pause
+        # at the call to _should_disconnect so we acquire the write
+        # lock to block all readers.
+        self.lock.acquire_write_lock()
+        self.api.delete_server(self.server_a['id'])
+        self.api.delete_server(self.server_b['id'])
+        self.disconnect_volume_mock.assert_not_called()
+        # now that both deletes are submitted and we are stopped at
+        # nova.virt.libvirt.LibvirtDriver._should_disconnect_target
+        # we can release the lock and allow the deletes to complete.
+        self.lock.release_write_lock()
         self._wait_until_deleted(self.server_a)
         self._wait_until_deleted(self.server_b)
-        self.should_disconnect_mock.assert_called()
-        # Fixme(sean-k-mooney): this is bug 2048837
-        try:
-            self.disconnect_volume_mock.assert_not_called()
-        except AssertionError:
-            # NOTE(sean-k-mooney): this reproducer is not 100%
-            # reliable so we convert a failure to a skip to avoid
-            # gating issues. the bug is addressed in the follow up patch
-            # and that test is stable so it is not worth fixing this test
-            # beyond the time.sleep(1) above.
- self.skipTest("Bug 2048837: volume disconnect not called") + self.assertEqual(2, len(self.should_disconnect_mock.call_args_list)) + # this validates bug 2048837 + self.disconnect_volume_mock.assert_called_once() diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index c8a23351fb..b5d2b3820c 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -398,6 +398,26 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, startup=False, ) + def test__get_multiattach_volume_lock_names_bdms(self): + bdms = objects.BlockDeviceMappingList( + objects=[ + objects.BlockDeviceMapping( + volume_id=uuids.volume1, + destination_type='volume', + instance_uuid=uuids.instance, + attachment_id=uuids.attachment1), + objects.BlockDeviceMapping( + volume_id=uuids.volume2, + destination_type='volume', + instance_uuid=uuids.instance, + attachment_id=uuids.attachment2, + connection_info='{"multiattach": true}'), + ] + ) + lock_names = self.compute._get_multiattach_volume_lock_names_bdms(bdms) + expected_lock_names = [f"multi_attach_volume_{uuids.volume2}"] + self.assertEqual(expected_lock_names, lock_names) + @mock.patch('nova.compute.manager.LOG') def test_update_available_resource_for_node_reshape_failed(self, log_mock): """ReshapeFailed logs and reraises.""" diff --git a/nova/tests/unit/objects/test_block_device.py b/nova/tests/unit/objects/test_block_device.py index 85959a961a..cf019b9eac 100644 --- a/nova/tests/unit/objects/test_block_device.py +++ b/nova/tests/unit/objects/test_block_device.py @@ -251,6 +251,19 @@ class _TestBlockDeviceMappingObject(object): destination_type='local') self.assertFalse(bdm.is_volume) + def test_is_mutiattach_true(self): + bdm = objects.BlockDeviceMapping( + context=self.context, volume_id=uuids.volume_id, + destination_type='volume', + connection_info='{"multiattach": true}') + self.assertTrue(bdm.is_multiattach) + + def test_is_multiattach_false(self): + bdm = objects.BlockDeviceMapping( + context=self.context, destination_type='volume', + volume_id=uuids.volume_id) + self.assertFalse(bdm.is_multiattach) + def test_is_local(self): self.assertTrue( objects.BlockDeviceMapping( diff --git a/nova/tests/unit/test_utils.py b/nova/tests/unit/test_utils.py index 8a5d120602..339834bbfc 100644 --- a/nova/tests/unit/test_utils.py +++ b/nova/tests/unit/test_utils.py @@ -15,7 +15,9 @@ import datetime import hashlib import os +import os.path import threading +import time from unittest import mock import fixtures @@ -1705,3 +1707,261 @@ class OsloServiceBackendSelectionTestCase(test.NoDBTestCase): ex = self.assertRaises(ValueError, monkey_patch.patch, backend='foo') self.assertEqual( "the backend can only be 'eventlet' or 'threading'", str(ex)) + + +class TestFairLockGuard(test.NoDBTestCase): + + def test_aquire_single_lock(self): + lock_name = 'test_aquire_single_lock' + test_lock = utils.NOVA_FAIR_LOCKS.get(lock_name) + self.assertFalse(test_lock.has_pending_writers) + lock_guard = utils.FairLockGuard([lock_name]) + with lock_guard: + self.assertTrue(lock_guard.is_locked()) + self.assertIs(lock_guard.locks[0], test_lock) + self.assertTrue(test_lock.is_writer()) + + def test_aquire_multiple_locks(self): + lock_names = ['test_aquire_multiple_locks1', + 'test_aquire_multiple_locks2'] + test_locks = [utils.NOVA_FAIR_LOCKS.get( + lock_name) for lock_name in lock_names] + for lock in test_locks: + self.assertFalse(lock.has_pending_writers) + lock_guard = utils.FairLockGuard(lock_names) + with 
lock_guard: + self.assertTrue(lock_guard.is_locked()) + for i, lock in enumerate(lock_guard.locks): + self.assertIs(lock, test_locks[i]) + self.assertTrue(lock_guard.locks[i].is_writer()) + + def test_aquire_multiple_locks_sorted(self): + lock_names = ['test_aquire_multiple_locks_sorted2', + 'test_aquire_multiple_locks_sorted1'] + self.assertNotEqual(lock_names, sorted(lock_names)) + test_locks = [utils.NOVA_FAIR_LOCKS.get( + lock_name) for lock_name in lock_names] + for lock in test_locks: + self.assertFalse(lock.has_pending_writers) + lock_guard = utils.FairLockGuard(lock_names) + with lock_guard: + self.assertTrue(lock_guard.is_locked()) + for i, name in enumerate(sorted(lock_names)): + self.assertIs(lock_guard.locks[i], + utils.NOVA_FAIR_LOCKS.get(name)) + self.assertTrue(lock_guard.locks[i].is_writer()) + + def test_locks_are_released_on_exception(self): + lock_names = ['test_locks_are_released_on_exception1', + 'test_locks_are_released_on_exception2'] + test_locks = [utils.NOVA_FAIR_LOCKS.get( + lock_name) for lock_name in lock_names] + lock_guard = utils.FairLockGuard(lock_names) + try: + with lock_guard: + self.assertTrue(lock_guard.is_locked()) + raise ValueError() + except ValueError: + pass + self.assertFalse(lock_guard.is_locked()) + for lock in test_locks: + self.assertFalse(lock.is_writer()) + + def test_partial_acquire(self): + lock_names = ['test_partial_acquire1', 'test_partial_acquire2'] + test_locks = [utils.NOVA_FAIR_LOCKS.get( + lock_name) for lock_name in lock_names] + + lock_acquired = threading.Event() + lock_released = threading.Event() + worker_continue = threading.Event() + + def worker(): + test_locks[0].acquire_write_lock() + lock_acquired.set() + worker_continue.wait() + time.sleep(1) + test_locks[0].release_write_lock() + lock_released.set() + + thread = threading.Thread(target=worker) + thread.start() + lock_guard = utils.FairLockGuard(lock_names) + self.assertFalse(lock_guard.is_locked()) + lock_acquired.wait() + # we should not have acquired the lock yet + # as the worker thread is still holding it + self.assertFalse(test_locks[0].is_writer()) + # Note the first lock is still held by the worker thread + # so this will block until the worker releases it + worker_continue.set() + with lock_guard: + self.assertTrue(lock_guard.is_locked()) + self.assertTrue(test_locks[0].is_writer()) + self.assertTrue(test_locks[1].is_writer()) + self.assertTrue(lock_released.is_set()) + thread.join() + self.assertFalse(lock_guard.is_locked()) + self.assertFalse(test_locks[0].is_writer()) + self.assertFalse(test_locks[1].is_writer()) + + def test_thread_ordering_subset_locks(self): + """Test that a thread requesting a subset of locks waits for the + first thread that requested all locks to complete first. 
+ """ + lock_names_t1 = ['test_thread_ordering_A', + 'test_thread_ordering_B', + 'test_thread_ordering_C'] + lock_names_t2 = ['test_thread_ordering_A', + 'test_thread_ordering_B'] + test_locks = [utils.NOVA_FAIR_LOCKS.get( + lock_name) for lock_name in lock_names_t1] + + t1_acquired = threading.Event() + t1_acquired_time = None + t1_released = threading.Event() + t2_started = threading.Event() + t2_acquired = threading.Event() + t2_acquired_time = None + t1_continue = threading.Event() + + def thread1(): + lock_guard = utils.FairLockGuard(lock_names_t1) + with lock_guard: + nonlocal t1_acquired_time + t1_acquired_time = time.time() + t1_acquired.set() + # Wait for T2 to start trying to acquire locks + t2_started.wait() + # Give T2 time to block on the locks + time.sleep(0.1) + t1_continue.wait() + # Hold locks for a bit to ensure T2 is waiting + time.sleep(0.5) + t1_released.set() + + def thread2(): + t2_started.set() + lock_guard = utils.FairLockGuard(lock_names_t2) + # This should block until T1 releases all locks + with lock_guard: + nonlocal t2_acquired_time + t2_acquired_time = time.time() + t2_acquired.set() + + t1 = threading.Thread(target=thread1) + t2 = threading.Thread(target=thread2) + + t1.start() + # Wait for T1 to acquire all locks + self.assertTrue(t1_acquired.wait(timeout=2)) + self.assertIsNotNone(t1_acquired_time) + t2.start() + # Wait for T2 to start trying to acquire locks + self.assertTrue(t2_started.wait(timeout=2)) + # T2 should not have acquired locks yet + self.assertIsNone(t2_acquired_time) + t1_continue.set() + t1.join() + t2.join() + + # T1 should have released all locks + self.assertTrue(t1_released.is_set()) + # T2 should have acquired its subset of locks + self.assertTrue(t2_acquired.is_set()) + self.assertIsNotNone(t2_acquired_time) + # T1 should have acquired its locks before T2 + self.assertLess(t1_acquired_time, t2_acquired_time) + + # and since all thread are now finished, + # all locks should be released + for lock in test_locks: + self.assertFalse(lock.is_writer()) + + def test_context_manager_shared_between_threads(self): + """Test that a context manager can be shared between threads + and that the locks are acquired in the correct order. + Do not do this in production code, while it works this is not + the intended usage pattern. 
+ """ + lock_names = ['test_context_manager_shared_between_threads1', + 'test_context_manager_shared_between_threads2'] + lock_guard = utils.FairLockGuard(lock_names) + sum = 0 + started = threading.Event() + + def worker(id, lock_guard): + started.wait() + nonlocal sum + with lock_guard: + sum += id + thread1 = threading.Thread(target=worker, args=(1, lock_guard)) + thread2 = threading.Thread(target=worker, args=(2, lock_guard)) + thread1.start() + thread2.start() + started.set() + thread1.join() + thread2.join() + self.assertEqual(3, sum) + + def test_context_manager_can_be_acquired_multiple_times(self): + lock_names = ['test_context_manager_can_be_acquired_multiple_times1', + 'test_context_manager_can_be_acquired_multiple_times2'] + test_locks = [utils.NOVA_FAIR_LOCKS.get( + lock_name) for lock_name in lock_names] + lock_guard = utils.FairLockGuard(lock_names) + with lock_guard: + self.assertTrue(lock_guard.is_locked()) + self.assertTrue(test_locks[0].is_writer()) + self.assertTrue(test_locks[1].is_writer()) + + self.assertFalse(lock_guard.is_locked()) + for lock in test_locks: + self.assertFalse(lock.is_writer()) + with lock_guard: + self.assertTrue(lock_guard.is_locked()) + self.assertTrue(test_locks[0].is_writer()) + self.assertTrue(test_locks[1].is_writer()) + + def test_nested_context_managers(self): + lock_names = ['test_nested_context_managers1', + 'test_nested_context_managers2'] + test_locks = [utils.NOVA_FAIR_LOCKS.get( + lock_name) for lock_name in lock_names] + lock_guard = utils.FairLockGuard(lock_names) + lock_guard2 = utils.FairLockGuard(lock_names) + + # on any one thread we can acquire the same lock + # multiple times provided we use separate context managers. + # In other words the locks are renterent. + # This is an implementation detail of the ReaderWriterLock + # provided by fasteners. if this test fails the api behavior + # of the FairLock has changed. + with lock_guard: + self.assertTrue(lock_guard.is_locked()) + self.assertTrue(test_locks[0].is_writer()) + self.assertTrue(test_locks[1].is_writer()) + with lock_guard2: + self.assertTrue(lock_guard2.is_locked()) + self.assertTrue(test_locks[0].is_writer()) + self.assertTrue(test_locks[1].is_writer()) + self.assertTrue(lock_guard.is_locked()) + self.assertTrue(test_locks[0].is_writer()) + self.assertTrue(test_locks[1].is_writer()) + + # attempting to nest the same context manager instance + # should raise a TypeError. + with lock_guard: + self.assertTrue(lock_guard.is_locked()) + self.assertTrue(test_locks[0].is_writer()) + self.assertTrue(test_locks[1].is_writer()) + with self.assertRaisesRegex( + TypeError, + "Cannot enter FairLockGuard while it is already active."): + with lock_guard: + pass + # after the TypeError, the outer context should still + # be active. + self.assertTrue(lock_guard.is_locked()) + self.assertTrue(test_locks[0].is_writer()) + self.assertTrue(test_locks[1].is_writer()) diff --git a/nova/utils.py b/nova/utils.py index ae4fea3752..21dc3cb3f3 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -59,6 +59,7 @@ CONF = nova.conf.CONF LOG = logging.getLogger(__name__) synchronized = lockutils.synchronized_with_prefix('nova-') +NOVA_FAIR_LOCKS = lockutils.FairLocks() SM_IMAGE_PROP_PREFIX = "image_" SM_INHERITABLE_KEYS = ( @@ -1171,6 +1172,107 @@ def latch_error_on_raise(retryable=(_SentinelException,)): return outer_wrapper +class FairLockGuard: + """A lock guard context manager + + This class support acquiring multiple locks safely by name + and releasing them via the context manager protocol + i.e. 
with FairLockGuard([list of lock names]): + + The intended usage model for this context manager is to + mediate access to a set of locks by name, not by pre-creating + the locks and passing them in. Lock creation and management + is handled entirely internally. + + If you are using this between threads, Thread-A and Thread-B + should both create there own context manager instead of sharing + a single context manager between treads. + + Nesting is supported by creating a new context manager instance + for each nested context with the same or different lock names + as the outer context. Attempting to nest the same context manager + instance should raise a TypeError. + + Example Valid Usage: + ``` + with FairLockGuard(['lock1', 'lock2']) as lock_guard: + with FairLockGuard(['lock1', 'lock2']) as lock_guard2: + pass + ``` + + Example Invalid Usage: + ``` + with FairLockGuard(['lock1', 'lock2']) as lock_guard: + with lock_guard: + pass + ``` + This will raise a TypeError because the same context manager instance + is being nested. + + In general you should avoid naming the context manager instance + and only construct it in the with statement to avoid incorrect usage. + """ + + def __init__(self, names): + # we need to sort the lock to ensure we acquire + # them in the same order. This ensures we do not + # fall afoul of the Dining philosophers problem + # https://en.wikipedia.org/wiki/Dining_philosophers_problem + # by implementing the simplest Ordered solution + # https://howardhinnant.github.io/dining_philosophers.html + self.names = sorted(names) + self.locks = [] + # NOTE(sean-k-mooney): this is technically not required + # but it protect the internal state of self.locks list + # from incorrect usage where a context manager instance is + # shared between threads. We use a reader-writer lock to + # allow concurrent reads in is_locked() while ensuring + # exclusive access for state modifications. + self.locks_lock = lockutils.ReaderWriterLock() + self._active = False + + def __enter__(self): + with self.locks_lock.write_lock(): + if self._active: + raise TypeError( + "Cannot enter FairLockGuard while it is already active. " + "Create a new instance for nested usage or wait for the " + "current context to exit.") + for name in self.names: + named_lock = NOVA_FAIR_LOCKS.get(name) + self.locks.append(named_lock) + # we ensure we add it to the list + # before we acquire the lock to make sure + # we release it if there is an exception + named_lock.acquire_write_lock() + self._active = True + + def __exit__(self, exc_type, exc_value, traceback): + with self.locks_lock.write_lock(): + for lock in self.locks: + # This should always be true but since + # we add the lock to the list before we + # acquire it check anyway. + if lock.is_writer(): + lock.release_write_lock() + self.locks = [] + self._active = False + + def is_locked(self): + # NOTE(sean-k-mooney): LockGuards exist in several programming + # languages such as c++, in general a LockGuard can support a + # single lock or many but by convention a LockGuard is only + # considered locked if all locks are acquired. FairLocks in + # oslo are implemented as ReaderWriter inter-process locks. + # that means that we hold the lock if we are hold the writer + # lock aka if the lock is the writer. We use a read lock here + # to allow concurrent reads while state modifications use write + # locks. 
+        with self.locks_lock.read_lock():
+            return (
+                self.locks and all(lock.is_writer() for lock in self.locks))
+
+
 def concurrency_mode_threading():
     """Returns true if the service is running in threading mode,
     false if running in Eventlet mode
diff --git a/releasenotes/notes/bug-2048837-multi-attach-volume-cleanup.yaml b/releasenotes/notes/bug-2048837-multi-attach-volume-cleanup.yaml
new file mode 100644
index 0000000000..eb5d404c5a
--- /dev/null
+++ b/releasenotes/notes/bug-2048837-multi-attach-volume-cleanup.yaml
@@ -0,0 +1,11 @@
+---
+fixes:
+  - |
+    When deleting multiple instances that share the same multi-attach volume
+    on the same compute host in parallel, the volume connection could be left
+    attached to the host even after all instances were deleted, preventing the
+    volume from being properly detached and reused. Nova now correctly cleans
+    up multi-attach volume connections when deleting instances, including when
+    multiple instances sharing the same volume are deleted concurrently. For
+    more details, see `bug 2048837 <https://bugs.launchpad.net/nova/+bug/2048837>`__.
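For illustration, here is a minimal usage sketch of the `FairLockGuard` added in `nova/utils.py` above, mirroring how `_delete_instance()` wraps multi-attach volume cleanup. Only the `FairLockGuard` API from this patch is assumed; `cleanup_shared_volumes`, `volume_ids`, and the cleanup body are hypothetical stand-ins, not part of the change:

```python
from nova import utils


def cleanup_shared_volumes(volume_ids):
    # Hypothetical helper: volume_ids is any iterable of volume UUID
    # strings. Build one fair lock name per multi-attach volume, using
    # the same naming scheme as _get_multiattach_volume_lock_names_bdms().
    lock_names = [f"multi_attach_volume_{vid}" for vid in volume_ids]
    # The guard sorts the names, acquires the matching fair writer
    # locks in that order, and releases them all on exit even if the
    # body raises, so concurrent deletes on the same host serialize
    # around the shared volumes.
    with utils.FairLockGuard(lock_names):
        pass  # disconnect/detach work goes here
```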