Confirm cross-cell resize from the API
This adds the logic to the API confirmResize operation such that if the migration is a cross-cell resize the API will RPC cast to conductor to confirm the resize rather than directly to the source compute service like a traditional resize. Conductor will then orchestrate the confirm process between the source and target cell. Now that the API has confirmResize plumbed this change builds on the cross-cell resize functional tests by confirming the resize for the image-backed server test. To make that extra fun, a volume is attached to the server while it is in VERIFY_RESIZE status to assert it remains attached to the instance in the target cell after the resize is confirmed. In addition, the FakeDriver.cleanup() method is updated to guard against calling it before the guest is destroyed from the "hypervisor". This is to make sure the cleanup() method is called properly from confirm_snapshot_based_resize_at_source() in the compute service on the source host. The _confirm_resize_on_deleting scenario will be dealt with in a later change. Part of blueprint cross-cell-resize Change-Id: Ia5892e1d2cb7c7685e104466f83df7bb00b168c0
This commit is contained in:
+15
-4
@@ -3687,10 +3687,21 @@ class API(base.Base):
|
||||
self._record_action_start(context, instance,
|
||||
instance_actions.CONFIRM_RESIZE)
|
||||
|
||||
self.compute_rpcapi.confirm_resize(context,
|
||||
instance,
|
||||
migration,
|
||||
migration.source_compute)
|
||||
# Check to see if this was a cross-cell resize, in which case the
|
||||
# resized instance is in the target cell (the migration and instance
|
||||
# came from the target cell DB in this case), and we need to cleanup
|
||||
# the source host and source cell database records.
|
||||
if migration.cross_cell_move:
|
||||
self.compute_task_api.confirm_snapshot_based_resize(
|
||||
context, instance, migration)
|
||||
else:
|
||||
# It's a traditional resize within a single cell, so RPC cast to
|
||||
# the source compute host to cleanup the host since the instance
|
||||
# is already on the target host.
|
||||
self.compute_rpcapi.confirm_resize(context,
|
||||
instance,
|
||||
migration,
|
||||
migration.source_compute)
|
||||
|
||||
@staticmethod
|
||||
def _allow_cross_cell_resize(context, instance):
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
|
||||
import mock
|
||||
|
||||
from oslo_utils.fixture import uuidsentinel as uuids
|
||||
|
||||
from nova.compute import instance_actions
|
||||
from nova import context as nova_context
|
||||
from nova import exception
|
||||
from nova import objects
|
||||
@@ -378,13 +381,172 @@ class TestMultiCellMigrate(integrated_helpers.ProviderUsageBaseTestCase):
|
||||
|
||||
return server, source_rp_uuid, target_rp_uuid, old_flavor, new_flavor
|
||||
|
||||
def _attach_volume_to_server(self, server_id, volume_id):
    """Attach a volume to the given server and wait for completion.

    Posts the attachment to the os-volume_attachments API and blocks
    until the "instance.volume_attach.end" versioned notification is
    received, i.e. the attach has fully completed on the compute side.

    :param server_id: id of the server to attach the volume to
    :param volume_id: id of the volume to attach
    """
    attach_body = {'volumeAttachment': {'volumeId': volume_id}}
    self.api.api_post(
        '/servers/%s/os-volume_attachments' % server_id, attach_body)
    # Wait for the attach to finish before returning to the caller.
    fake_notifier.wait_for_versioned_notifications(
        'instance.volume_attach.end')
|
||||
|
||||
def assert_volume_is_attached(self, server_id, volume_id):
    """Asserts the volume is attached to the server.

    :param server_id: id of the server to check
    :param volume_id: id of the volume expected to be attached
    """
    server = self.api.get_server(server_id)
    attachments = server['os-extended-volumes:volumes_attached']
    # The API reports attachments as a list of {'id': <volume_id>} dicts.
    self.assertIn(
        volume_id,
        [attachment['id'] for attachment in attachments],
        'Attached volumes: %s' % attachments)
|
||||
|
||||
def assert_resize_confirm_notifications(self):
    """Asserts exactly the expected confirm notifications were emitted.

    The confirm flow should produce only two versioned notifications,
    in order:

    1. instance.resize_confirm.start
    2. instance.resize_confirm.end
    """
    notifications = fake_notifier.VERSIONED_NOTIFICATIONS
    self.assertEqual(2, len(notifications),
                     'Unexpected number of versioned notifications for '
                     'cross-cell resize confirm: %s' %
                     fake_notifier.VERSIONED_NOTIFICATIONS)
    self.assertEqual('instance.resize_confirm.start',
                     notifications[0]['event_type'])
    self.assertEqual('instance.resize_confirm.end',
                     notifications[1]['event_type'])
|
||||
|
||||
def delete_server_and_assert_cleanup(self, server):
    """Deletes the server and makes various cleanup checks.

    - makes sure allocations from placement are gone
    - makes sure the instance record is gone from both cells
    - makes sure there are no leaked volume attachments

    :param server: dict of the server resource to delete
    """
    # Remember which cell hosted the instance when the delete was issued
    # so we can distinguish soft delete vs hard delete in the cell DBs.
    deletion_cell = self.host_to_cell_mappings[
        server['OS-EXT-SRV-ATTR:host']]
    # Deleting the server must also remove its placement allocations.
    self._delete_and_check_allocations(server)
    # The instance record must be gone from both cell databases.
    ctxt = nova_context.get_admin_context()
    for cell_name in self.host_to_cell_mappings.values():
        cell = self.cell_mappings[cell_name]
        with nova_context.target_cell(ctxt, cell) as cctxt:
            # In the cell the instance was deleted from, the record is
            # soft-deleted (instance.deleted != 0) so a read_deleted='no'
            # lookup must fail; in the other cell it is hard-deleted so
            # even a read_deleted='yes' lookup must fail.
            read_deleted = 'no' if deletion_cell == cell_name else 'yes'
            with utils.temporary_mutation(
                    cctxt, read_deleted=read_deleted):
                self.assertRaises(exception.InstanceNotFound,
                                  objects.Instance.get_by_uuid,
                                  cctxt, server['id'])
    # Finally, no volume attachments may remain for the server.
    attachment_count = self._count_volume_attachments(server['id'])
    self.assertEqual(0, attachment_count, 'Leaked volume attachments: %s' %
                     self.cinder.volume_to_attachment)
|
||||
|
||||
def assert_resize_confirm_actions(self, server):
    """Asserts the confirmResize instance action exists and succeeded.

    Looks up the server's instance actions, finds the CONFIRM_RESIZE
    action, and checks that its conductor and source-compute events both
    completed successfully.

    :param server: dict of the server resource that was confirmed
    """
    actions = self.api.api_get(
        '/servers/%s/os-instance-actions' % server['id']
    ).body['instanceActions']
    by_action = {action['action']: action for action in actions}
    self.assertIn(instance_actions.CONFIRM_RESIZE, by_action)
    confirm_action = by_action[instance_actions.CONFIRM_RESIZE]
    # Fetch the per-event detail for the confirm action.
    detail = self.api.api_get(
        '/servers/%s/os-instance-actions/%s' % (
            server['id'], confirm_action['request_id'])
    ).body['instanceAction']
    events_by_name = {event['event']: event for event in detail['events']}
    # Exactly two events: the conductor orchestration event and the
    # source compute cleanup event, both successful.
    self.assertEqual(2, len(detail['events']), detail)
    for event_name in ('conductor_confirm_snapshot_based_resize',
                       'compute_confirm_snapshot_based_resize_at_source'):
        self.assertIn(event_name, events_by_name)
        self.assertEqual('Success', events_by_name[event_name]['result'])
    self.assertEqual('Success', detail['events'][0]['result'])
|
||||
|
||||
def test_resize_confirm_image_backed(self):
    """Creates an image-backed server in one cell and resizes it to the
    host in the other cell. The resize is confirmed.
    """
    server, source_rp_uuid, target_rp_uuid, _, new_flavor = (
        self._resize_and_validate())

    # Attach a fake volume to the server to make sure it survives confirm.
    self._attach_volume_to_server(server['id'], uuids.fake_volume_id)

    # Reset the fake notifier so we only check confirmation notifications.
    fake_notifier.reset()

    # Confirm the resize and check all the things. The instance and its
    # related records should be gone from the source cell database; the
    # migration should be confirmed; the allocations, held by the migration
    # record on the source compute node resource provider, should now be
    # gone; there should be a confirmResize instance action record with
    # a successful event.
    target_host = server['OS-EXT-SRV-ATTR:host']
    source_host = 'host1' if target_host == 'host2' else 'host2'
    self.api.post_server_action(server['id'], {'confirmResize': None})
    self._wait_for_state_change(server, 'ACTIVE')

    # The migration should be confirmed.
    migrations = self.api.api_get(
        '/os-migrations?instance_uuid=%s' % server['id']
    ).body['migrations']
    self.assertEqual(1, len(migrations), migrations)
    migration = migrations[0]
    self.assertEqual('confirmed', migration['status'], migration)

    # The resource allocations held against the source node by the
    # migration record should be gone and the target node provider should
    # have allocations held by the instance.
    source_allocations = self._get_allocations_by_provider_uuid(
        source_rp_uuid)
    self.assertEqual({}, source_allocations)
    target_allocations = self._get_allocations_by_provider_uuid(
        target_rp_uuid)
    self.assertIn(server['id'], target_allocations)
    self.assertFlavorMatchesAllocation(
        new_flavor, target_allocations[server['id']]['resources'])

    self.assert_resize_confirm_actions(server)

    # Make sure the guest is on the target node hypervisor and not on the
    # source node hypervisor.
    source_guest_uuids = (
        self.computes[source_host].manager.driver.list_instance_uuids())
    self.assertNotIn(server['id'], source_guest_uuids,
                     'Guest is still running on the source hypervisor.')
    target_guest_uuids = (
        self.computes[target_host].manager.driver.list_instance_uuids())
    self.assertIn(server['id'], target_guest_uuids,
                  'Guest is not running on the target hypervisor.')

    # Assert the source host hypervisor usage is back to 0 and the target
    # is using the new flavor.
    self.assert_hypervisor_usage(
        target_rp_uuid, new_flavor, volume_backed=False)
    no_usage = {'vcpus': 0, 'disk': 0, 'ram': 0}
    self.assert_hypervisor_usage(
        source_rp_uuid, no_usage, volume_backed=False)

    # Run periodics and make sure the usage is still as expected.
    self._run_periodics()
    self.assert_hypervisor_usage(
        target_rp_uuid, new_flavor, volume_backed=False)
    self.assert_hypervisor_usage(
        source_rp_uuid, no_usage, volume_backed=False)

    # Make sure the fake volume is still attached.
    self.assert_volume_is_attached(server['id'], uuids.fake_volume_id)

    # Make sure we got the expected notifications for the confirm action.
    self.assert_resize_confirm_notifications()

    # Explicitly delete the server and make sure it's gone from all cells.
    self.delete_server_and_assert_cleanup(server)
|
||||
|
||||
def test_resize_revert_volume_backed(self):
|
||||
"""Tests a volume-backed resize to another cell where the resize
|
||||
@@ -457,11 +619,25 @@ class TestMultiCellMigrate(integrated_helpers.ProviderUsageBaseTestCase):
|
||||
# onto the source host in the source cell.
|
||||
|
||||
def test_resize_confirm_from_stopped(self):
    """Tests resizing and confirming a volume-backed server that was
    initially stopped so it should remain stopped through the resize.
    """
    server = self._resize_and_validate(volume_backed=True, stopped=True)[0]
    # Confirm the resize and assert the guest remains off.
    self.api.post_server_action(server['id'], {'confirmResize': None})
    server = self._wait_for_state_change(server, 'SHUTOFF')
    # power_state 4 == SHUTDOWN.
    self.assertEqual(4, server['OS-EXT-STS:power_state'],
                     "Unexpected power state after confirmResize.")
    self._wait_for_migration_status(server, ['confirmed'])

    # Now try cold-migrating back to cell1 to make sure there is no
    # duplicate entry error in the DB.
    self.api.post_server_action(server['id'], {'migrate': None})
    server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
    # Should be back on host1 in cell1.
    self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])
|
||||
|
||||
# TODO(mriedem): test_resize_revert_from_stopped with image-backed.
|
||||
|
||||
def test_finish_snapshot_based_resize_at_dest_spawn_fails(self):
|
||||
"""Negative test where the driver spawn fails on the dest host during
|
||||
|
||||
@@ -6859,6 +6859,34 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
|
||||
self._test_get_migrations_sorted_filter_duplicates(
|
||||
[older, newer], newer)
|
||||
|
||||
@mock.patch('nova.objects.Migration.get_by_instance_and_status')
def test_confirm_resize_cross_cell_move_true(self, mock_migration_get):
    """Tests confirm_resize where Migration.cross_cell_move is True"""
    # A resized instance with no in-progress task is eligible for confirm.
    instance = fake_instance.fake_instance_obj(
        self.context, vm_state=vm_states.RESIZED, task_state=None,
        launched_at=timeutils.utcnow())
    # Force the cross-cell path in the API.
    migration = objects.Migration(cross_cell_move=True)
    mock_migration_get.return_value = migration
    with test.nested(
        mock.patch.object(self.context, 'elevated',
                          return_value=self.context),
        mock.patch.object(migration, 'save'),
        mock.patch.object(self.compute_api, '_record_action_start'),
        mock.patch.object(self.compute_api.compute_task_api,
                          'confirm_snapshot_based_resize'),
    ) as (
        mock_elevated, mock_migration_save, mock_record_action,
        mock_conductor_confirm
    ):
        self.compute_api.confirm_resize(self.context, instance)
    # The migration is moved to 'confirming' and saved before casting.
    mock_elevated.assert_called_once_with()
    mock_migration_save.assert_called_once_with()
    self.assertEqual('confirming', migration.status)
    mock_record_action.assert_called_once_with(
        self.context, instance, instance_actions.CONFIRM_RESIZE)
    # Cross-cell confirm goes to conductor, not the source compute.
    mock_conductor_confirm.assert_called_once_with(
        self.context, instance, migration)
|
||||
|
||||
|
||||
class DiffDictTestCase(test.NoDBTestCase):
|
||||
"""Unit tests for _diff_dict()."""
|
||||
|
||||
+4
-1
@@ -314,7 +314,10 @@ class FakeDriver(driver.ComputeDriver):
|
||||
|
||||
def cleanup(self, context, instance, network_info, block_device_info=None,
            destroy_disks=True, migrate_data=None, destroy_vifs=True):
    # cleanup() should not be called when the guest has not been destroyed.
    # A still-tracked uuid means destroy() was never run for this instance
    # on this fake "hypervisor", which indicates a sequencing bug in the
    # caller (e.g. confirm_snapshot_based_resize_at_source).
    if instance.uuid in self.instances:
        raise exception.InstanceExists(
            "Instance %s has not been destroyed." % instance.uuid)
|
||||
|
||||
def attach_volume(self, context, connection_info, instance, mountpoint,
|
||||
disk_bus=None, device_type=None, encryption=None):
|
||||
|
||||
Reference in New Issue
Block a user