From 89dbd08976da6faabb5c8d0e7a29f4b7f9e8b812 Mon Sep 17 00:00:00 2001 From: Sundar Nadathur Date: Mon, 5 Aug 2019 21:55:01 -0700 Subject: [PATCH] Block unsupported instance operations with accelerators. The block is applied to primary operations, such as pause or shelve, but not to their reverse operations, like unpause or unshelve, because that is not necessary. Added functional tests for various instance operations, including those that work and those that fail. Rebuild functional test passes. Change-Id: I016bc1812404ce1019c71b7a3363f34acc3f8aed Blueprint: nova-cyborg-interaction --- nova/api/openstack/compute/evacuate.py | 4 +- nova/api/openstack/compute/migrate_server.py | 7 +- nova/api/openstack/compute/servers.py | 6 +- nova/api/openstack/compute/suspend_server.py | 4 +- nova/compute/api.py | 16 ++ nova/exception.py | 5 + nova/tests/functional/test_servers.py | 156 ++++++++++++++++++- nova/tests/unit/compute/test_compute.py | 2 + nova/tests/unit/compute/test_compute_api.py | 37 ++++- 9 files changed, 228 insertions(+), 9 deletions(-) diff --git a/nova/api/openstack/compute/evacuate.py b/nova/api/openstack/compute/evacuate.py index 5d7c35a8cc..22ae35df8f 100644 --- a/nova/api/openstack/compute/evacuate.py +++ b/nova/api/openstack/compute/evacuate.py @@ -74,7 +74,7 @@ class EvacuateController(wsgi.Controller): # TODO(eliqiao): Should be responding here with 202 Accept # because evacuate is an async call, but keep to 200 for # backwards compatibility reasons. - @wsgi.expected_errors((400, 404, 409)) + @wsgi.expected_errors((400, 403, 404, 409)) @wsgi.action('evacuate') @validation.schema(evacuate.evacuate, "2.0", "2.13") @validation.schema(evacuate.evacuate_v214, "2.14", "2.28") @@ -144,6 +144,8 @@ class EvacuateController(wsgi.Controller): 'evacuate', id) except exception.ComputeServiceInUse as e: raise exc.HTTPBadRequest(explanation=e.format_message()) + except exception.ForbiddenWithAccelerators as e: + raise exc.HTTPForbidden(explanation=e.format_message()) if (not api_version_request.is_supported(req, min_version='2.14') and CONF.api.enable_instance_password): diff --git a/nova/api/openstack/compute/migrate_server.py b/nova/api/openstack/compute/migrate_server.py index 74f04dcdd9..45189bece5 100644 --- a/nova/api/openstack/compute/migrate_server.py +++ b/nova/api/openstack/compute/migrate_server.py @@ -74,7 +74,8 @@ class MigrateServerController(wsgi.Controller): try: self.compute_api.resize(req.environ['nova.context'], instance, host_name=host_name) - except (exception.TooManyInstances, exception.QuotaError) as e: + except (exception.TooManyInstances, exception.QuotaError, + exception.ForbiddenWithAccelerators) as e: raise exc.HTTPForbidden(explanation=e.format_message()) except (exception.InstanceIsLocked, exception.InstanceNotReady, @@ -90,7 +91,7 @@ class MigrateServerController(wsgi.Controller): raise exc.HTTPBadRequest(explanation=e.format_message()) @wsgi.response(202) - @wsgi.expected_errors((400, 404, 409)) + @wsgi.expected_errors((400, 403, 404, 409)) @wsgi.action('os-migrateLive') @validation.schema(migrate_server.migrate_live, "2.0", "2.24") @validation.schema(migrate_server.migrate_live_v2_25, "2.25", "2.29") @@ -174,6 +175,8 @@ class MigrateServerController(wsgi.Controller): except exception.InstanceInvalidState as state_error: common.raise_http_conflict_for_instance_invalid_state(state_error, 'os-migrateLive', id) + except exception.ForbiddenWithAccelerators as e: + raise exc.HTTPForbidden(explanation=e.format_message()) def _get_force_param_for_live_migration(self, body, host): force = body["os-migrateLive"].get("force", False) diff --git a/nova/api/openstack/compute/servers.py b/nova/api/openstack/compute/servers.py index b1497f1655..e2f59d3dd1 100644 --- a/nova/api/openstack/compute/servers.py +++ b/nova/api/openstack/compute/servers.py @@ -946,7 +946,8 @@ class ServersController(wsgi.Controller): try: self.compute_api.resize(context, instance, flavor_id, auto_disk_config=auto_disk_config) - except exception.QuotaError as error: + except (exception.QuotaError, + exception.ForbiddenWithAccelerators) as error: raise exc.HTTPForbidden( explanation=error.format_message()) except (exception.InstanceIsLocked, @@ -1113,7 +1114,8 @@ class ServersController(wsgi.Controller): except exception.KeypairNotFound: msg = _("Invalid key_name provided.") raise exc.HTTPBadRequest(explanation=msg) - except exception.QuotaError as error: + except (exception.QuotaError, + exception.ForbiddenWithAccelerators) as error: raise exc.HTTPForbidden(explanation=error.format_message()) except (exception.AutoDiskConfigDisabledByImage, exception.CertificateValidationFailed, diff --git a/nova/api/openstack/compute/suspend_server.py b/nova/api/openstack/compute/suspend_server.py index fcee538c7b..e364842b4d 100644 --- a/nova/api/openstack/compute/suspend_server.py +++ b/nova/api/openstack/compute/suspend_server.py @@ -27,7 +27,7 @@ class SuspendServerController(wsgi.Controller): self.compute_api = compute.API() @wsgi.response(202) - @wsgi.expected_errors((404, 409)) + @wsgi.expected_errors((403, 404, 409)) @wsgi.action('suspend') def _suspend(self, req, id, body): """Permit admins to suspend the server.""" @@ -44,6 +44,8 @@ class SuspendServerController(wsgi.Controller): except exception.InstanceInvalidState as state_error: common.raise_http_conflict_for_instance_invalid_state(state_error, 'suspend', id) + except exception.ForbiddenWithAccelerators as e: + raise exc.HTTPForbidden(explanation=e.format_message()) @wsgi.response(202) @wsgi.expected_errors((404, 409)) diff --git a/nova/compute/api.py b/nova/compute/api.py index 6a90ddffc7..5c543887d1 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -284,6 +284,16 @@ def _get_image_meta_obj(image_meta_dict): return image_meta +def block_accelerators(func): + @functools.wraps(func) + def wrapper(self, context, instance, *args, **kwargs): + dp_name = instance.flavor.extra_specs.get('accel:device_profile') + if dp_name: + raise exception.ForbiddenWithAccelerators() + return func(self, context, instance, *args, **kwargs) + return wrapper + + @profiler.trace_cls("compute_api") class API(base.Base): """API for interacting with the compute manager.""" @@ -3372,6 +3382,7 @@ class API(base.Base): if img_arch: fields_obj.Architecture.canonicalize(img_arch) + @block_accelerators # TODO(stephenfin): We should expand kwargs out to named args @check_instance_lock @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED, @@ -3850,6 +3861,7 @@ class API(base.Base): return node + @block_accelerators @check_instance_lock @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED]) @check_instance_host(check_is_up=True) @@ -4046,6 +4058,7 @@ class API(base.Base): allow_same_host = CONF.allow_resize_to_same_host return allow_same_host + @block_accelerators @check_instance_lock @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED, vm_states.PAUSED, vm_states.SUSPENDED]) @@ -4210,6 +4223,7 @@ class API(base.Base): return self.compute_rpcapi.get_instance_diagnostics(context, instance=instance) + @block_accelerators @reject_sev_instances(instance_actions.SUSPEND) @check_instance_lock @check_instance_state(vm_state=[vm_states.ACTIVE]) @@ -4890,6 +4904,7 @@ class API(base.Base): diff=diff) return _metadata + @block_accelerators @reject_sev_instances(instance_actions.LIVE_MIGRATION) @check_instance_lock @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED]) @@ -5019,6 +5034,7 @@ class API(base.Base): self.compute_rpcapi.live_migration_abort(context, instance, migration.id) + @block_accelerators @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED, vm_states.ERROR]) def evacuate(self, context, instance, host, on_shared_storage, diff --git a/nova/exception.py b/nova/exception.py index a5f16b85fc..9978d8d034 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -156,6 +156,11 @@ class Forbidden(NovaException): code = 403 +class ForbiddenWithAccelerators(NovaException): + msg_fmt = _("Forbidden with instances that have accelerators.") + code = 403 + + class AdminRequired(Forbidden): msg_fmt = _("User does not have admin privileges") diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index ab3fc2cf72..fe433bfca7 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -7900,7 +7900,7 @@ class AcceleratorServerBase(integrated_helpers.ProviderUsageBaseTestCase): extra_spec=extra_specs) return flavor_id - def _check_allocations_usage(self, server): + def _check_allocations_usage(self, server, check_other_host_alloc=True): # Check allocations on host where instance is running server_uuid = server['id'] @@ -7921,7 +7921,8 @@ class AcceleratorServerBase(integrated_helpers.ProviderUsageBaseTestCase): self.assertEqual(expected_device_alloc, device_alloc[server_uuid]) else: - self.assertEqual({}, host_alloc) + if check_other_host_alloc: + self.assertEqual({}, host_alloc) self.assertEqual({}, device_alloc) # NOTE(Sundar): ARQs for an instance could come from different @@ -8088,3 +8089,154 @@ class AcceleratorServerReschedTest(AcceleratorServerBase): [mock.call(server_uuid), mock.call(server_uuid), mock.call(server_uuid)]) + + +class AcceleratorServerOpsTest(AcceleratorServerBase): + + def setUp(self): + self.NUM_HOSTS = 2 # 2nd host needed for evacuate + super(AcceleratorServerOpsTest, self).setUp() + flavor_id = self._create_acc_flavor() + server_name = 'accel_server1' + self.server = self._create_server( + server_name, flavor_id=flavor_id, + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + networks='none', expected_state='ACTIVE') + + def test_soft_reboot_ok(self): + params = {'reboot': {'type': 'SOFT'}} + self.api.post_server_action(self.server['id'], params) + self._wait_for_state_change(self.server, 'ACTIVE') + self._check_allocations_usage(self.server) + + def test_hard_reboot_ok(self): + params = {'reboot': {'type': 'HARD'}} + self.api.post_server_action(self.server['id'], params) + self._wait_for_state_change(self.server, 'HARD_REBOOT') + self._wait_for_state_change(self.server, 'ACTIVE') + self._check_allocations_usage(self.server) + + def test_pause_unpause_ok(self): + # Pause and unpause should work with accelerators. + # This is not a general test of un/pause functionality. + self.api.post_server_action(self.server['id'], {'pause': {}}) + self._wait_for_state_change(self.server, 'PAUSED') + self._check_allocations_usage(self.server) + # ARQs didn't get deleted (and so didn't have to be re-created). + self.cyborg.mock_del_arqs.assert_not_called() + + self.api.post_server_action(self.server['id'], {'unpause': {}}) + self._wait_for_state_change(self.server, 'ACTIVE') + self._check_allocations_usage(self.server) + + def test_stop_start_ok(self): + # Stop and start should work with accelerators. + # This is not a general test of start/stop functionality. + self.api.post_server_action(self.server['id'], {'os-stop': {}}) + self._wait_for_state_change(self.server, 'SHUTOFF') + self._check_allocations_usage(self.server) + # ARQs didn't get deleted (and so didn't have to be re-created). + self.cyborg.mock_del_arqs.assert_not_called() + + self.api.post_server_action(self.server['id'], {'os-start': {}}) + self._wait_for_state_change(self.server, 'ACTIVE') + self._check_allocations_usage(self.server) + + def test_lock_unlock_ok(self): + # Lock/unlock are no-ops for accelerators. + self.api.post_server_action(self.server['id'], {'lock': {}}) + server = self.api.get_server(self.server['id']) + self.assertTrue(server['locked']) + self._check_allocations_usage(self.server) + + self.api.post_server_action(self.server['id'], {'unlock': {}}) + server = self.api.get_server(self.server['id']) + self.assertTrue(not server['locked']) + self._check_allocations_usage(self.server) + + def test_backup_ok(self): + self.api.post_server_action(self.server['id'], + {'createBackup': { + 'name': 'Backup 1', + 'backup_type': 'daily', + 'rotation': 1}}) + self._check_allocations_usage(self.server) + + def test_create_image_ok(self): # snapshot + self.api.post_server_action(self.server['id'], + {'createImage': { + 'name': 'foo-image', + 'metadata': {'meta_var': 'meta_val'}}}) + self._check_allocations_usage(self.server) + + def test_rescue_unrescue_ok(self): + self.api.post_server_action(self.server['id'], + {'rescue': { + 'adminPass': 'MySecretPass', + 'rescue_image_ref': '70a599e0-31e7-49b7-b260-868f441e862b'}}) + self._check_allocations_usage(self.server) + # ARQs didn't get deleted (and so didn't have to be re-created). + self.cyborg.mock_del_arqs.assert_not_called() + self._wait_for_state_change(self.server, 'RESCUE') + + self.api.post_server_action(self.server['id'], {'unrescue': {}}) + self._check_allocations_usage(self.server) + + def test_resize_fails(self): + ex = self.assertRaises(client.OpenStackApiException, + self.api.post_server_action, self.server['id'], + {'resize': {'flavorRef': '2', 'OS-DCF:diskConfig': 'AUTO'}}) + self.assertEqual(403, ex.response.status_code) + self._check_allocations_usage(self.server) + + def test_suspend_fails(self): + ex = self.assertRaises(client.OpenStackApiException, + self.api.post_server_action, self.server['id'], {'suspend': {}}) + self.assertEqual(403, ex.response.status_code) + self._check_allocations_usage(self.server) + + def test_migrate_fails(self): + ex = self.assertRaises(client.OpenStackApiException, + self.api.post_server_action, self.server['id'], {'migrate': {}}) + self.assertEqual(403, ex.response.status_code) + self._check_allocations_usage(self.server) + + def test_live_migrate_fails(self): + ex = self.assertRaises(client.OpenStackApiException, + self.api.post_server_action, self.server['id'], + {'migrate': {'host': 'accel_host1'}}) + self.assertEqual(403, ex.response.status_code) + self._check_allocations_usage(self.server) + + def test_evacuate_fails(self): + server_hostname = self.server['OS-EXT-SRV-ATTR:host'] + for i in range(self.NUM_HOSTS): + hostname = 'accel_host' + str(i) + if hostname != server_hostname: + other_hostname = hostname + if self.compute_services[i].host == server_hostname: + compute_to_stop = self.compute_services[i] + + # Stop and force down the compute service. + compute_id = self.admin_api.get_services( + host=server_hostname, binary='nova-compute')[0]['id'] + compute_to_stop.stop() + self.admin_api.put_service(compute_id, {'forced_down': 'true'}) + + ex = self.assertRaises(client.OpenStackApiException, + self.api.post_server_action, self.server['id'], + {'evacuate': { + 'host': other_hostname, + 'adminPass': 'MySecretPass'}}) + self.assertEqual(403, ex.response.status_code) + self._check_allocations_usage(self.server) + + def test_rebuild_fails(self): + rebuild_image_ref = fake_image.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID + ex = self.assertRaises(client.OpenStackApiException, + self.api.post_server_action, self.server['id'], + {'rebuild': { + 'imageRef': rebuild_image_ref, + 'OS-DCF:diskConfig': 'AUTO'}}) + self.assertEqual(403, ex.response.status_code) + self._check_allocations_usage(self.server) diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index f541ce8765..917a0aeec6 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -9088,6 +9088,7 @@ class ComputeAPITestCase(BaseTestCase): def test_rebuild_in_error_not_launched(self): instance = self._create_fake_instance_obj(params={'image_ref': ''}) + flavor = instance.flavor self.stub_out('nova.tests.unit.image.fake._FakeImageService.show', self.fake_show) self.compute.build_and_run_instance(self.context, instance, {}, {}, {}, @@ -9098,6 +9099,7 @@ class ComputeAPITestCase(BaseTestCase): "launched_at": None}) instance = db.instance_get_by_uuid(self.context, instance['uuid']) + instance['flavor'] = flavor self.assertRaises(exception.InstanceInvalidState, self.compute_api.rebuild, diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index 2d60ca8870..021295feb5 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -2057,7 +2057,7 @@ class _ComputeAPIUnitTestMixIn(object): 'user': user_count} cur_flavor = objects.Flavor(id=1, name='foo', vcpus=1, memory_mb=512, - root_gb=10, disabled=False) + root_gb=10, disabled=False, extra_specs={}) fake_inst = self._create_instance_obj() fake_inst.flavor = cur_flavor new_flavor = objects.Flavor(id=2, name='bar', vcpus=1, memory_mb=2048, @@ -7214,6 +7214,41 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase): mock_get_min_ver.assert_called_once_with( self.context, ['nova-compute']) + def _test_block_accelerators(self, instance, args_info): + @compute_api.block_accelerators + def myfunc(self, context, instance, *args, **kwargs): + args_info['args'] = (context, instance, *args) + args_info['kwargs'] = dict(**kwargs) + + args = ('arg1', 'arg2') + kwargs = {'arg3': 'dummy3', 'arg4': 'dummy4'} + + myfunc(mock.ANY, self.context, instance, *args, **kwargs) + + expected_args = (self.context, instance, *args) + return expected_args, kwargs + + def test_block_accelerators_no_device_profile(self): + instance = self._create_instance_obj() + args_info = {} + + expected_args, kwargs = self._test_block_accelerators( + instance, args_info) + self.assertEqual(expected_args, args_info['args']) + self.assertEqual(kwargs, args_info['kwargs']) + + def test_block_accelerators_with_device_profile(self): + extra_specs = {'accel:device_profile': 'mydp'} + flavor = self._create_flavor(extra_specs=extra_specs) + instance = self._create_instance_obj(flavor=flavor) + args_info = {} + + self.assertRaisesRegex(exception.ForbiddenWithAccelerators, + 'Forbidden with instances that have accelerators.', + self._test_block_accelerators, instance, args_info) + # myfunc was not called + self.assertEqual({}, args_info) + class DiffDictTestCase(test.NoDBTestCase): """Unit tests for _diff_dict()."""