diff --git a/nova/compute/manager.py b/nova/compute/manager.py index f7260dfa7c..d26889589c 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2579,7 +2579,11 @@ class ComputeManager(manager.Manager): spec_arqs, network_arqs) except (Exception, eventlet.timeout.Timeout) as exc: LOG.exception(exc) - compute_utils.delete_arqs_if_needed(context, instance) + # ARQs created for instance or ports. + # The port binding isn't done yet. + # Unbind port won't clean port ARQs. + compute_utils.delete_arqs_if_needed( + context, instance, accel_uuids) msg = _('Failure getting accelerator requests.') raise exception.BuildAbortException( reason=msg, instance_uuid=instance.uuid) @@ -2678,7 +2682,12 @@ class ComputeManager(manager.Manager): reason=str(exc)) finally: # Call Cyborg to delete accelerator requests - compute_utils.delete_arqs_if_needed(context, instance) + if accel_uuids: + # ARQs created for instance or ports. + # Port bind may not successful. + # unbind port won't clean port ARQs. + compute_utils.delete_arqs_if_needed( + context, instance, accel_uuids) def _get_bound_arq_resources(self, context, instance, arq_uuids): """Get bound accelerator requests. diff --git a/nova/compute/utils.py b/nova/compute/utils.py index c5be116584..39c56c140f 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -1537,16 +1537,28 @@ def update_pci_request_spec_with_allocated_interface_name( spec['parent_ifname'] = rp_name_pieces[2] -def delete_arqs_if_needed(context, instance): - """Delete Cyborg ARQs for the instance.""" - dp_name = instance.flavor.extra_specs.get('accel:device_profile') - if dp_name is None: - return +def delete_arqs_if_needed(context, instance, arq_uuids=None): + """Delete Cyborg ARQs for the instance. + + :param context + :param instance: instance who own the args + :param uuids: delete arqs by uuids while did not bind to instance yet. + """ cyclient = cyborg.get_client(context) - LOG.debug('Calling Cyborg to delete ARQs for instance %(instance)s', + dp_name = instance.flavor.extra_specs.get('accel:device_profile') + + if dp_name: + LOG.debug('Calling Cyborg to delete ARQs for instance %(instance)s', {'instance': instance.uuid}) - try: - cyclient.delete_arqs_for_instance(instance.uuid) - except exception.AcceleratorRequestOpFailed as e: - LOG.exception('Failed to delete accelerator requests for ' + try: + cyclient.delete_arqs_for_instance(instance.uuid) + except exception.AcceleratorRequestOpFailed as e: + LOG.exception('Failed to delete accelerator requests for ' 'instance %s. Exception: %s', instance.uuid, e) + + if arq_uuids: + LOG.debug('Calling Cyborg to delete ARQs by uuids %(uuid)s for' + ' instance %(instance)s', + {'instance': instance.uuid, + 'uuid': arq_uuids}) + cyclient.delete_arqs_by_uuid(arq_uuids) diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index 61702c8859..d9b1d212ac 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -592,7 +592,13 @@ class ComputeTaskManager: legacy_request_spec) self._cleanup_allocated_networks( context, instance, requested_networks) - compute_utils.delete_arqs_if_needed(context, instance) + + arq_uuids = None + # arqs have not bound to port/instance yet + if requested_networks: + arq_uuids = [req.arq_uuid + for req in requested_networks if req.arq_uuid] + compute_utils.delete_arqs_if_needed(context, instance, arq_uuids) # NOTE(danms): This is never cell-targeted because it is only used for # n-cpu reschedules which go to the cell conductor and thus are always diff --git a/nova/network/neutron.py b/nova/network/neutron.py index a46d35458d..80a8c7080f 100644 --- a/nova/network/neutron.py +++ b/nova/network/neutron.py @@ -653,11 +653,19 @@ class API: ).get('network') networks[net_id] = network + # Unbind Port device + if port_profile.get('arq_uuid'): + """Delete device profile by arq uuid.""" + cyclient = cyborg.get_client(context) + cyclient.delete_arqs_by_uuid([port_profile['arq_uuid']]) + LOG.debug('Delete ARQs %s for port %s', + port_profile['arq_uuid'], port_id) + # NOTE: We're doing this to remove the binding information # for the physical device but don't want to overwrite the other # information in the binding profile. for profile_key in ('pci_vendor_info', 'pci_slot', - constants.ALLOCATION): + constants.ALLOCATION, 'arq_uuid'): if profile_key in port_profile: del port_profile[profile_key] port_req_body['port'][constants.BINDING_PROFILE] = port_profile diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 9e060290a4..c6818caf9d 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -6235,9 +6235,12 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): self.context, self.instance, arq_uuids) self.assertEqual(sorted(resources['accel_info']), sorted(arq_list)) + @mock.patch.object(compute_utils, + 'delete_arqs_if_needed') @mock.patch.object(nova.compute.manager.ComputeManager, '_get_bound_arq_resources') - def test_accel_build_resources_exception(self, mock_get_arqs): + def test_accel_build_resources_exception(self, mock_get_arqs, + mock_clean_arq): dp_name = "mydp" self.instance.flavor.extra_specs = {"accel:device_profile": dp_name} arq_list = fixtures.CyborgFixture.bound_arq_list @@ -6252,6 +6255,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): self._test_accel_build_resources, arq_uuids, None, self.requested_networks) + mock_clean_arq.assert_called_once_with( + self.context, self.instance, arq_uuids) @mock.patch.object(nova.compute.manager.ComputeManager, '_get_bound_arq_resources') diff --git a/nova/tests/unit/compute/test_compute_utils.py b/nova/tests/unit/compute/test_compute_utils.py index 4d46fb0cd2..a261bd9280 100644 --- a/nova/tests/unit/compute/test_compute_utils.py +++ b/nova/tests/unit/compute/test_compute_utils.py @@ -1658,3 +1658,22 @@ class AcceleratorRequestTestCase(test.NoDBTestCase): mock_log_exc.assert_called_once() self.assertIn('Failed to delete accelerator requests for instance', mock_log_exc.call_args[0][0]) + + @mock.patch.object(cyborgclient, 'delete_arqs_by_uuid') + def test_delete_with_arq_uuid(self, mock_del_arq): + flavor = objects.Flavor(**test_flavor.fake_flavor) + instance = fake_instance.fake_instance_obj(self.context, flavor=flavor) + arq_uuids = [uuids.arq1, uuids.arq2] + compute_utils.delete_arqs_if_needed(self.context, instance, arq_uuids) + mock_del_arq.assert_called_once_with(arq_uuids) + + @mock.patch.object(cyborgclient, 'delete_arqs_by_uuid') + @mock.patch.object(cyborgclient, 'delete_arqs_for_instance') + def test_delete_with_arq_uuid_and_dp(self, mock_del_inst, mock_del_uuid): + flavor = objects.Flavor(**test_flavor.fake_flavor) + flavor['extra_specs'] = {'accel:device_profile': 'mydp'} + instance = fake_instance.fake_instance_obj(self.context, flavor=flavor) + arq_uuids = [uuids.arq1, uuids.arq2] + compute_utils.delete_arqs_if_needed(self.context, instance, arq_uuids) + mock_del_inst.assert_called_once_with(instance.uuid) + mock_del_uuid.assert_called_once_with(arq_uuids) diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index 23edab3a51..af845fca38 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -4144,7 +4144,25 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase): self.conductor_manager._cleanup_when_reschedule_fails( self.context, instance, exception=None, legacy_request_spec=None, requested_networks=None) - mock_del_arqs.assert_called_once_with(self.context, instance) + mock_del_arqs.assert_called_once_with( + self.context, instance, None) + + @mock.patch.object(conductor_manager.ComputeTaskManager, + '_cleanup_allocated_networks') + @mock.patch.object(conductor_manager.ComputeTaskManager, + '_set_vm_state_and_notify') + @mock.patch.object(compute_utils, 'delete_arqs_if_needed') + def test_cleanup_arqs_on_reschedule_by_uuid(self, mock_del_arqs, + mock_set_vm, mock_clean_net): + instance = fake_instance.fake_instance_obj(self.context) + request_tuples = [('123', '1.2.3.4', uuids.fakeid, + None, uuids.arq_uuid, None)] + requests = objects.NetworkRequestList.from_tuples(request_tuples) + self.conductor_manager._cleanup_when_reschedule_fails( + self.context, instance, exception=None, + legacy_request_spec=None, requested_networks=requests) + mock_del_arqs.assert_called_once_with(self.context, + instance, [uuids.arq_uuid]) def test_cleanup_allocated_networks_none_requested(self): # Tests that we don't deallocate networks if 'none' were specifically diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py index 1a2426da84..3ec6fd8c38 100644 --- a/nova/tests/unit/network/test_neutron.py +++ b/nova/tests/unit/network/test_neutron.py @@ -7065,6 +7065,26 @@ class TestAPIPortbinding(TestAPIBase): self.api.delete_port_binding(self.context, port_id, 'fake-host') + @mock.patch('nova.accelerator.cyborg._CyborgClient.delete_arqs_by_uuid') + @mock.patch('nova.network.neutron.get_binding_profile') + @mock.patch('nova.network.neutron.API._show_port') + @mock.patch('nova.network.neutron.get_client') + def test_unbind_ports_clean_arq(self, mock_neutron, mock_show, + mock_bind, mock_delete_arq): + mock_client = mock.Mock() + mock_ctx = mock.Mock(is_admin=False) + ports = ["1"] + mock_show.return_value = {'id': uuids.port} + mock_bind.return_value = {'arq_uuid': self.arqs[0]['uuid'], + 'key': 'val'} + api = neutronapi.API() + api._unbind_ports(mock_ctx, ports, mock_neutron, mock_client) + mock_delete_arq.assert_called_once_with([self.arqs[0]['uuid']]) + # verify binding profile key 'arq_uuid' deleted + call_args = mock_client.update_port.call_args[0][1] + self.assertEqual(call_args['port']['binding:profile'], + {'key': 'val'}) + class TestAllocateForInstance(test.NoDBTestCase): def setUp(self):