From 9f8b05fd90b5f5c8618a470b43fe19a2264a5449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Ribaud?= Date: Fri, 30 Sep 2022 21:46:29 +0200 Subject: [PATCH] Add instance.share_attach_error notification This patch add a notification when a share can not be mounted on the compute host. Manila is the OpenStack Shared Filesystems service. These series of patches implement changes required in Nova to allow the shares provided by Manila to be associated with and attached to instances using virtiofs. Implements: blueprint libvirt-virtiofs-attach-manila-shares Change-Id: Idd44f5ad96c0a1dd62eba5d283e614c324854a80 --- .../instance-share_attach-error.json | 22 +++ nova/compute/manager.py | 64 +++++--- nova/notifications/objects/instance.py | 1 + nova/tests/functional/integrated_helpers.py | 6 + .../test_instance.py | 145 +++++++++++++++++- nova/tests/unit/compute/test_compute_mgr.py | 70 ++++++++- 6 files changed, 285 insertions(+), 23 deletions(-) create mode 100644 doc/notification_samples/instance-share_attach-error.json diff --git a/doc/notification_samples/instance-share_attach-error.json b/doc/notification_samples/instance-share_attach-error.json new file mode 100644 index 0000000000..622b1e63fa --- /dev/null +++ b/doc/notification_samples/instance-share_attach-error.json @@ -0,0 +1,22 @@ +{ + "event_type": "instance.share_attach.error", + "payload": { + "$ref": "common_payloads/InstanceActionSharePayload.json#", + "nova_object.data": { + "fault": { + "nova_object.data": { + "exception": "ShareAccessGrantError", + "exception_message": "Share access could not be granted to share id 8db0037b-e98f-4bde-ae71-f96a077c19a4.\nReason: Connection timed out.", + "function_name": "_execute_mock_call", + "module_name": "unittest.mock", + "traceback": "Traceback (most recent call last):\n File \"nova/compute/manager.py\", line ..." + }, + "nova_object.name": "ExceptionPayload", + "nova_object.namespace": "nova", + "nova_object.version": "1.1" + } + } + }, + "priority": "ERROR", + "publisher_id": "nova-api:compute" +} diff --git a/nova/compute/manager.py b/nova/compute/manager.py index b1344eaba6..668074ae3d 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -4637,7 +4637,6 @@ class ComputeManager(manager.Manager): @utils.synchronized(share_mapping.share_id) def _allow_share(context, instance, share_mapping): - def _apply_policy(): # self.manila_api.lock(share_mapping.share_id) # Explicitly locking the share is not needed as @@ -4684,6 +4683,15 @@ class ComputeManager(manager.Manager): ) try: + compute_utils.notify_about_share_attach_detach( + context, + instance, + instance.host, + action=fields.NotificationAction.SHARE_ATTACH, + phase=fields.NotificationPhase.START, + share_id=share_mapping.share_id + ) + share_mapping.set_access_according_to_protocol() if not self.manila_api.has_access( @@ -4700,6 +4708,15 @@ class ComputeManager(manager.Manager): share_mapping, fields.ShareMappingStatus.INACTIVE ) + compute_utils.notify_about_share_attach_detach( + context, + instance, + instance.host, + action=fields.NotificationAction.SHARE_ATTACH, + phase=fields.NotificationPhase.END, + share_id=share_mapping.share_id + ) + except ( exception.ShareNotFound, exception.ShareProtocolNotSupported, @@ -4708,6 +4725,15 @@ class ComputeManager(manager.Manager): self._set_share_mapping_status( share_mapping, fields.ShareMappingStatus.ERROR ) + compute_utils.notify_about_share_attach_detach( + context, + instance, + instance.host, + action=fields.NotificationAction.SHARE_ATTACH, + phase=fields.NotificationPhase.ERROR, + share_id=share_mapping.share_id, + exception=e + ) LOG.error(e.format_message()) raise except ( @@ -4716,6 +4742,15 @@ class ComputeManager(manager.Manager): self._set_share_mapping_status( share_mapping, fields.ShareMappingStatus.ERROR ) + compute_utils.notify_about_share_attach_detach( + context, + instance, + instance.host, + action=fields.NotificationAction.SHARE_ATTACH, + phase=fields.NotificationPhase.ERROR, + share_id=share_mapping.share_id, + exception=e + ) LOG.error( "%s: %s error from url: %s, %s", e.message, @@ -4728,29 +4763,20 @@ class ComputeManager(manager.Manager): self._set_share_mapping_status( share_mapping, fields.ShareMappingStatus.ERROR ) + compute_utils.notify_about_share_attach_detach( + context, + instance, + instance.host, + action=fields.NotificationAction.SHARE_ATTACH, + phase=fields.NotificationPhase.ERROR, + share_id=share_mapping.share_id, + exception=e + ) LOG.error(e) raise - compute_utils.notify_about_share_attach_detach( - context, - instance, - instance.host, - action=fields.NotificationAction.SHARE_ATTACH, - phase=fields.NotificationPhase.START, - share_id=share_mapping.share_id - ) - _allow_share(context, instance, share_mapping) - compute_utils.notify_about_share_attach_detach( - context, - instance, - instance.host, - action=fields.NotificationAction.SHARE_ATTACH, - phase=fields.NotificationPhase.END, - share_id=share_mapping.share_id - ) - @messaging.expected_exceptions(NotImplementedError) @wrap_exception() def deny_share(self, context, instance, share_mapping): diff --git a/nova/notifications/objects/instance.py b/nova/notifications/objects/instance.py index c29c7cf7e4..18c895f797 100644 --- a/nova/notifications/objects/instance.py +++ b/nova/notifications/objects/instance.py @@ -683,6 +683,7 @@ class InstanceActionVolumeNotification(base.NotificationBase): @base.notification_sample('instance-share_attach-start.json') +@base.notification_sample('instance-share_attach-error.json') @base.notification_sample('instance-share_attach-end.json') @base.notification_sample('instance-share_detach-start.json') @base.notification_sample('instance-share_detach-end.json') diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index 045f22ace7..2cb83ef68e 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -565,6 +565,12 @@ class InstanceHelperMixin: self.notifier.wait_for_versioned_notifications( 'instance.share_attach.end') + def _attach_share_with_error(self, server, share_id): + self.api.post_server_share( + server['id'], {"share": {"share_id": share_id}}) + self.notifier.wait_for_versioned_notifications( + 'instance.share_attach.error') + def _detach_share(self, server, share_id): self.api.delete_server_share(server['id'], share_id) self.notifier.wait_for_versioned_notifications( diff --git a/nova/tests/functional/notification_sample_tests/test_instance.py b/nova/tests/functional/notification_sample_tests/test_instance.py index 1c7487f480..b0c19740c2 100644 --- a/nova/tests/functional/notification_sample_tests/test_instance.py +++ b/nova/tests/functional/notification_sample_tests/test_instance.py @@ -20,6 +20,7 @@ from nova.tests.functional.api import client from nova.tests.functional.notification_sample_tests \ import notification_sample_base from nova.volume import cinder +from oslo_concurrency import processutils class TestInstanceNotificationSampleWithMultipleCompute( @@ -344,7 +345,7 @@ class TestInstanceNotificationSample( self.useFixture(self.neutron) self.cinder = fixtures.CinderFixture(self) self.useFixture(self.cinder) - self.useFixture(fixtures.ManilaFixture()) + self.manila_fixture = self.useFixture(fixtures.ManilaFixture()) def _wait_until_swap_volume(self, server, volume_id): for i in range(50): @@ -392,6 +393,7 @@ class TestInstanceNotificationSample( self._test_lock_unlock_instance, self._test_lock_unlock_instance_with_reason, self._test_share_attach_detach, + self._test_share_attach_error, ] for action in actions: @@ -1786,7 +1788,6 @@ class TestInstanceNotificationSample( }, actual=self.notifier.versioned_notifications[1]) - def _test_share_detach(self, server): # Shutdown server self.notifier.reset() self.api.post_server_action(server['id'], {'os-stop': {}}) @@ -1800,8 +1801,10 @@ class TestInstanceNotificationSample( 'uuid': server['id'], 'state': 'active', 'power_state': 'running', + 'shares': expected_shares }, actual=self.notifier.versioned_notifications[0]) + expected_shares[0]['nova_object.data']['status'] = 'inactive' self._verify_notification( 'instance-power_off-end', replacements={ @@ -1809,6 +1812,7 @@ class TestInstanceNotificationSample( 'uuid': server['id'], 'state': 'stopped', 'power_state': 'shutdown', + 'shares': expected_shares }, actual=self.notifier.versioned_notifications[1]) @@ -1819,6 +1823,7 @@ class TestInstanceNotificationSample( self.assertEqual(2, len(self.notifier.versioned_notifications), self.notifier.versioned_notifications) + expected_shares[0]['nova_object.data']['status'] = 'detaching' self._verify_notification( 'instance-share_detach-start', replacements={ @@ -1826,6 +1831,7 @@ class TestInstanceNotificationSample( 'uuid': server['id'], 'state': 'stopped', 'power_state': 'shutdown', + 'shares': expected_shares }, actual=self.notifier.versioned_notifications[0]) self._verify_notification( @@ -1863,6 +1869,141 @@ class TestInstanceNotificationSample( }, actual=self.notifier.versioned_notifications[1]) + def _test_share_attach_error(self, server): + + expected_shares = [ + { + "nova_object.name": "SharePayload", + "nova_object.namespace": "nova", + "nova_object.version": "1.0", + "nova_object.data": { + "share_mapping_uuid": ( + "f7c1726d-7622-42b3-8b2c-4473239d60d1"), + "share_id": "e8debdc0-447a-4376-a10a-4cd9122d7986", + "status": "attaching", + "tag": "e8debdc0-447a-4376-a10a-4cd9122d7986", + # 'export_location': '10.0.0.50:/mnt/foo', + }, + } + ] + + self.api.post_server_action(server['id'], {'os-stop': {}}) + self._wait_for_state_change(server, expected_status='SHUTOFF') + self.notifier.reset() + + with mock.patch( + 'nova.virt.fake.FakeDriver.mount_share', + side_effect=exception.ShareMountError( + share_id='8db0037b-e98f-4bde-ae71-f96a077c19a4', + server_id=server['id'], + reason=processutils.ProcessExecutionError( + stdout='This is stdout', + stderr='This is stderror', + exit_code=1, + cmd="mount" + ) + ) + ): + + # Simulate an error attaching the share. + self.manila_fixture.mock_allow.side_effect = ( + exception.ShareAccessGrantError( + share_id="8db0037b-e98f-4bde-ae71-f96a077c19a4", + reason="Connection timed out" + ) + ) + + self._attach_share_with_error( + server, "e8debdc0-447a-4376-a10a-4cd9122d7986") + + share_info = self._get_share( + server, "e8debdc0-447a-4376-a10a-4cd9122d7986", admin=True + ) + expected_shares[0]["nova_object.data"][ + "share_mapping_uuid" + ] = share_info["uuid"] + + # 0: instance-share_attach-start + # 1: instance-share_attach-error + # 2: compute.exception + self.assertEqual(3, len(self.notifier.versioned_notifications), + self.notifier.versioned_notifications) + self._verify_notification( + 'instance-share_attach-start', + replacements={ + 'reservation_id': server['reservation_id'], + 'uuid': server['id'], + 'state': 'stopped', + 'power_state': 'shutdown', + 'shares': expected_shares, + }, + actual=self.notifier.versioned_notifications[0]) + expected_shares[0]["nova_object.data"]["status"] = "error" + self._verify_notification( + 'instance-share_attach-error', + replacements={ + 'reservation_id': server['reservation_id'], + 'uuid': server['id'], + 'state': 'stopped', + 'power_state': 'shutdown', + 'shares': expected_shares, + 'fault.traceback': self.ANY + }, + actual=self.notifier.versioned_notifications[1]) + + self._wait_for_notification('compute.exception') + + # Restart server + self.notifier.reset() + self.api.post_server_action(server['id'], {'os-start': {}}) + self._wait_for_state_change(server, expected_status='ERROR') + + self.assertTrue( + any( + [ + item + for item in self.notifier.versioned_notifications + if item["event_type"] == "compute.exception" + ] + ) + ) + + # Detach share despite the share error + self.notifier.reset() + self._detach_share( + server, 'e8debdc0-447a-4376-a10a-4cd9122d7986') + + expected_shares[0]["nova_object.data"]["status"] = "detaching" + self.assertEqual(2, len(self.notifier.versioned_notifications), + self.notifier.versioned_notifications) + self._verify_notification( + 'instance-share_detach-start', + replacements={ + 'reservation_id': server['reservation_id'], + 'uuid': server['id'], + 'state': 'error', + 'shares': expected_shares, + 'power_state': 'shutdown'}, + actual=self.notifier.versioned_notifications[0]) + expected_shares[0]['nova_object.data']['status'] = 'detaching' + self._verify_notification( + 'instance-share_detach-end', + replacements={ + 'reservation_id': server['reservation_id'], + 'uuid': server['id'], + 'state': 'error', + 'power_state': 'shutdown', + }, + actual=self.notifier.versioned_notifications[1]) + + # Reboot server + self.notifier.reset() + + post = {'reboot': {'type': 'HARD'}} + self.api.post_server_action(server['id'], post) + self._wait_for_notification('instance.reboot.start') + self._wait_for_notification('instance.reboot.end') + def _test_rescue_unrescue_server(self, server): # Both "rescue" and "unrescue" notification asserts are made here # rescue notification asserts diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 46a3d7a5d7..f142ea8c43 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -2493,7 +2493,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, "fake-host", action=fields.NotificationAction.SHARE_ATTACH, phase=fields.NotificationPhase.START, - share_id=share_mapping.share_id + share_id=share_mapping.share_id, ), mock.call( mock.ANY, @@ -2501,7 +2501,73 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, "fake-host", action=fields.NotificationAction.SHARE_ATTACH, phase=fields.NotificationPhase.END, - share_id=share_mapping.share_id + share_id=share_mapping.share_id, + ), + ]) + + @mock.patch( + 'nova.compute.utils.notify_about_share_attach_detach', + return_value=None + ) + @mock.patch('nova.share.manila.API.allow') + @mock.patch('nova.share.manila.API.get_access') + @mock.patch('nova.objects.share_mapping.ShareMapping.save') + def test_allow_share_fails_share_grant_failure( + self, mock_db, mock_get_access, mock_allow, mock_notifications + ): + self.flags(shutdown_retry_interval=20, group='compute') + instance = fake_instance.fake_instance_obj( + self.context, + uuid=uuids.instance, + vm_state=vm_states.ACTIVE, + task_state=task_states.POWERING_OFF) + mock_get_access.side_effect = [None, self.get_fake_share_access()] + # Ensure CONF.my_shared_fs_storage_ip default is my_ip + self.flags(my_ip="10.0.0.2") + self.assertEqual(CONF.my_shared_fs_storage_ip, '10.0.0.2') + # Set CONF.my_shared_fs_storage_ip to ensure it is used by the code + self.flags(my_shared_fs_storage_ip="192.168.0.1") + compute_ip = CONF.my_shared_fs_storage_ip + self.assertEqual(compute_ip, '192.168.0.1') + share_mapping = self.get_fake_share_mapping() + + exc = exception.ShareAccessGrantError( + share_id=share_mapping.share_id, + reason="fake_reason" + ) + mock_allow.side_effect = exc + + self.assertRaises( + exception.ShareAccessGrantError, + self.compute.allow_share, + self.context, + instance, + share_mapping, + ) + + self.assertEqual(share_mapping.status, 'error') + + mock_get_access.assert_called_with( + self.context, share_mapping.share_id, 'ip', compute_ip) + mock_allow.assert_called_once_with( + mock.ANY, share_mapping.share_id, 'ip', compute_ip, 'rw') + mock_notifications.assert_has_calls([ + mock.call( + mock.ANY, + instance, + "fake-host", + action=fields.NotificationAction.SHARE_ATTACH, + phase=fields.NotificationPhase.START, + share_id=share_mapping.share_id, + ), + mock.call( + mock.ANY, + instance, + "fake-host", + action=fields.NotificationAction.SHARE_ATTACH, + phase=fields.NotificationPhase.ERROR, + share_id=share_mapping.share_id, + exception=exc ), ])