From b759514641478842cb24e72005ae09eec404f19b Mon Sep 17 00:00:00 2001 From: Jeegn Chen Date: Wed, 3 Dec 2014 14:03:00 +0800 Subject: [PATCH] Rollback is needed if initialize_connection times out During attach_volume operation, initialize_connection in Cinder is called. If timeout happens during initialize_connection, the Cinder API unreserve is called by the compute manager and the volume state is changed back to available. However, volume could be already mapped to the host on the array. This leaves the database and array out of sync. If rescan happens on the host after this, the volume will be visible to the host. Then if the so-called available volume is deleted, a faulty device will remain in the host. initialize_connection is also called in other cases such as post live migration. The timeout exception during initialize_connection should be handled and a rollback should be triggered by calling terminate_connection in Cinder. Co-Authored-By: xing-yang Change-Id: I8c195b7cfc6e9b296fc3b8f5ce56bb5e130769e8 Closes-Bug: #1387807 --- nova/tests/unit/volume/test_cinder.py | 30 +++++++++++++++++++++++++++ nova/volume/cinder.py | 30 +++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/nova/tests/unit/volume/test_cinder.py b/nova/tests/unit/volume/test_cinder.py index 7b51c2af96..b3802a6d12 100644 --- a/nova/tests/unit/volume/test_cinder.py +++ b/nova/tests/unit/volume/test_cinder.py @@ -286,6 +286,36 @@ class CinderApiTestCase(test.NoDBTestCase): self.api.initialize_connection(self.ctx, 'id1', 'connector') + @mock.patch('nova.volume.cinder.cinderclient') + def test_initialize_connection_rollback(self, mock_cinderclient): + mock_cinderclient.return_value.volumes.\ + initialize_connection.side_effect = ( + cinder_exception.ClientException(500, "500")) + + connector = {'host': 'host1'} + ex = self.assertRaises(cinder_exception.ClientException, + self.api.initialize_connection, + self.ctx, + 'id1', + connector) + self.assertEqual(500, ex.code) + mock_cinderclient.return_value.volumes.\ + terminate_connection.assert_called_once_with('id1', connector) + + @mock.patch('nova.volume.cinder.cinderclient') + def test_initialize_connection_no_rollback(self, mock_cinderclient): + mock_cinderclient.return_value.volumes.\ + initialize_connection.side_effect = test.TestingException + + connector = {'host': 'host1'} + self.assertRaises(test.TestingException, + self.api.initialize_connection, + self.ctx, + 'id1', + connector) + self.assertFalse(mock_cinderclient.return_value.volumes. + terminate_connection.called) + def test_terminate_connection(self): cinder.cinderclient(self.ctx).AndReturn(self.cinderclient) self.mox.StubOutWithMock(self.cinderclient.volumes, diff --git a/nova/volume/cinder.py b/nova/volume/cinder.py index 1cd7cdf4ad..66d5aa3313 100644 --- a/nova/volume/cinder.py +++ b/nova/volume/cinder.py @@ -28,12 +28,14 @@ from keystoneclient import exceptions as keystone_exception from keystoneclient import session from oslo_config import cfg from oslo_log import log as logging +from oslo_utils import excutils from oslo_utils import strutils import six from nova import availability_zones as az from nova import exception from nova.i18n import _ +from nova.i18n import _LE from nova.i18n import _LW cinder_opts = [ @@ -347,8 +349,32 @@ class API(object): @translate_volume_exception def initialize_connection(self, context, volume_id, connector): - return cinderclient(context).volumes.initialize_connection(volume_id, - connector) + try: + return cinderclient(context).volumes.initialize_connection( + volume_id, connector) + except cinder_exception.ClientException as ex: + with excutils.save_and_reraise_exception(): + LOG.error(_LE('Initialize connection failed for volume ' + '%(vol)s on host %(host)s. Error: %(msg)s ' + 'Code: %(code)s. Attempting to terminate ' + 'connection.'), + {'vol': volume_id, + 'host': connector.get('host'), + 'msg': six.text_type(ex), + 'code': ex.code}) + try: + self.terminate_connection(context, volume_id, connector) + except Exception as exc: + LOG.error(_LE('Connection between volume %(vol)s and host ' + '%(host)s might have succeeded, but attempt ' + 'to terminate connection has failed. ' + 'Validate the connection and determine if ' + 'manual cleanup is needed. Error: %(msg)s ' + 'Code: %(code)s.'), + {'vol': volume_id, + 'host': connector.get('host'), + 'msg': six.text_type(exc), + 'code': exc.code}) @translate_volume_exception def terminate_connection(self, context, volume_id, connector):