diff --git a/nova/db/sqlalchemy/migration.py b/nova/db/sqlalchemy/migration.py index a8a544999f..d3e0bd5b4c 100644 --- a/nova/db/sqlalchemy/migration.py +++ b/nova/db/sqlalchemy/migration.py @@ -62,6 +62,40 @@ def db_sync(version=None, database='main', context=None): def db_version(database='main', context=None): repository = _find_migrate_repo(database) + + # NOTE(mdbooth): This is a crude workaround for races in _db_version. The 2 + # races we have seen in practise are: + # * versioning_api.db_version() fails because the migrate_version table + # doesn't exist, but meta.tables subsequently contains tables because + # another thread has already started creating the schema. This results in + # the 'Essex' error. + # * db_version_control() fails with pymysql.error.InternalError(1050) + # (Create table failed) because of a race in sqlalchemy-migrate's + # ControlledSchema._create_table_version, which does: + # if not table.exists(): table.create() + # This means that it doesn't raise the advertised + # DatabaseAlreadyControlledError, which we could have handled explicitly. + # + # I believe the correct fix should be: + # * Delete the Essex-handling code as unnecessary complexity which nobody + # should still need. + # * Fix the races in sqlalchemy-migrate such that version_control() always + # raises a well-defined error, and then handle that error here. + # + # Until we do that, though, we should be able to just try again if we + # failed for any reason. In both of the above races, trying again should + # succeed the second time round. + # + # For additional context, see: + # * https://bugzilla.redhat.com/show_bug.cgi?id=1652287 + # * https://bugs.launchpad.net/nova/+bug/1804652 + try: + return _db_version(repository, database, context) + except Exception: + return _db_version(repository, database, context) + + +def _db_version(repository, database, context): try: return versioning_api.db_version(get_engine(database, context=context), repository) diff --git a/nova/tests/unit/db/test_sqlalchemy_migration.py b/nova/tests/unit/db/test_sqlalchemy_migration.py index 5621431bea..17e0d80120 100644 --- a/nova/tests/unit/db/test_sqlalchemy_migration.py +++ b/nova/tests/unit/db/test_sqlalchemy_migration.py @@ -159,6 +159,51 @@ class TestDbVersion(test.NoDBTestCase): engine_calls = [mock.call(database, context=None)] * 3 self.assertEqual(engine_calls, mock_get_engine.call_args_list) + def test_db_version_init_race(self, mock_get_engine, mock_db_version, + mock_find_repo): + # This test exercises bug 1804652 by causing + # versioning_api.version_contro() to raise an unhandleable error the + # first time it is called. + database = 'api' + mock_get_engine.return_value = 'engine' + exc = versioning_exceptions.DatabaseNotControlledError() + mock_db_version.side_effect = [exc, ''] + metadata = mock.MagicMock() + metadata.tables.return_value = [] + with mock.patch.object(sqlalchemy, 'MetaData', + metadata), mock.patch.object(migration, + 'db_version_control') as mock_version_control: + # db_version_control raises an unhandleable error because we were + # racing to initialise with another process. + mock_version_control.side_effect = test.TestingException + migration.db_version(database) + mock_version_control.assert_called_once_with(0, + database, + context=None) + db_version_calls = [mock.call('engine', 'repo')] * 2 + self.assertEqual(db_version_calls, mock_db_version.call_args_list) + engine_calls = [mock.call(database, context=None)] * 3 + self.assertEqual(engine_calls, mock_get_engine.call_args_list) + + def test_db_version_raise_on_error(self, mock_get_engine, mock_db_version, + mock_find_repo): + # This test asserts that we will still raise a persistent error after + # working around bug 1804652. + database = 'api' + mock_get_engine.return_value = 'engine' + mock_db_version.side_effect = \ + versioning_exceptions.DatabaseNotControlledError + metadata = mock.MagicMock() + metadata.tables.return_value = [] + with mock.patch.object(sqlalchemy, 'MetaData', + metadata), mock.patch.object(migration, + 'db_version_control') as mock_version_control: + # db_version_control raises an unhandleable error because we were + # racing to initialise with another process. + mock_version_control.side_effect = test.TestingException + self.assertRaises(test.TestingException, + migration.db_version, database) + @mock.patch.object(migration, '_find_migrate_repo', return_value='repo') @mock.patch.object(migration, 'get_engine', return_value='engine')