diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py index 620519d403..902cc0e631 100644 --- a/nova/scheduler/manager.py +++ b/nova/scheduler/manager.py @@ -37,6 +37,7 @@ from nova import manager from nova import objects from nova.objects import fields as fields_obj from nova.objects import host_mapping as host_mapping_obj +from nova.objects import service as obj_service from nova import quota from nova import rpc from nova.scheduler.client import report @@ -108,6 +109,18 @@ class SchedulerManager(manager.Manager): spacing=CONF.scheduler.discover_hosts_in_cells_interval, run_immediately=True) def _discover_hosts_in_cells(self, context): + services = obj_service.ServiceList.get_by_binary( + context, 'nova-scheduler') + leader = sorted( + [service.host for service in services + if self.servicegroup_api.service_is_up(service)])[0] + + if CONF.host != leader: + LOG.debug( + f"Current leader is {leader}, " + f"skipping discover hosts on {CONF.host}") + return + global HOST_MAPPING_EXISTS_WARNING try: host_mappings = host_mapping_obj.discover_hosts(context) @@ -124,9 +137,8 @@ class SchedulerManager(manager.Manager): ) except exception.HostMappingExists as exp: msg = ( - 'This periodic task should only be enabled on a single ' - 'scheduler to prevent collisions between multiple ' - 'schedulers: %s' % str(exp) + 'This periodic task should only be enabled if discover hosts ' + 'is not run via nova-manage, schedulers: %s' % str(exp) ) if not HOST_MAPPING_EXISTS_WARNING: LOG.warning(msg) diff --git a/nova/tests/unit/scheduler/test_manager.py b/nova/tests/unit/scheduler/test_manager.py index e992fe6034..885fe51ec2 100644 --- a/nova/tests/unit/scheduler/test_manager.py +++ b/nova/tests/unit/scheduler/test_manager.py @@ -23,10 +23,13 @@ from keystoneauth1 import exceptions as ks_exc import oslo_messaging as messaging from oslo_serialization import jsonutils from oslo_utils.fixture import uuidsentinel as uuids +from oslo_utils import timeutils + from nova import 
context from nova import exception from nova import objects +from nova.objects import service from nova.scheduler import filters from nova.scheduler import host_manager from nova.scheduler import manager @@ -1658,29 +1661,50 @@ class SchedulerManagerTestCase(test.NoDBTestCase): self.manager.reset() mock_refresh.assert_called_once_with() + @mock.patch('nova.objects.service.ServiceList.get_by_binary') @mock.patch('nova.objects.host_mapping.discover_hosts') - def test_discover_hosts(self, mock_discover): + def test_discover_hosts(self, mock_discover, mock_get_by_binary): cm1 = objects.CellMapping(name='cell1') cm2 = objects.CellMapping(name='cell2') mock_discover.return_value = [objects.HostMapping(host='a', cell_mapping=cm1), objects.HostMapping(host='b', cell_mapping=cm2)] - self.manager._discover_hosts_in_cells(mock.sentinel.context) + self.flags(host="test-host") + mock_get_by_binary.return_value = service.ServiceList( + objects=[ + service.Service( + host="test-host", forced_down=False, + last_seen_up=timeutils.utcnow()) + ]) + self.manager._discover_hosts_in_cells(mock.sentinel.context) + mock_get_by_binary.assert_called_once_with( + mock.sentinel.context, 'nova-scheduler') + + @mock.patch('nova.objects.service.ServiceList.get_by_binary') @mock.patch('nova.scheduler.manager.LOG.debug') @mock.patch('nova.scheduler.manager.LOG.warning') @mock.patch('nova.objects.host_mapping.discover_hosts') - def test_discover_hosts_duplicate_host_mapping(self, mock_discover, - mock_log_warning, - mock_log_debug): + def test_discover_hosts_duplicate_host_mapping( + self, mock_discover, mock_log_warning, mock_log_debug, + mock_get_by_binary): # This tests the scenario of multiple schedulers running discover_hosts # at the same time. 
mock_discover.side_effect = exception.HostMappingExists(name='a') + self.flags(host="test-host") + mock_get_by_binary.return_value = service.ServiceList( + objects=[ + service.Service( + host="test-host", forced_down=False, + last_seen_up=timeutils.utcnow()) + ]) self.manager._discover_hosts_in_cells(mock.sentinel.context) - msg = ("This periodic task should only be enabled on a single " - "scheduler to prevent collisions between multiple " - "schedulers: Host 'a' mapping already exists") + mock_get_by_binary.assert_called_once_with( + mock.sentinel.context, 'nova-scheduler') + msg = ("This periodic task should only be enabled if discover hosts " + "is not run via nova-manage, " + "schedulers: Host 'a' mapping already exists") mock_log_warning.assert_called_once_with(msg) mock_log_debug.assert_not_called() # Second collision should log at debug, not warning. @@ -1689,6 +1713,33 @@ class SchedulerManagerTestCase(test.NoDBTestCase): mock_log_warning.assert_not_called() mock_log_debug.assert_called_once_with(msg) + @mock.patch('nova.objects.service.ServiceList.get_by_binary') + @mock.patch('nova.scheduler.manager.LOG.debug') + @mock.patch('nova.objects.host_mapping.discover_hosts') + def test_discover_hosts_not_leader( + self, mock_discover, mock_log_debug, mock_get_by_binary): + # This tests the scenario of multiple schedulers running discover_hosts + # at the same time. 
+ mock_discover.side_effect = exception.HostMappingExists(name='a') + self.flags(host="test-host-2") + mock_get_by_binary.return_value = service.ServiceList( + objects=[ + service.Service( + host="test-host-1", forced_down=False, + last_seen_up=timeutils.utcnow()), + service.Service( + host="test-host-2", forced_down=False, + last_seen_up=timeutils.utcnow()) + ] + ) + self.manager._discover_hosts_in_cells(mock.sentinel.context) + mock_get_by_binary.assert_called_once_with( + mock.sentinel.context, 'nova-scheduler') + msg = ( + "Current leader is test-host-1, skipping discover " + "hosts on test-host-2") + mock_log_debug.assert_called_once_with(msg) + @mock.patch('nova.scheduler.client.report.report_client_singleton') @mock.patch.object(manager, 'LOG') @mock.patch('nova.scheduler.host_manager.HostManager') diff --git a/releasenotes/notes/distributed-discover-hosts-perodic-b983f528516dec14.yaml b/releasenotes/notes/distributed-discover-hosts-perodic-b983f528516dec14.yaml new file mode 100644 index 0000000000..6d8795d4d7 --- /dev/null +++ b/releasenotes/notes/distributed-discover-hosts-perodic-b983f528516dec14.yaml @@ -0,0 +1,12 @@ +--- +features: + - | + The nova scheduler now supports enabling the nova cell discover hosts + periodic task on multiple schedulers. In prior releases enabling this + feature required setting the discover_hosts_in_cells_interval option + to a value greater than 0 in at most one scheduler; starting with the 2025.1 + release it is possible to enable the feature on multiple schedulers + via the introduction of leader election. This simplifies deployment + of nova in Kubernetes by allowing the operator to deploy + multiple schedulers and have them elect a single leader that will run + the discover hosts periodic task.