diff --git a/nova/conf/rpc.py b/nova/conf/rpc.py index a74ef10de4..e5bd1b9898 100644 --- a/nova/conf/rpc.py +++ b/nova/conf/rpc.py @@ -27,6 +27,7 @@ instead of the global rpc_response_timeout value. Operations with RPC calls that utilize this value: * live migration +* scheduling Related options: diff --git a/nova/scheduler/client/__init__.py b/nova/scheduler/client/__init__.py index 2602879a3a..f2007ad58c 100644 --- a/nova/scheduler/client/__init__.py +++ b/nova/scheduler/client/__init__.py @@ -15,7 +15,6 @@ from nova.scheduler.client import query from nova.scheduler.client import report -from nova.scheduler import utils class SchedulerClient(object): @@ -25,7 +24,6 @@ class SchedulerClient(object): self.queryclient = query.SchedulerQueryClient() self.reportclient = report.SchedulerReportClient() - @utils.retry_select_destinations def select_destinations(self, context, spec_obj, instance_uuids, return_objects=False, return_alternates=False): return self.queryclient.select_destinations(context, spec_obj, diff --git a/nova/scheduler/rpcapi.py b/nova/scheduler/rpcapi.py index ca291e805b..a3d0109bdc 100644 --- a/nova/scheduler/rpcapi.py +++ b/nova/scheduler/rpcapi.py @@ -154,7 +154,9 @@ class SchedulerAPI(object): msg_args['filter_properties' ] = spec_obj.to_legacy_filter_properties_dict() version = '4.0' - cctxt = self.client.prepare(version=version) + cctxt = self.client.prepare( + version=version, call_monitor_timeout=CONF.rpc_response_timeout, + timeout=CONF.long_rpc_timeout) return cctxt.call(ctxt, 'select_destinations', **msg_args) def update_aggregates(self, ctxt, aggregates): diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py index cc9e429135..e8efebbac9 100644 --- a/nova/scheduler/utils.py +++ b/nova/scheduler/utils.py @@ -15,12 +15,10 @@ """Utility methods for scheduling.""" import collections -import functools import re import sys from oslo_log import log as logging -import oslo_messaging as messaging from oslo_serialization import jsonutils from six.moves.urllib import parse @@ -936,37 +934,6 @@ def setup_instance_group(context, request_spec): request_spec.instance_group.members = group_info.members -def retry_on_timeout(retries=1): - """Retry the call in case a MessagingTimeout is raised. - - A decorator for retrying calls when a service dies mid-request. - - :param retries: Number of retries - :returns: Decorator - """ - def outer(func): - @functools.wraps(func) - def wrapped(*args, **kwargs): - attempt = 0 - while True: - try: - return func(*args, **kwargs) - except messaging.MessagingTimeout: - attempt += 1 - if attempt <= retries: - LOG.warning( - "Retrying %(name)s after a MessagingTimeout, " - "attempt %(attempt)s of %(retries)s.", - {'attempt': attempt, 'retries': retries, - 'name': func.__name__}) - else: - raise - return wrapped - return outer - -retry_select_destinations = retry_on_timeout(CONF.scheduler.max_attempts - 1) - - def request_is_rebuild(spec_obj): """Returns True if request is for a rebuild. diff --git a/nova/tests/unit/scheduler/test_client.py b/nova/tests/unit/scheduler/test_client.py index 5abe014c55..62df712b45 100644 --- a/nova/tests/unit/scheduler/test_client.py +++ b/nova/tests/unit/scheduler/test_client.py @@ -58,19 +58,7 @@ class SchedulerClientTestCase(test.NoDBTestCase): False] self.assertRaises(messaging.MessagingTimeout, self.client.select_destinations, *fake_args) - mock_select_destinations.assert_has_calls([mock.call(*fake_args)] * 2) - - @mock.patch.object(scheduler_query_client.SchedulerQueryClient, - 'select_destinations', side_effect=[ - messaging.MessagingTimeout(), mock.DEFAULT]) - def test_select_destinations_timeout_once(self, mock_select_destinations): - # scenario: the scheduler service times out & recovers after failure - fake_spec = objects.RequestSpec() - fake_spec.instance_uuid = uuids.instance - fake_args = ['ctxt', fake_spec, [fake_spec.instance_uuid], False, - False] - self.client.select_destinations(*fake_args) - mock_select_destinations.assert_has_calls([mock.call(*fake_args)] * 2) + mock_select_destinations.assert_called_once_with(*fake_args) @mock.patch.object(scheduler_query_client.SchedulerQueryClient, 'update_aggregates') diff --git a/nova/tests/unit/scheduler/test_rpcapi.py b/nova/tests/unit/scheduler/test_rpcapi.py index d18ad3ba32..3c56946975 100644 --- a/nova/tests/unit/scheduler/test_rpcapi.py +++ b/nova/tests/unit/scheduler/test_rpcapi.py @@ -19,12 +19,15 @@ Unit Tests for nova.scheduler.rpcapi import mock from oslo_utils.fixture import uuidsentinel as uuids +from nova import conf from nova import context from nova import exception as exc from nova import objects from nova.scheduler import rpcapi as scheduler_rpcapi from nova import test +CONF = conf.CONF + class SchedulerRpcAPITestCase(test.NoDBTestCase): def _test_scheduler_api(self, method, rpc_method, expected_args=None, @@ -45,6 +48,11 @@ class SchedulerRpcAPITestCase(test.NoDBTestCase): expected_kwargs = expected_args prepare_kwargs = {} + if method == 'select_destinations': + prepare_kwargs.update({ + 'call_monitor_timeout': CONF.rpc_response_timeout, + 'timeout': CONF.long_rpc_timeout + }) if expected_fanout: prepare_kwargs['fanout'] = True if expected_version: diff --git a/releasenotes/notes/bug-1795992-long_rpc_timeout-select_destinations-9712e8690160928f.yaml b/releasenotes/notes/bug-1795992-long_rpc_timeout-select_destinations-9712e8690160928f.yaml new file mode 100644 index 0000000000..2958b223e2 --- /dev/null +++ b/releasenotes/notes/bug-1795992-long_rpc_timeout-select_destinations-9712e8690160928f.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + The ``long_rpc_timeout`` configuration option is now used for the RPC + call to the scheduler to select a host. This is in order to avoid a + timeout when scheduling multiple servers in a single request and/or when + the scheduler needs to process a large number of hosts.