Merge "Use long_rpc_timeout in select_destinations RPC call"

2018-11-21 23:51:14 +00:00
parent a5d63f7e9e 5af632e9ca
commit 1a1ea8e2aa
7 changed files with 20 additions and 49 deletions
@@ -27,6 +27,7 @@ instead of the global rpc_response_timeout value.
 Operations with RPC calls that utilize this value:

 * live migration
+* scheduling

 Related options:

@@ -15,7 +15,6 @@

 from nova.scheduler.client import query
 from nova.scheduler.client import report
-from nova.scheduler import utils


 class SchedulerClient(object):
@@ -25,7 +24,6 @@ class SchedulerClient(object):
        self.queryclient = query.SchedulerQueryClient()
        self.reportclient = report.SchedulerReportClient()

-    @utils.retry_select_destinations
    def select_destinations(self, context, spec_obj, instance_uuids,
            return_objects=False, return_alternates=False):
        return self.queryclient.select_destinations(context, spec_obj,
@@ -154,7 +154,9 @@ class SchedulerAPI(object):
            msg_args['filter_properties'
                     ] = spec_obj.to_legacy_filter_properties_dict()
            version = '4.0'
-        cctxt = self.client.prepare(version=version)
+        cctxt = self.client.prepare(
+            version=version, call_monitor_timeout=CONF.rpc_response_timeout,
+            timeout=CONF.long_rpc_timeout)
        return cctxt.call(ctxt, 'select_destinations', **msg_args)

    def update_aggregates(self, ctxt, aggregates):
@@ -15,12 +15,10 @@
 """Utility methods for scheduling."""

 import collections
-import functools
 import re
 import sys

 from oslo_log import log as logging
-import oslo_messaging as messaging
 from oslo_serialization import jsonutils
 from six.moves.urllib import parse

@@ -936,37 +934,6 @@ def setup_instance_group(context, request_spec):
        request_spec.instance_group.members = group_info.members


-def retry_on_timeout(retries=1):
-    """Retry the call in case a MessagingTimeout is raised.
-
-    A decorator for retrying calls when a service dies mid-request.
-
-    :param retries: Number of retries
-    :returns: Decorator
-    """
-    def outer(func):
-        @functools.wraps(func)
-        def wrapped(*args, **kwargs):
-            attempt = 0
-            while True:
-                try:
-                    return func(*args, **kwargs)
-                except messaging.MessagingTimeout:
-                    attempt += 1
-                    if attempt <= retries:
-                        LOG.warning(
-                            "Retrying %(name)s after a MessagingTimeout, "
-                            "attempt %(attempt)s of %(retries)s.",
-                            {'attempt': attempt, 'retries': retries,
-                             'name': func.__name__})
-                    else:
-                        raise
-        return wrapped
-    return outer
-
-retry_select_destinations = retry_on_timeout(CONF.scheduler.max_attempts - 1)
-
-
 def request_is_rebuild(spec_obj):
    """Returns True if request is for a rebuild.

@@ -58,19 +58,7 @@ class SchedulerClientTestCase(test.NoDBTestCase):
                False]
        self.assertRaises(messaging.MessagingTimeout,
                          self.client.select_destinations, *fake_args)
-        mock_select_destinations.assert_has_calls([mock.call(*fake_args)] * 2)
-
-    @mock.patch.object(scheduler_query_client.SchedulerQueryClient,
-                       'select_destinations', side_effect=[
-                           messaging.MessagingTimeout(), mock.DEFAULT])
-    def test_select_destinations_timeout_once(self, mock_select_destinations):
-        # scenario: the scheduler service times out & recovers after failure
-        fake_spec = objects.RequestSpec()
-        fake_spec.instance_uuid = uuids.instance
-        fake_args = ['ctxt', fake_spec, [fake_spec.instance_uuid], False,
-                False]
-        self.client.select_destinations(*fake_args)
-        mock_select_destinations.assert_has_calls([mock.call(*fake_args)] * 2)
+        mock_select_destinations.assert_called_once_with(*fake_args)

    @mock.patch.object(scheduler_query_client.SchedulerQueryClient,
                       'update_aggregates')
@@ -19,12 +19,15 @@ Unit Tests for nova.scheduler.rpcapi
 import mock
 from oslo_utils.fixture import uuidsentinel as uuids

+from nova import conf
 from nova import context
 from nova import exception as exc
 from nova import objects
 from nova.scheduler import rpcapi as scheduler_rpcapi
 from nova import test

+CONF = conf.CONF
+

 class SchedulerRpcAPITestCase(test.NoDBTestCase):
    def _test_scheduler_api(self, method, rpc_method, expected_args=None,
@@ -45,6 +48,11 @@ class SchedulerRpcAPITestCase(test.NoDBTestCase):
            expected_kwargs = expected_args

        prepare_kwargs = {}
+        if method == 'select_destinations':
+            prepare_kwargs.update({
+                'call_monitor_timeout': CONF.rpc_response_timeout,
+                'timeout': CONF.long_rpc_timeout
+            })
        if expected_fanout:
            prepare_kwargs['fanout'] = True
        if expected_version:
@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    The ``long_rpc_timeout`` configuration option is now used for the RPC
+    call to the scheduler to select a host. This is in order to avoid a
+    timeout when scheduling multiple servers in a single request and/or when
+    the scheduler needs to process a large number of hosts.