From 5af632e9cab670cc25c2f627bb0d4c0a02258277 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Wed, 3 Oct 2018 18:57:45 -0400 Subject: [PATCH] Use long_rpc_timeout in select_destinations RPC call Conductor RPC calls the scheduler to get hosts during server create, which in a multi-create request with a lot of servers and the default rpc_response_timeout, can trigger a MessagingTimeout. Due to the old retry_select_destinations decorator, conductor will retry the select_destinations RPC call up to max_attempts times, so thrice by default. This can clobber the scheduler and placement while the initial scheduler worker is still trying to process the beefy request and allocate resources in placement. This has been recreated in a devstack test patch [1] and shown to fail with 1000 instances in a single request with the default rpc_response_timeout of 60 seconds. Changing the rpc_response_timeout to 300 avoids the MessagingTimeout and retry loop. Since Rocky we have the long_rpc_timeout config option which defaults to 1800 seconds. The RPC client can thus be changed to heartbeat the scheduler service during the RPC call every $rpc_response_timeout seconds with a hard timeout of $long_rpc_timeout. That change is made here. As a result, the problematic retry_select_destinations decorator is also no longer necessary and removed here. That decorator was added in I2b891bf6d0a3d8f45fd98ca54a665ae78eab78b3 and was a hack for scheduler high availability: a MessagingTimeout was assumed to mean the scheduler service had died, so retrying the request in order to reach another scheduler worker was reasonable. That retry approach is clearly not sufficient in the large multi-create case, and long_rpc_timeout, which heartbeats the scheduler service, is a better fit for that HA type scenario. 
[1] https://review.openstack.org/507918/ Change-Id: I87d89967bbc5fbf59cf44d9a63eb6e9d477ac1f3 Closes-Bug: #1795992 --- nova/conf/rpc.py | 1 + nova/scheduler/client/__init__.py | 2 -- nova/scheduler/rpcapi.py | 4 ++- nova/scheduler/utils.py | 33 ------------------- nova/tests/unit/scheduler/test_client.py | 14 +------- nova/tests/unit/scheduler/test_rpcapi.py | 8 +++++ ...-select_destinations-9712e8690160928f.yaml | 7 ++++ 7 files changed, 20 insertions(+), 49 deletions(-) create mode 100644 releasenotes/notes/bug-1795992-long_rpc_timeout-select_destinations-9712e8690160928f.yaml diff --git a/nova/conf/rpc.py b/nova/conf/rpc.py index a74ef10de4..e5bd1b9898 100644 --- a/nova/conf/rpc.py +++ b/nova/conf/rpc.py @@ -27,6 +27,7 @@ instead of the global rpc_response_timeout value. Operations with RPC calls that utilize this value: * live migration +* scheduling Related options: diff --git a/nova/scheduler/client/__init__.py b/nova/scheduler/client/__init__.py index 2602879a3a..f2007ad58c 100644 --- a/nova/scheduler/client/__init__.py +++ b/nova/scheduler/client/__init__.py @@ -15,7 +15,6 @@ from nova.scheduler.client import query from nova.scheduler.client import report -from nova.scheduler import utils class SchedulerClient(object): @@ -25,7 +24,6 @@ class SchedulerClient(object): self.queryclient = query.SchedulerQueryClient() self.reportclient = report.SchedulerReportClient() - @utils.retry_select_destinations def select_destinations(self, context, spec_obj, instance_uuids, return_objects=False, return_alternates=False): return self.queryclient.select_destinations(context, spec_obj, diff --git a/nova/scheduler/rpcapi.py b/nova/scheduler/rpcapi.py index ca291e805b..a3d0109bdc 100644 --- a/nova/scheduler/rpcapi.py +++ b/nova/scheduler/rpcapi.py @@ -154,7 +154,9 @@ class SchedulerAPI(object): msg_args['filter_properties' ] = spec_obj.to_legacy_filter_properties_dict() version = '4.0' - cctxt = self.client.prepare(version=version) + cctxt = self.client.prepare( + 
version=version, call_monitor_timeout=CONF.rpc_response_timeout, + timeout=CONF.long_rpc_timeout) return cctxt.call(ctxt, 'select_destinations', **msg_args) def update_aggregates(self, ctxt, aggregates): diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py index cc9e429135..e8efebbac9 100644 --- a/nova/scheduler/utils.py +++ b/nova/scheduler/utils.py @@ -15,12 +15,10 @@ """Utility methods for scheduling.""" import collections -import functools import re import sys from oslo_log import log as logging -import oslo_messaging as messaging from oslo_serialization import jsonutils from six.moves.urllib import parse @@ -936,37 +934,6 @@ def setup_instance_group(context, request_spec): request_spec.instance_group.members = group_info.members -def retry_on_timeout(retries=1): - """Retry the call in case a MessagingTimeout is raised. - - A decorator for retrying calls when a service dies mid-request. - - :param retries: Number of retries - :returns: Decorator - """ - def outer(func): - @functools.wraps(func) - def wrapped(*args, **kwargs): - attempt = 0 - while True: - try: - return func(*args, **kwargs) - except messaging.MessagingTimeout: - attempt += 1 - if attempt <= retries: - LOG.warning( - "Retrying %(name)s after a MessagingTimeout, " - "attempt %(attempt)s of %(retries)s.", - {'attempt': attempt, 'retries': retries, - 'name': func.__name__}) - else: - raise - return wrapped - return outer - -retry_select_destinations = retry_on_timeout(CONF.scheduler.max_attempts - 1) - - def request_is_rebuild(spec_obj): """Returns True if request is for a rebuild. 
diff --git a/nova/tests/unit/scheduler/test_client.py b/nova/tests/unit/scheduler/test_client.py index 5abe014c55..62df712b45 100644 --- a/nova/tests/unit/scheduler/test_client.py +++ b/nova/tests/unit/scheduler/test_client.py @@ -58,19 +58,7 @@ class SchedulerClientTestCase(test.NoDBTestCase): False] self.assertRaises(messaging.MessagingTimeout, self.client.select_destinations, *fake_args) - mock_select_destinations.assert_has_calls([mock.call(*fake_args)] * 2) - - @mock.patch.object(scheduler_query_client.SchedulerQueryClient, - 'select_destinations', side_effect=[ - messaging.MessagingTimeout(), mock.DEFAULT]) - def test_select_destinations_timeout_once(self, mock_select_destinations): - # scenario: the scheduler service times out & recovers after failure - fake_spec = objects.RequestSpec() - fake_spec.instance_uuid = uuids.instance - fake_args = ['ctxt', fake_spec, [fake_spec.instance_uuid], False, - False] - self.client.select_destinations(*fake_args) - mock_select_destinations.assert_has_calls([mock.call(*fake_args)] * 2) + mock_select_destinations.assert_called_once_with(*fake_args) @mock.patch.object(scheduler_query_client.SchedulerQueryClient, 'update_aggregates') diff --git a/nova/tests/unit/scheduler/test_rpcapi.py b/nova/tests/unit/scheduler/test_rpcapi.py index d18ad3ba32..3c56946975 100644 --- a/nova/tests/unit/scheduler/test_rpcapi.py +++ b/nova/tests/unit/scheduler/test_rpcapi.py @@ -19,12 +19,15 @@ Unit Tests for nova.scheduler.rpcapi import mock from oslo_utils.fixture import uuidsentinel as uuids +from nova import conf from nova import context from nova import exception as exc from nova import objects from nova.scheduler import rpcapi as scheduler_rpcapi from nova import test +CONF = conf.CONF + class SchedulerRpcAPITestCase(test.NoDBTestCase): def _test_scheduler_api(self, method, rpc_method, expected_args=None, @@ -45,6 +48,11 @@ class SchedulerRpcAPITestCase(test.NoDBTestCase): expected_kwargs = expected_args prepare_kwargs = {} + if method == 
'select_destinations': + prepare_kwargs.update({ + 'call_monitor_timeout': CONF.rpc_response_timeout, + 'timeout': CONF.long_rpc_timeout + }) if expected_fanout: prepare_kwargs['fanout'] = True if expected_version: diff --git a/releasenotes/notes/bug-1795992-long_rpc_timeout-select_destinations-9712e8690160928f.yaml b/releasenotes/notes/bug-1795992-long_rpc_timeout-select_destinations-9712e8690160928f.yaml new file mode 100644 index 0000000000..2958b223e2 --- /dev/null +++ b/releasenotes/notes/bug-1795992-long_rpc_timeout-select_destinations-9712e8690160928f.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + The ``long_rpc_timeout`` configuration option is now used for the RPC + call to the scheduler to select a host. This is in order to avoid a + timeout when scheduling multiple servers in a single request and/or when + the scheduler needs to process a large number of hosts.