diff --git a/nova/conf/compute.py b/nova/conf/compute.py index 3d0db555f5..c47cb86dbd 100644 --- a/nova/conf/compute.py +++ b/nova/conf/compute.py @@ -781,6 +781,20 @@ Possible values: Related options: * ``shutdown_timeout`` +"""), + cfg.IntOpt('sharing_providers_max_uuids_per_request', + default=200, + min=1, + help=""" +Maximum number of aggregate UUIDs per API request. The default is 200. + +In deployments with a large number of aggregates, a 'Request-Too-Long' +error may be raised by the web server or load balancer. This value +allows setting the batch size to limit the query length. + +Possible values: + +* Any positive integer. """), cfg.IntOpt('resource_provider_association_refresh', default=300, diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 7c14f3d7ef..ec1edbc637 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -497,25 +497,39 @@ class SchedulerReportClient(object): if not agg_uuids: return [] - aggs = ','.join(agg_uuids) - url = "/resource_providers?member_of=in:%s&required=%s" % ( - aggs, os_traits.MISC_SHARES_VIA_AGGREGATE) - resp = self.get(url, version='1.18', - global_request_id=context.global_id) - if resp.status_code == 200: - return resp.json()['resource_providers'] + maxuuids = CONF.compute.sharing_providers_max_uuids_per_request - msg = _("[%(placement_req_id)s] Failed to retrieve sharing resource " - "providers associated with the following aggregates from " - "placement API: %(aggs)s. 
Got %(status_code)d: %(err_text)s.") - args = { - 'aggs': aggs, - 'status_code': resp.status_code, - 'err_text': resp.text, - 'placement_req_id': get_placement_request_id(resp), - } - LOG.error(msg, args) - raise exception.ResourceProviderRetrievalFailed(message=msg % args) + agg_uuids = list(agg_uuids) + resource_providers = {} + for i in range(0, len(agg_uuids), maxuuids): + aggs = ','.join(agg_uuids[i:i + maxuuids]) + url = "/resource_providers?member_of=in:%s&required=%s" % ( + aggs, os_traits.MISC_SHARES_VIA_AGGREGATE) + resp = self.get(url, version='1.18', + global_request_id=context.global_id) + if resp.status_code == 200: + # We want to ensure that an RP on different aggregate + # will not be duplicated. + for rp in resp.json()['resource_providers']: + if not rp['uuid'] in resource_providers: + resource_providers[rp['uuid']] = rp + else: + msg = _("[%(placement_req_id)s] %(iquery)s/%(isize)s Failed " + "to retrieve sharing resource providers associated " + "with the following aggregates from placement API: " + "%(aggs)s. 
Got %(status_code)d: %(err_text)s.") + args = { + 'aggs': aggs, + 'status_code': resp.status_code, + 'err_text': resp.text, + 'placement_req_id': get_placement_request_id(resp), + 'iquery': i + 1, + 'isize': len(agg_uuids) + } + LOG.error(msg, args) + raise exception.ResourceProviderRetrievalFailed( + message=msg % args) + return list(resource_providers.values()) def get_providers_in_tree(self, context, uuid): """Queries the placement API for a list of the resource providers in diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py index 40ebac9af9..ed43b7843d 100644 --- a/nova/tests/unit/scheduler/client/test_report.py +++ b/nova/tests/unit/scheduler/client/test_report.py @@ -2335,8 +2335,11 @@ class TestProviderOperations(SchedulerReportClientTestCase): logging_mock.call_args[0][1]['placement_req_id']) def test_get_sharing_providers(self): + self.flags( + sharing_providers_max_uuids_per_request=3, group='compute') + resp_mock = mock.Mock(status_code=200) - rpjson = [ + rpjson1 = [ { 'uuid': uuids.sharing1, 'name': 'bandwidth_provider', @@ -2353,20 +2356,54 @@ class TestProviderOperations(SchedulerReportClientTestCase): 'root_provider_uuid': None, 'links': [], }, + { + 'uuid': uuids.sharing3, + 'name': 'storage_provider', + 'generation': 42, + 'parent_provider_uuid': None, + 'root_provider_uuid': None, + 'links': [], + } + ] + rpjson2 = [ + { + 'uuid': uuids.sharing4, + 'name': 'storage_provider', + 'generation': 42, + 'parent_provider_uuid': None, + 'root_provider_uuid': None, + 'links': [], + }, + ] + resp_mock.json.side_effect = [ + {'resource_providers': rpjson1}, + {'resource_providers': rpjson2} ] - resp_mock.json.return_value = {'resource_providers': rpjson} self.ks_adap_mock.get.return_value = resp_mock - result = self.client._get_sharing_providers( - self.context, [uuids.agg1, uuids.agg2]) + self.context, [uuids.agg1, uuids.agg2, + uuids.agg3, uuids.agg4]) - expected_url = 
('/resource_providers?member_of=in:' + - ','.join((uuids.agg1, uuids.agg2)) + - '&required=MISC_SHARES_VIA_AGGREGATE') - self.ks_adap_mock.get.assert_called_once_with( - expected_url, microversion='1.18', - global_request_id=self.context.global_id) - self.assertEqual(rpjson, result) + self.ks_adap_mock.get.assert_has_calls( + [ + # Asserting first request with 3 uuids + mock.call( + '/resource_providers?member_of=in:' + + ','.join((uuids.agg1, uuids.agg2, uuids.agg3)) + + '&required=MISC_SHARES_VIA_AGGREGATE', + microversion='1.18', + global_request_id=self.context.global_id), + mock.call().json(), + # Asserting second request with 1 uuid + mock.call( + '/resource_providers?member_of=in:' + + uuids.agg4 + + '&required=MISC_SHARES_VIA_AGGREGATE', + microversion='1.18', + global_request_id=self.context.global_id), + mock.call().json(), + ]) + self.assertEqual(rpjson1 + rpjson2, result) def test_get_sharing_providers_emptylist(self): self.assertEqual( diff --git a/releasenotes/notes/sharing-providers-max-uuids-per-request-f05da20c697e0922.yaml b/releasenotes/notes/sharing-providers-max-uuids-per-request-f05da20c697e0922.yaml new file mode 100644 index 0000000000..8cf31ecbfc --- /dev/null +++ b/releasenotes/notes/sharing-providers-max-uuids-per-request-f05da20c697e0922.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Introduced a new compute configuration option + ``sharing_providers_max_uuids_per_request`` and applied a fix to + handle the "Request-Too-Long" error that can occur when querying + the placement API with a large number of aggregate UUIDs.