From f7c688b8ef88a7390f5b09719a2b3e80368438c0 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Fri, 17 Nov 2017 12:27:34 -0800 Subject: [PATCH] Refined fix for validating image on rebuild This aims to fix the issue described in bug 1664931 where a rebuild fails to validate the existing host with the scheduler when a new image is provided. The previous attempt to do this could cause rebuilds to fail unnecessarily because we ran _all_ of the filters during a rebuild, which could cause usage/resource filters to prevent an otherwise valid rebuild from succeeding. This aims to classify filters as useful for rebuild or not, and only apply the former during a rebuild scheduler check. We do this by using an internal scheduler hint, indicating our intent. This should (a) filter out all hosts other than the one we're running on and (b) be detectable by the filtering infrastructure as an internally-generated scheduling request in order to trigger the correct filtering behavior. Closes-Bug: #1664931 Change-Id: I1a46ef1503be2febcd20f4594f44344d05525446 --- nova/compute/api.py | 19 ++++++++++++---- nova/scheduler/filters/__init__.py | 22 +++++++++++++++++-- nova/scheduler/filters/affinity_filter.py | 12 ++++++++++ .../aggregate_image_properties_isolation.py | 2 ++ .../filters/aggregate_instance_extra_specs.py | 2 ++ .../aggregate_multitenancy_isolation.py | 2 ++ nova/scheduler/filters/all_hosts_filter.py | 2 ++ .../filters/availability_zone_filter.py | 2 ++ .../filters/compute_capabilities_filter.py | 2 ++ nova/scheduler/filters/compute_filter.py | 2 ++ nova/scheduler/filters/core_filter.py | 2 ++ nova/scheduler/filters/disk_filter.py | 4 ++++ nova/scheduler/filters/exact_core_filter.py | 2 ++ nova/scheduler/filters/exact_disk_filter.py | 2 ++ nova/scheduler/filters/exact_ram_filter.py | 2 ++ nova/scheduler/filters/image_props_filter.py | 2 ++ nova/scheduler/filters/io_ops_filter.py | 2 ++ .../filters/isolated_hosts_filter.py | 2 ++ nova/scheduler/filters/json_filter.py | 3 +++ nova/scheduler/filters/metrics_filter.py | 2 ++ .../scheduler/filters/num_instances_filter.py | 2 ++ .../scheduler/filters/numa_topology_filter.py | 2 ++ .../filters/pci_passthrough_filter.py | 2 ++ nova/scheduler/filters/ram_filter.py | 2 ++ nova/scheduler/filters/retry_filter.py | 4 ++++ nova/scheduler/filters/trusted_filter.py | 2 ++ nova/scheduler/filters/type_filter.py | 4 ++++ nova/scheduler/host_manager.py | 9 ++++++-- nova/scheduler/utils.py | 13 +++++++++++ nova/tests/functional/test_servers.py | 21 ++++++++++++++++++ nova/tests/unit/compute/test_compute_api.py | 10 ++++----- ...lidate-image-rebuild-6d730042438eec10.yaml | 20 +++++++++++++++++ 32 files changed, 168 insertions(+), 13 deletions(-) create mode 100644 releasenotes/notes/bug-1664931-refine-validate-image-rebuild-6d730042438eec10.yaml diff --git a/nova/compute/api.py b/nova/compute/api.py index b9b24b9919..974470c913 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -2941,16 +2941,27 @@ class API(base.Base): # through the scheduler again, but we want the instance to be # rebuilt on the same host it's already on. if orig_image_ref != image_href: - request_spec.requested_destination = objects.Destination( - host=instance.host, - node=instance.node) # We have to modify the request spec that goes to the scheduler # to contain the new image. We persist this since we've already # changed the instance.image_ref above so we're being # consistent. request_spec.image = objects.ImageMeta.from_dict(image) request_spec.save() - host = None # This tells conductor to call the scheduler. + if 'scheduler_hints' not in request_spec: + request_spec.scheduler_hints = {} + # Nuke the id on this so we can't accidentally save + # this hint hack later + del request_spec.id + + # NOTE(danms): Passing host=None tells conductor to + # call the scheduler. The _nova_check_type hint + # requires that the scheduler returns only the same + # host that we are currently on and only checks + # rebuild-related filters. + request_spec.scheduler_hints['_nova_check_type'] = ['rebuild'] + request_spec.force_hosts = [instance.host] + request_spec.force_nodes = [instance.node] + host = None except exception.RequestSpecNotFound: # Some old instances can still have no RequestSpec object attached # to them, we need to support the old way diff --git a/nova/scheduler/filters/__init__.py b/nova/scheduler/filters/__init__.py index cbc311cbb3..44f283f7ac 100644 --- a/nova/scheduler/filters/__init__.py +++ b/nova/scheduler/filters/__init__.py @@ -21,9 +21,27 @@ from nova import filters class BaseHostFilter(filters.BaseFilter): """Base class for host filters.""" - def _filter_one(self, obj, filter_properties): + + # This is set to True if this filter should be run for rebuild. + # For example, with rebuild, we need to ask the scheduler if the + # existing host is still legit for a rebuild with the new image and + # other parameters. We care about running policy filters (i.e. + # ImagePropertiesFilter) but not things that check usage on the + # existing compute node, etc. + RUN_ON_REBUILD = False + + def _filter_one(self, obj, spec): """Return True if the object passes the filter, otherwise False.""" - return self.host_passes(obj, filter_properties) + # Do this here so we don't get scheduler.filters.utils + from nova.scheduler import utils + if not self.RUN_ON_REBUILD and utils.request_is_rebuild(spec): + # If we don't filter, default to passing the host. + return True + else: + # We are either a rebuild filter, in which case we always run, + # or this request is not rebuild in which case all filters + # should run. + return self.host_passes(obj, spec) def host_passes(self, host_state, filter_properties): """Return True if the HostState passes the filter, otherwise False. diff --git a/nova/scheduler/filters/affinity_filter.py b/nova/scheduler/filters/affinity_filter.py index 7eb57eb3ac..f8aa47ee03 100644 --- a/nova/scheduler/filters/affinity_filter.py +++ b/nova/scheduler/filters/affinity_filter.py @@ -29,6 +29,8 @@ class DifferentHostFilter(filters.BaseHostFilter): # The hosts the instances are running on doesn't change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): affinity_uuids = spec_obj.get_scheduler_hint('different_host') if affinity_uuids: @@ -45,6 +47,8 @@ class SameHostFilter(filters.BaseHostFilter): # The hosts the instances are running on doesn't change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): affinity_uuids = spec_obj.get_scheduler_hint('same_host') if affinity_uuids: @@ -59,6 +63,8 @@ class SimpleCIDRAffinityFilter(filters.BaseHostFilter): # The address of a host doesn't change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): affinity_cidr = spec_obj.get_scheduler_hint('cidr', '/24') affinity_host_addr = spec_obj.get_scheduler_hint('build_near_host_ip') @@ -77,6 +83,9 @@ class _GroupAntiAffinityFilter(filters.BaseHostFilter): """Schedule the instance on a different host from a set of group hosts. """ + + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): # Only invoke the filter if 'anti-affinity' is configured policies = (spec_obj.instance_group.policies @@ -110,6 +119,9 @@ class ServerGroupAntiAffinityFilter(_GroupAntiAffinityFilter): class _GroupAffinityFilter(filters.BaseHostFilter): """Schedule the instance on to host from a set of group hosts. """ + + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): # Only invoke the filter if 'affinity' is configured policies = (spec_obj.instance_group.policies diff --git a/nova/scheduler/filters/aggregate_image_properties_isolation.py b/nova/scheduler/filters/aggregate_image_properties_isolation.py index 600c8746fa..3649d1d1af 100644 --- a/nova/scheduler/filters/aggregate_image_properties_isolation.py +++ b/nova/scheduler/filters/aggregate_image_properties_isolation.py @@ -32,6 +32,8 @@ class AggregateImagePropertiesIsolation(filters.BaseHostFilter): # Aggregate data and instance type does not change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = True + def host_passes(self, host_state, spec_obj): """Checks a host in an aggregate that metadata key/value match with image properties. diff --git a/nova/scheduler/filters/aggregate_instance_extra_specs.py b/nova/scheduler/filters/aggregate_instance_extra_specs.py index e758bb2ae1..d2e2d15207 100644 --- a/nova/scheduler/filters/aggregate_instance_extra_specs.py +++ b/nova/scheduler/filters/aggregate_instance_extra_specs.py @@ -33,6 +33,8 @@ class AggregateInstanceExtraSpecsFilter(filters.BaseHostFilter): # Aggregate data and instance type does not change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): """Return a list of hosts that can create instance_type diff --git a/nova/scheduler/filters/aggregate_multitenancy_isolation.py b/nova/scheduler/filters/aggregate_multitenancy_isolation.py index 2e22787414..e74294ad7f 100644 --- a/nova/scheduler/filters/aggregate_multitenancy_isolation.py +++ b/nova/scheduler/filters/aggregate_multitenancy_isolation.py @@ -28,6 +28,8 @@ class AggregateMultiTenancyIsolation(filters.BaseHostFilter): # Aggregate data and tenant do not change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): """If a host is in an aggregate that has the metadata key "filter_tenant_id" it can only create instances from that tenant(s). diff --git a/nova/scheduler/filters/all_hosts_filter.py b/nova/scheduler/filters/all_hosts_filter.py index 07df2c05e7..1556d2cfb8 100644 --- a/nova/scheduler/filters/all_hosts_filter.py +++ b/nova/scheduler/filters/all_hosts_filter.py @@ -23,5 +23,7 @@ class AllHostsFilter(filters.BaseHostFilter): # list of hosts doesn't change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): return True diff --git a/nova/scheduler/filters/availability_zone_filter.py b/nova/scheduler/filters/availability_zone_filter.py index 58b0e9dd37..f48a9f3571 100644 --- a/nova/scheduler/filters/availability_zone_filter.py +++ b/nova/scheduler/filters/availability_zone_filter.py @@ -35,6 +35,8 @@ class AvailabilityZoneFilter(filters.BaseHostFilter): # Availability zones do not change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): availability_zone = spec_obj.availability_zone diff --git a/nova/scheduler/filters/compute_capabilities_filter.py b/nova/scheduler/filters/compute_capabilities_filter.py index 24fd05ebd7..72ac579b8b 100644 --- a/nova/scheduler/filters/compute_capabilities_filter.py +++ b/nova/scheduler/filters/compute_capabilities_filter.py @@ -30,6 +30,8 @@ class ComputeCapabilitiesFilter(filters.BaseHostFilter): # Instance type and host capabilities do not change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = False + def _get_capabilities(self, host_state, scope): cap = host_state for index in range(0, len(scope)): diff --git a/nova/scheduler/filters/compute_filter.py b/nova/scheduler/filters/compute_filter.py index ae124a94d7..259faf4d41 100644 --- a/nova/scheduler/filters/compute_filter.py +++ b/nova/scheduler/filters/compute_filter.py @@ -25,6 +25,8 @@ LOG = logging.getLogger(__name__) class ComputeFilter(filters.BaseHostFilter): """Filter on active Compute nodes.""" + RUN_ON_REBUILD = False + def __init__(self): self.servicegroup_api = servicegroup.API() diff --git a/nova/scheduler/filters/core_filter.py b/nova/scheduler/filters/core_filter.py index 15cd4da5ea..30d816fdcc 100644 --- a/nova/scheduler/filters/core_filter.py +++ b/nova/scheduler/filters/core_filter.py @@ -26,6 +26,8 @@ LOG = logging.getLogger(__name__) class BaseCoreFilter(filters.BaseHostFilter): + RUN_ON_REBUILD = False + def _get_cpu_allocation_ratio(self, host_state, spec_obj): raise NotImplementedError diff --git a/nova/scheduler/filters/disk_filter.py b/nova/scheduler/filters/disk_filter.py index d6323ea59a..1c478d69df 100644 --- a/nova/scheduler/filters/disk_filter.py +++ b/nova/scheduler/filters/disk_filter.py @@ -28,6 +28,8 @@ CONF = nova.conf.CONF class DiskFilter(filters.BaseHostFilter): """Disk Filter with over subscription flag.""" + RUN_ON_REBUILD = False + def _get_disk_allocation_ratio(self, host_state, spec_obj): return host_state.disk_allocation_ratio @@ -80,6 +82,8 @@ class AggregateDiskFilter(DiskFilter): found. """ + RUN_ON_REBUILD = False + def _get_disk_allocation_ratio(self, host_state, spec_obj): aggregate_vals = utils.aggregate_values_from_key( host_state, diff --git a/nova/scheduler/filters/exact_core_filter.py b/nova/scheduler/filters/exact_core_filter.py index f9b20f9970..6f79f25cb7 100644 --- a/nova/scheduler/filters/exact_core_filter.py +++ b/nova/scheduler/filters/exact_core_filter.py @@ -25,6 +25,8 @@ LOG = logging.getLogger(__name__) class ExactCoreFilter(filters.BaseHostFilter): """Exact Core Filter.""" + RUN_ON_REBUILD = False + def __init__(self, *args, **kwargs): super(ExactCoreFilter, self).__init__(*args, **kwargs) LOG.warning('ExactCoreFilter is deprecated in Pike and will be ' diff --git a/nova/scheduler/filters/exact_disk_filter.py b/nova/scheduler/filters/exact_disk_filter.py index e382a70e65..519dd8ca8a 100644 --- a/nova/scheduler/filters/exact_disk_filter.py +++ b/nova/scheduler/filters/exact_disk_filter.py @@ -23,6 +23,8 @@ LOG = logging.getLogger(__name__) class ExactDiskFilter(filters.BaseHostFilter): """Exact Disk Filter.""" + RUN_ON_REBUILD = False + def __init__(self, *args, **kwargs): super(ExactDiskFilter, self).__init__(*args, **kwargs) LOG.warning('ExactDiskFilter is deprecated in Pike and will be ' diff --git a/nova/scheduler/filters/exact_ram_filter.py b/nova/scheduler/filters/exact_ram_filter.py index d09b5d5ef1..123a1e11a8 100644 --- a/nova/scheduler/filters/exact_ram_filter.py +++ b/nova/scheduler/filters/exact_ram_filter.py @@ -23,6 +23,8 @@ LOG = logging.getLogger(__name__) class ExactRamFilter(filters.BaseHostFilter): """Exact RAM Filter.""" + RUN_ON_REBUILD = False + def __init__(self, *args, **kwargs): super(ExactRamFilter, self).__init__(*args, **kwargs) LOG.warning('ExactRamFilter is deprecated in Pike and will be ' diff --git a/nova/scheduler/filters/image_props_filter.py b/nova/scheduler/filters/image_props_filter.py index 1bf241a487..f6977dffb2 100644 --- a/nova/scheduler/filters/image_props_filter.py +++ b/nova/scheduler/filters/image_props_filter.py @@ -35,6 +35,8 @@ class ImagePropertiesFilter(filters.BaseHostFilter): contained in the image dictionary in the request_spec. """ + RUN_ON_REBUILD = True + # Image Properties and Compute Capabilities do not change within # a request run_filter_once_per_request = True diff --git a/nova/scheduler/filters/io_ops_filter.py b/nova/scheduler/filters/io_ops_filter.py index 9b4a16cbb6..0d2190c4eb 100644 --- a/nova/scheduler/filters/io_ops_filter.py +++ b/nova/scheduler/filters/io_ops_filter.py @@ -28,6 +28,8 @@ CONF = nova.conf.CONF class IoOpsFilter(filters.BaseHostFilter): """Filter out hosts with too many concurrent I/O operations.""" + RUN_ON_REBUILD = False + def _get_max_io_ops_per_host(self, host_state, spec_obj): return CONF.filter_scheduler.max_io_ops_per_host diff --git a/nova/scheduler/filters/isolated_hosts_filter.py b/nova/scheduler/filters/isolated_hosts_filter.py index 8b3677aea6..86ab1c4ce9 100644 --- a/nova/scheduler/filters/isolated_hosts_filter.py +++ b/nova/scheduler/filters/isolated_hosts_filter.py @@ -25,6 +25,8 @@ class IsolatedHostsFilter(filters.BaseHostFilter): # The configuration values do not change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = True + def host_passes(self, host_state, spec_obj): """Result Matrix with 'restrict_isolated_hosts_to_isolated_images' set to True:: diff --git a/nova/scheduler/filters/json_filter.py b/nova/scheduler/filters/json_filter.py index 70b96ad9d9..1afabfa33b 100644 --- a/nova/scheduler/filters/json_filter.py +++ b/nova/scheduler/filters/json_filter.py @@ -26,6 +26,9 @@ class JsonFilter(filters.BaseHostFilter): """Host Filter to allow simple JSON-based grammar for selecting hosts. """ + + RUN_ON_REBUILD = False + def _op_compare(self, args, op): """Returns True if the specified operator can successfully compare the first item in the args with all the rest. Will diff --git a/nova/scheduler/filters/metrics_filter.py b/nova/scheduler/filters/metrics_filter.py index 352b6e66c2..9b4d6307f1 100644 --- a/nova/scheduler/filters/metrics_filter.py +++ b/nova/scheduler/filters/metrics_filter.py @@ -32,6 +32,8 @@ class MetricsFilter(filters.BaseHostFilter): these hosts. """ + RUN_ON_REBUILD = False + def __init__(self): super(MetricsFilter, self).__init__() opts = utils.parse_options(CONF.metrics.weight_setting, diff --git a/nova/scheduler/filters/num_instances_filter.py b/nova/scheduler/filters/num_instances_filter.py index 115b31c2be..52946fdbc1 100644 --- a/nova/scheduler/filters/num_instances_filter.py +++ b/nova/scheduler/filters/num_instances_filter.py @@ -28,6 +28,8 @@ CONF = nova.conf.CONF class NumInstancesFilter(filters.BaseHostFilter): """Filter out hosts with too many instances.""" + RUN_ON_REBUILD = False + def _get_max_instances_per_host(self, host_state, spec_obj): return CONF.filter_scheduler.max_instances_per_host diff --git a/nova/scheduler/filters/numa_topology_filter.py b/nova/scheduler/filters/numa_topology_filter.py index 55bb99246f..10079ce2f1 100644 --- a/nova/scheduler/filters/numa_topology_filter.py +++ b/nova/scheduler/filters/numa_topology_filter.py @@ -23,6 +23,8 @@ LOG = logging.getLogger(__name__) class NUMATopologyFilter(filters.BaseHostFilter): """Filter on requested NUMA topology.""" + RUN_ON_REBUILD = True + def _satisfies_cpu_policy(self, host_state, extra_specs, image_props): """Check that the host_state provided satisfies any available CPU policy requirements. diff --git a/nova/scheduler/filters/pci_passthrough_filter.py b/nova/scheduler/filters/pci_passthrough_filter.py index 0db453d532..f08899586a 100644 --- a/nova/scheduler/filters/pci_passthrough_filter.py +++ b/nova/scheduler/filters/pci_passthrough_filter.py @@ -40,6 +40,8 @@ class PciPassthroughFilter(filters.BaseHostFilter): """ + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): """Return true if the host has the required PCI devices.""" pci_requests = spec_obj.pci_requests diff --git a/nova/scheduler/filters/ram_filter.py b/nova/scheduler/filters/ram_filter.py index ae7e869527..8830b709d2 100644 --- a/nova/scheduler/filters/ram_filter.py +++ b/nova/scheduler/filters/ram_filter.py @@ -25,6 +25,8 @@ LOG = logging.getLogger(__name__) class BaseRamFilter(filters.BaseHostFilter): + RUN_ON_REBUILD = False + def _get_ram_allocation_ratio(self, host_state, spec_obj): raise NotImplementedError diff --git a/nova/scheduler/filters/retry_filter.py b/nova/scheduler/filters/retry_filter.py index 4c2f0a1635..61ac2afefd 100644 --- a/nova/scheduler/filters/retry_filter.py +++ b/nova/scheduler/filters/retry_filter.py @@ -26,6 +26,10 @@ class RetryFilter(filters.BaseHostFilter): purposes """ + # NOTE(danms): This does not affect _where_ an instance lands, so not + # related to rebuild. + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): """Skip nodes that have already been attempted.""" retry = spec_obj.retry diff --git a/nova/scheduler/filters/trusted_filter.py b/nova/scheduler/filters/trusted_filter.py index 059b6d8030..a0ecd3e95a 100644 --- a/nova/scheduler/filters/trusted_filter.py +++ b/nova/scheduler/filters/trusted_filter.py @@ -230,6 +230,8 @@ class ComputeAttestation(object): class TrustedFilter(filters.BaseHostFilter): """Trusted filter to support Trusted Compute Pools.""" + RUN_ON_REBUILD = False + def __init__(self): self.compute_attestation = ComputeAttestation() msg = _LW('The TrustedFilter is deprecated as it has been marked ' diff --git a/nova/scheduler/filters/type_filter.py b/nova/scheduler/filters/type_filter.py index 7b9a0ad54f..4248d50b61 100644 --- a/nova/scheduler/filters/type_filter.py +++ b/nova/scheduler/filters/type_filter.py @@ -30,6 +30,8 @@ class TypeAffinityFilter(filters.BaseHostFilter): (spread) set to 1 (default). """ + RUN_ON_REBUILD = False + def __init__(self): super(TypeAffinityFilter, self).__init__() LOG.warning('TypeAffinityFilter is deprecated for removal in the ' @@ -64,6 +66,8 @@ class AggregateTypeAffinityFilter(filters.BaseHostFilter): # Aggregate data does not change within a request run_filter_once_per_request = True + RUN_ON_REBUILD = False + def host_passes(self, host_state, spec_obj): instance_type = spec_obj.flavor diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py index 6a154ffc57..e81426d386 100644 --- a/nova/scheduler/host_manager.py +++ b/nova/scheduler/host_manager.py @@ -577,8 +577,13 @@ class HostManager(object): _match_forced_hosts(name_to_cls_map, force_hosts) if force_nodes: _match_forced_nodes(name_to_cls_map, force_nodes) - if force_hosts or force_nodes: - # NOTE(deva): Skip filters when forcing host or node + check_type = ('scheduler_hints' in spec_obj and + spec_obj.scheduler_hints.get('_nova_check_type')) + if not check_type and (force_hosts or force_nodes): + # NOTE(deva,dansmith): Skip filters when forcing host or node + # unless we've declared the internal check type flag, in which + # case we're asking for a specific host and for filtering to + # be done. if name_to_cls_map: return name_to_cls_map.values() else: diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py index 7b9b9658af..e48868b616 100644 --- a/nova/scheduler/utils.py +++ b/nova/scheduler/utils.py @@ -736,3 +736,16 @@ def retry_on_timeout(retries=1): return outer retry_select_destinations = retry_on_timeout(CONF.scheduler.max_attempts - 1) + + +def request_is_rebuild(spec_obj): + """Returns True if request is for a rebuild. + + :param spec_obj: An objects.RequestSpec to examine (or None). + """ + if not spec_obj: + return False + if 'scheduler_hints' not in spec_obj: + return False + check_type = spec_obj.scheduler_hints.get('_nova_check_type') + return check_type == ['rebuild'] diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 6378d59fc9..4d9c740a6d 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -1066,6 +1066,16 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase, # can use to update image metadata via our compute images proxy API. microversion = '2.38' + def _disable_compute_for(self, server): + # Refresh to get its host + server = self.api.get_server(server['id']) + host = server['OS-EXT-SRV-ATTR:host'] + + # Disable the service it is on + self.api_fixture.admin_api.put_service('disable', + {'host': host, + 'binary': 'nova-compute'}) + def test_rebuild_with_image_novalidhost(self): """Creates a server with an image that is valid for the single compute that we have. Then rebuilds the server, passing in an image with @@ -1073,6 +1083,12 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase, a NoValidHost error. The ImagePropertiesFilter filter is enabled by default so that should filter out the host based on the image meta. """ + + fake.set_nodes(['host2']) + self.addCleanup(fake.restore_nodes) + self.flags(host='host2') + self.compute2 = self.start_service('compute', host='host2') + server_req_body = { 'server': { # We hard-code from a fake image since we can't get images @@ -1087,6 +1103,11 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase, } server = self.api.post_server(server_req_body) self._wait_for_state_change(self.api, server, 'ACTIVE') + + # Disable the host we're on so ComputeFilter would have ruled it out + # normally + self._disable_compute_for(server) + # Now update the image metadata to be something that won't work with # the fake compute driver we're using since the fake driver has an # "x86_64" architecture. diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index 2b0001ea1b..edc07430bc 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -3218,6 +3218,7 @@ class _ComputeAPIUnitTestMixIn(object): system_metadata=orig_system_metadata, expected_attrs=['system_metadata'], image_ref=orig_image_href, + node='node', vm_mode=fields_obj.VMMode.HVM) flavor = instance.get_flavor() @@ -3229,7 +3230,7 @@ class _ComputeAPIUnitTestMixIn(object): _get_image.side_effect = get_image bdm_get_by_instance_uuid.return_value = bdms - fake_spec = objects.RequestSpec() + fake_spec = objects.RequestSpec(id=1) req_spec_get_by_inst_uuid.return_value = fake_spec with mock.patch.object(self.compute_api.compute_task_api, @@ -3247,10 +3248,9 @@ class _ComputeAPIUnitTestMixIn(object): # assert the request spec was modified so the scheduler picks # the existing instance host/node req_spec_save.assert_called_once_with() - self.assertIn('requested_destination', fake_spec) - requested_destination = fake_spec.requested_destination - self.assertEqual(instance.host, requested_destination.host) - self.assertEqual(instance.node, requested_destination.node) + self.assertIn('_nova_check_type', fake_spec.scheduler_hints) + self.assertEqual('rebuild', + fake_spec.scheduler_hints['_nova_check_type'][0]) _check_auto_disk_config.assert_called_once_with(image=new_image) _checks_for_create_and_rebuild.assert_called_once_with(self.context, diff --git a/releasenotes/notes/bug-1664931-refine-validate-image-rebuild-6d730042438eec10.yaml b/releasenotes/notes/bug-1664931-refine-validate-image-rebuild-6d730042438eec10.yaml new file mode 100644 index 0000000000..1c9bc6113f --- /dev/null +++ b/releasenotes/notes/bug-1664931-refine-validate-image-rebuild-6d730042438eec10.yaml @@ -0,0 +1,20 @@ +--- +fixes: + - | + The fix for `OSSA-2017-005`_ (CVE-2017-16239) was too far-reaching in that + rebuilds can now fail based on scheduling filters that should not apply + to rebuild. For example, a rebuild of an instance on a disabled compute + host could fail whereas it would not before the fix for CVE-2017-16239. + Similarly, rebuilding an instance on a host that is at capacity for vcpu, + memory or disk could fail since the scheduler filters would treat it as a + new build request even though the rebuild is not claiming *new* resources. + + Therefore this release contains a fix for those regressions in scheduling + behavior on rebuild while maintaining the original fix for CVE-2017-16239. + + .. note:: The fix relies on a ``RUN_ON_REBUILD`` variable which is checked + for all scheduler filters during a rebuild. The reasoning behind + the value for that variable depends on each filter. If you have + out-of-tree scheduler filters, you will likely need to assess + whether or not they need to override the default value (False) + for the new variable.