Merge "Handle rebuild of instances with image traits"

This commit is contained in:
Zuul
2018-07-13 00:43:11 +00:00
committed by Gerrit Code Review
3 changed files with 383 additions and 0 deletions
+74
View File
@@ -941,6 +941,12 @@ class ComputeTaskManager(base.Base):
# is not forced to be the original host
request_spec.reset_forced_destinations()
try:
# if this is a rebuild of instance on the same host with
# new image.
if not recreate and orig_image_ref != image_ref:
self._validate_image_traits_for_rebuild(context,
instance,
image_ref)
request_spec.ensure_project_and_user_id(instance)
host_lists = self._schedule_instances(context,
request_spec, [instance.uuid],
@@ -989,6 +995,74 @@ class ComputeTaskManager(base.Base):
host=host, node=node, limits=limits,
request_spec=request_spec)
def _validate_image_traits_for_rebuild(self, context, instance, image_ref):
"""Validates that the traits specified in the image can be satisfied
by the providers of the current allocations for the instance during
rebuild of the instance. If the traits cannot be
satisfied, fails the action by raising a NoValidHost exception.
:raises: NoValidHost exception in case the traits on the providers
of the allocated resources for the instance do not match
the required traits on the image.
"""
image_meta = objects.ImageMeta.from_image_ref(
context, self.image_api, image_ref)
if ('properties' not in image_meta or
'traits_required' not in image_meta.properties or not
image_meta.properties.traits_required):
return
image_traits = set(image_meta.properties.traits_required)
# check any of the image traits are forbidden in flavor traits.
# if so raise an exception
extra_specs = instance.flavor.extra_specs
forbidden_flavor_traits = set()
for key, val in extra_specs.items():
if key.startswith('trait'):
# get the actual key.
prefix, parsed_key = key.split(':', 1)
if val == 'forbidden':
forbidden_flavor_traits.add(parsed_key)
forbidden_traits = image_traits & forbidden_flavor_traits
if forbidden_traits:
raise exception.NoValidHost(
reason=_("Image traits are part of forbidden "
"traits in flavor associated with the Image. "
"Please relaunch the instance."))
return
# If image traits are present, then validate against allocations.
allocations = self.report_client.get_allocations_for_consumer(
context, instance.uuid)
instance_rp_uuids = list(allocations)
# Get provider tree for the instance. We use the uuid of the host
# on which the instance is rebuilding to get the provider tree.
compute_node = objects.ComputeNode.get_by_host_and_nodename(
context, instance.host, instance.node)
# TODO(karimull): Call with a read-only version, when available.
instance_rp_tree = (
self.report_client.get_provider_tree_and_ensure_root(
context, compute_node.uuid))
traits_in_instance_rps = set()
for rp_uuid in instance_rp_uuids:
traits_in_instance_rps.update(
instance_rp_tree.data(rp_uuid).traits)
missing_traits = image_traits - traits_in_instance_rps
if missing_traits:
raise exception.NoValidHost(
reason=_("Image traits cannot be "
"satisfied by the current resource providers. "
"Please relaunch the instance."))
# TODO(avolkov): move method to bdm
@staticmethod
def _volume_size(instance_type, bdm):
@@ -15,8 +15,10 @@
import fixtures
from nova.compute import instance_actions
from nova import conf
from nova.tests.functional import integrated_helpers
import nova.tests.unit.image.fake
from nova.tests.unit.virt.libvirt import fakelibvirt
from nova.tests import uuidsentinel as uuids
@@ -100,3 +102,137 @@ class SharedStorageProviderUsageTestCase(
self.assertEqual({'DISK_GB': 1}, shared_rp_usages)
self.assertEqual({'MEMORY_MB': 512, 'VCPU': 1},
cn_rp_usages)
def create_shared_storage_rp(self):
# shared storage resource provider
shared_RP = self._post_resource_provider(
rp_name='shared_resource_provider1')
# created inventory for shared storage RP
inv = {"resource_class": "DISK_GB",
"total": 78, "reserved": 0, "min_unit": 1, "max_unit": 78,
"step_size": 1, "allocation_ratio": 1.0}
self._set_inventory(shared_RP['uuid'], inv)
# Added traits to shared storage resource provider
self._set_provider_traits(shared_RP['uuid'],
['MISC_SHARES_VIA_AGGREGATE',
'STORAGE_DISK_SSD'])
return shared_RP['uuid']
def test_rebuild_instance_with_image_traits_on_shared_rp(self):
shared_rp_uuid = self.create_shared_storage_rp()
# add both cn_rp and shared_rp under one aggregate
self._set_aggregate(shared_rp_uuid, uuids.shr_disk_agg)
self._set_aggregate(self.host_uuid, uuids.shr_disk_agg)
self.assertIn("DISK_GB",
self._get_provider_inventory(self.host_uuid))
# run update_available_resource periodic task after configuring shared
# resource provider to update compute node resources
self._run_periodics()
# we expect that the virt driver stops reporting DISK_GB on the compute
# RP as soon as a shared RP with DISK_GB is created in the compute tree
self.assertNotIn("DISK_GB",
self._get_provider_inventory(self.host_uuid))
server_req_body = {
'server': {
'imageRef': '155d900f-4e14-4e4c-a73d-069cbf4541e6',
'flavorRef': '1',
'name': 'test_shared_storage_rp_configuration_with_cn_rp',
'networks': 'none'
}
}
# create server
server = self.api.post_server(server_req_body)
self._wait_for_state_change(self.api, server, 'ACTIVE')
rebuild_image_ref = (
nova.tests.unit.image.fake.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID)
with nova.utils.temporary_mutation(self.api, microversion='2.35'):
self.api.api_put('/images/%s/metadata' % rebuild_image_ref,
{'metadata': {
'trait:STORAGE_DISK_SSD': 'required'}})
rebuild_req_body = {
'rebuild': {
'imageRef': rebuild_image_ref
}
}
self.api.api_post('/servers/%s/action' % server['id'],
rebuild_req_body)
self._wait_for_server_parameter(
self.api, server, {'OS-EXT-STS:task_state': None})
# get shared_rp and cn_rp usages
shared_rp_usages = self._get_provider_usages(shared_rp_uuid)
cn_rp_usages = self._get_provider_usages(self.host_uuid)
# Check if DISK_GB resource is allocated from shared_RP and the
# remaining resources are allocated from host_uuid.
self.assertEqual({'DISK_GB': 1}, shared_rp_usages)
self.assertEqual({'MEMORY_MB': 512, 'VCPU': 1},
cn_rp_usages)
allocs = self._get_allocations_by_server_uuid(server['id'])
self.assertIn(self.host_uuid, allocs)
server = self.api.get_server(server['id'])
self.assertEqual(rebuild_image_ref, server['image']['id'])
def test_rebuild_instance_with_image_traits_on_shared_rp_no_valid_host(
self):
shared_rp_uuid = self.create_shared_storage_rp()
# add both cn_rp and shared_rp under one aggregate
self._set_aggregate(shared_rp_uuid, uuids.shr_disk_agg)
self._set_aggregate(self.host_uuid, uuids.shr_disk_agg)
self.assertIn("DISK_GB",
self._get_provider_inventory(self.host_uuid))
# run update_available_resource periodic task after configuring shared
# resource provider to update compute node resources
self._run_periodics()
# we expect that the virt driver stops reporting DISK_GB on the compute
# RP as soon as a shared RP with DISK_GB is created in the compute tree
self.assertNotIn("DISK_GB",
self._get_provider_inventory(self.host_uuid))
org_image_id = '155d900f-4e14-4e4c-a73d-069cbf4541e6'
server_req_body = {
'server': {
'imageRef': org_image_id,
'flavorRef': '1',
'name': 'test_shared_storage_rp_configuration_with_cn_rp',
'networks': 'none'
}
}
# create server
server = self.api.post_server(server_req_body)
self._wait_for_state_change(self.api, server, 'ACTIVE')
rebuild_image_ref = (
nova.tests.unit.image.fake.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID)
with nova.utils.temporary_mutation(self.api, microversion='2.35'):
self.api.api_put('/images/%s/metadata' % rebuild_image_ref,
{'metadata': {
'trait:CUSTOM_FOO': 'required'}})
rebuild_req_body = {
'rebuild': {
'imageRef': rebuild_image_ref
}
}
self.api.api_post('/servers/%s/action' % server['id'],
rebuild_req_body)
# Look for the failed rebuild action.
self._wait_for_action_fail_completion(
server, instance_actions.REBUILD, 'rebuild_server', self.admin_api)
# Assert the server image_ref was rolled back on failure.
server = self.api.get_server(server['id'])
self.assertEqual(org_image_id, server['image']['id'])
# The server should be in ERROR state
self.assertEqual('ERROR', server['status'])
self.assertIn('No valid host', server['fault']['message'])
+173
View File
@@ -3595,6 +3595,10 @@ class TraitsBasedSchedulingTest(integrated_helpers.ProviderUsageBaseTestCase):
self.flavor_with_trait['id'],
{'extra_specs': {'trait:HW_CPU_X86_VMX': 'required'}})
self.flavor_without_trait = flavors[1]
self.flavor_with_forbidden_trait = flavors[2]
self.admin_api.post_extra_spec(
self.flavor_with_forbidden_trait['id'],
{'extra_specs': {'trait:HW_CPU_X86_SGX': 'forbidden'}})
# Note that we're using v2.35 explicitly as the api returns 404
# starting with 2.36
@@ -3822,6 +3826,175 @@ class TraitsBasedSchedulingTest(integrated_helpers.ProviderUsageBaseTestCase):
self.assertIn('fault', server)
self.assertIn('No valid host', server['fault']['message'])
def test_rebuild_instance_with_image_traits(self):
"""Rebuilds a server with a different image which has traits
associated with it and which will run it through the scheduler to
validate the image is still OK with the compute host that the
instance is running on.
"""
# Decorate compute2 resource provider with both flavor and image trait.
rp_uuid = self._get_provider_uuid_by_host(self.compute2.host)
self._set_provider_traits(rp_uuid, ['HW_CPU_X86_VMX',
'HW_CPU_X86_SGX'])
# make sure we start with no usage on the compute node
rp_usages = self._get_provider_usages(rp_uuid)
self.assertEqual({'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, rp_usages)
# create a server without traits on image and with traits on flavour
server = self._create_server_with_traits(
self.flavor_with_trait['id'], self.image_id_without_trait)
server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
# make the compute node full and ensure rebuild still succeed
inv = {"resource_class": "VCPU",
"total": 1}
self._set_inventory(rp_uuid, inv)
# Now rebuild the server with a different image with traits
rebuild_req_body = {
'rebuild': {
'imageRef': self.image_id_with_trait
}
}
self.api.api_post('/servers/%s/action' % server['id'],
rebuild_req_body)
self._wait_for_server_parameter(
self.api, server, {'OS-EXT-STS:task_state': None})
allocs = self._get_allocations_by_server_uuid(server['id'])
self.assertIn(rp_uuid, allocs)
# Assert the server ended up on the expected compute host that has
# the required trait.
self.assertEqual(self.compute2.host, server['OS-EXT-SRV-ATTR:host'])
def test_rebuild_instance_with_image_traits_no_host(self):
"""Rebuilding a server with a different image which has required
traits on the image fails to valid the host that this server is
currently running, cause the compute host resource provider is not
associated with similar trait.
"""
# Decorate compute2 resource provider with traits on flavor
rp_uuid = self._get_provider_uuid_by_host(self.compute2.host)
self._set_provider_traits(rp_uuid, ['HW_CPU_X86_VMX'])
# make sure we start with no usage on the compute node
rp_usages = self._get_provider_usages(rp_uuid)
self.assertEqual({'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, rp_usages)
# create a server without traits on image and with traits on flavour
server = self._create_server_with_traits(
self.flavor_with_trait['id'], self.image_id_without_trait)
server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
# Now rebuild the server with a different image with traits
rebuild_req_body = {
'rebuild': {
'imageRef': self.image_id_with_trait
}
}
self.api.api_post('/servers/%s/action' % server['id'],
rebuild_req_body)
# Look for the failed rebuild action.
self._wait_for_action_fail_completion(
server, instance_actions.REBUILD, 'rebuild_server', self.admin_api)
# Assert the server image_ref was rolled back on failure.
server = self.api.get_server(server['id'])
self.assertEqual(self.image_id_without_trait, server['image']['id'])
# The server should be in ERROR state
self.assertEqual('ERROR', server['status'])
self.assertEqual("No valid host was found. Image traits cannot be "
"satisfied by the current resource providers. "
"Please relaunch the instance.",
server['fault']['message'])
def test_rebuild_instance_with_image_traits_no_image_change(self):
"""Rebuilds a server with a same image which has traits
associated with it and which will run it through the scheduler to
validate the image is still OK with the compute host that the
instance is running on.
"""
# Decorate compute2 resource provider with both flavor and image trait.
rp_uuid = self._get_provider_uuid_by_host(self.compute2.host)
self._set_provider_traits(rp_uuid, ['HW_CPU_X86_VMX',
'HW_CPU_X86_SGX'])
# make sure we start with no usage on the compute node
rp_usages = self._get_provider_usages(rp_uuid)
self.assertEqual({'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0},
rp_usages)
# create a server with traits in both image and flavour
server = self._create_server_with_traits(
self.flavor_with_trait['id'], self.image_id_with_trait)
server = self._wait_for_state_change(self.admin_api, server,
'ACTIVE')
# Now rebuild the server with a different image with traits
rebuild_req_body = {
'rebuild': {
'imageRef': self.image_id_with_trait
}
}
self.api.api_post('/servers/%s/action' % server['id'],
rebuild_req_body)
self._wait_for_server_parameter(
self.api, server, {'OS-EXT-STS:task_state': None})
allocs = self._get_allocations_by_server_uuid(server['id'])
self.assertIn(rp_uuid, allocs)
# Assert the server ended up on the expected compute host that has
# the required trait.
self.assertEqual(self.compute2.host,
server['OS-EXT-SRV-ATTR:host'])
def test_rebuild_instance_with_image_traits_and_forbidden_flavor_traits(
self):
"""Rebuilding a server with a different image which has required
traits on the image fails to validate image traits because flavor
associated with the current instance has the similar triat that is
forbidden
"""
# Decorate compute2 resource provider with traits on flavor
rp_uuid = self._get_provider_uuid_by_host(self.compute2.host)
self._set_provider_traits(rp_uuid, ['HW_CPU_X86_VMX'])
# make sure we start with no usage on the compute node
rp_usages = self._get_provider_usages(rp_uuid)
self.assertEqual({'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, rp_usages)
# create a server with forbidden traits on flavor and no triats on
# image
server = self._create_server_with_traits(
self.flavor_with_forbidden_trait['id'],
self.image_id_without_trait)
server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
# Now rebuild the server with a different image with traits
rebuild_req_body = {
'rebuild': {
'imageRef': self.image_id_with_trait
}
}
self.api.api_post('/servers/%s/action' % server['id'],
rebuild_req_body)
# Look for the failed rebuild action.
self._wait_for_action_fail_completion(
server, instance_actions.REBUILD, 'rebuild_server', self.admin_api)
# Assert the server image_ref was rolled back on failure.
server = self.api.get_server(server['id'])
self.assertEqual(self.image_id_without_trait, server['image']['id'])
# The server should be in ERROR state
self.assertEqual('ERROR', server['status'])
self.assertEqual("No valid host was found. Image traits are part of "
"forbidden traits in flavor associated with the "
"Image. Please relaunch the instance.",
server['fault']['message'])
class ServerTestV256Common(ServersTestBase):
api_major_version = 'v2.1'