Merge "Handle rebuild of instances with image traits"
This commit is contained in:
@@ -941,6 +941,12 @@ class ComputeTaskManager(base.Base):
|
||||
# is not forced to be the original host
|
||||
request_spec.reset_forced_destinations()
|
||||
try:
|
||||
# if this is a rebuild of instance on the same host with
|
||||
# new image.
|
||||
if not recreate and orig_image_ref != image_ref:
|
||||
self._validate_image_traits_for_rebuild(context,
|
||||
instance,
|
||||
image_ref)
|
||||
request_spec.ensure_project_and_user_id(instance)
|
||||
host_lists = self._schedule_instances(context,
|
||||
request_spec, [instance.uuid],
|
||||
@@ -989,6 +995,74 @@ class ComputeTaskManager(base.Base):
|
||||
host=host, node=node, limits=limits,
|
||||
request_spec=request_spec)
|
||||
|
||||
def _validate_image_traits_for_rebuild(self, context, instance, image_ref):
|
||||
"""Validates that the traits specified in the image can be satisfied
|
||||
by the providers of the current allocations for the instance during
|
||||
rebuild of the instance. If the traits cannot be
|
||||
satisfied, fails the action by raising a NoValidHost exception.
|
||||
|
||||
:raises: NoValidHost exception in case the traits on the providers
|
||||
of the allocated resources for the instance do not match
|
||||
the required traits on the image.
|
||||
"""
|
||||
image_meta = objects.ImageMeta.from_image_ref(
|
||||
context, self.image_api, image_ref)
|
||||
if ('properties' not in image_meta or
|
||||
'traits_required' not in image_meta.properties or not
|
||||
image_meta.properties.traits_required):
|
||||
return
|
||||
|
||||
image_traits = set(image_meta.properties.traits_required)
|
||||
|
||||
# check any of the image traits are forbidden in flavor traits.
|
||||
# if so raise an exception
|
||||
extra_specs = instance.flavor.extra_specs
|
||||
forbidden_flavor_traits = set()
|
||||
for key, val in extra_specs.items():
|
||||
if key.startswith('trait'):
|
||||
# get the actual key.
|
||||
prefix, parsed_key = key.split(':', 1)
|
||||
if val == 'forbidden':
|
||||
forbidden_flavor_traits.add(parsed_key)
|
||||
|
||||
forbidden_traits = image_traits & forbidden_flavor_traits
|
||||
|
||||
if forbidden_traits:
|
||||
raise exception.NoValidHost(
|
||||
reason=_("Image traits are part of forbidden "
|
||||
"traits in flavor associated with the Image. "
|
||||
"Please relaunch the instance."))
|
||||
return
|
||||
|
||||
# If image traits are present, then validate against allocations.
|
||||
allocations = self.report_client.get_allocations_for_consumer(
|
||||
context, instance.uuid)
|
||||
instance_rp_uuids = list(allocations)
|
||||
|
||||
# Get provider tree for the instance. We use the uuid of the host
|
||||
# on which the instance is rebuilding to get the provider tree.
|
||||
compute_node = objects.ComputeNode.get_by_host_and_nodename(
|
||||
context, instance.host, instance.node)
|
||||
|
||||
# TODO(karimull): Call with a read-only version, when available.
|
||||
instance_rp_tree = (
|
||||
self.report_client.get_provider_tree_and_ensure_root(
|
||||
context, compute_node.uuid))
|
||||
|
||||
traits_in_instance_rps = set()
|
||||
|
||||
for rp_uuid in instance_rp_uuids:
|
||||
traits_in_instance_rps.update(
|
||||
instance_rp_tree.data(rp_uuid).traits)
|
||||
|
||||
missing_traits = image_traits - traits_in_instance_rps
|
||||
|
||||
if missing_traits:
|
||||
raise exception.NoValidHost(
|
||||
reason=_("Image traits cannot be "
|
||||
"satisfied by the current resource providers. "
|
||||
"Please relaunch the instance."))
|
||||
|
||||
# TODO(avolkov): move method to bdm
|
||||
@staticmethod
|
||||
def _volume_size(instance_type, bdm):
|
||||
|
||||
@@ -15,8 +15,10 @@
|
||||
|
||||
import fixtures
|
||||
|
||||
from nova.compute import instance_actions
|
||||
from nova import conf
|
||||
from nova.tests.functional import integrated_helpers
|
||||
import nova.tests.unit.image.fake
|
||||
from nova.tests.unit.virt.libvirt import fakelibvirt
|
||||
from nova.tests import uuidsentinel as uuids
|
||||
|
||||
@@ -100,3 +102,137 @@ class SharedStorageProviderUsageTestCase(
|
||||
self.assertEqual({'DISK_GB': 1}, shared_rp_usages)
|
||||
self.assertEqual({'MEMORY_MB': 512, 'VCPU': 1},
|
||||
cn_rp_usages)
|
||||
|
||||
def create_shared_storage_rp(self):
|
||||
# shared storage resource provider
|
||||
shared_RP = self._post_resource_provider(
|
||||
rp_name='shared_resource_provider1')
|
||||
|
||||
# created inventory for shared storage RP
|
||||
inv = {"resource_class": "DISK_GB",
|
||||
"total": 78, "reserved": 0, "min_unit": 1, "max_unit": 78,
|
||||
"step_size": 1, "allocation_ratio": 1.0}
|
||||
self._set_inventory(shared_RP['uuid'], inv)
|
||||
|
||||
# Added traits to shared storage resource provider
|
||||
self._set_provider_traits(shared_RP['uuid'],
|
||||
['MISC_SHARES_VIA_AGGREGATE',
|
||||
'STORAGE_DISK_SSD'])
|
||||
return shared_RP['uuid']
|
||||
|
||||
def test_rebuild_instance_with_image_traits_on_shared_rp(self):
|
||||
|
||||
shared_rp_uuid = self.create_shared_storage_rp()
|
||||
# add both cn_rp and shared_rp under one aggregate
|
||||
self._set_aggregate(shared_rp_uuid, uuids.shr_disk_agg)
|
||||
self._set_aggregate(self.host_uuid, uuids.shr_disk_agg)
|
||||
|
||||
self.assertIn("DISK_GB",
|
||||
self._get_provider_inventory(self.host_uuid))
|
||||
|
||||
# run update_available_resource periodic task after configuring shared
|
||||
# resource provider to update compute node resources
|
||||
self._run_periodics()
|
||||
|
||||
# we expect that the virt driver stops reporting DISK_GB on the compute
|
||||
# RP as soon as a shared RP with DISK_GB is created in the compute tree
|
||||
self.assertNotIn("DISK_GB",
|
||||
self._get_provider_inventory(self.host_uuid))
|
||||
|
||||
server_req_body = {
|
||||
'server': {
|
||||
'imageRef': '155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||
'flavorRef': '1',
|
||||
'name': 'test_shared_storage_rp_configuration_with_cn_rp',
|
||||
'networks': 'none'
|
||||
}
|
||||
}
|
||||
# create server
|
||||
server = self.api.post_server(server_req_body)
|
||||
self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||
|
||||
rebuild_image_ref = (
|
||||
nova.tests.unit.image.fake.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID)
|
||||
|
||||
with nova.utils.temporary_mutation(self.api, microversion='2.35'):
|
||||
self.api.api_put('/images/%s/metadata' % rebuild_image_ref,
|
||||
{'metadata': {
|
||||
'trait:STORAGE_DISK_SSD': 'required'}})
|
||||
rebuild_req_body = {
|
||||
'rebuild': {
|
||||
'imageRef': rebuild_image_ref
|
||||
}
|
||||
}
|
||||
self.api.api_post('/servers/%s/action' % server['id'],
|
||||
rebuild_req_body)
|
||||
self._wait_for_server_parameter(
|
||||
self.api, server, {'OS-EXT-STS:task_state': None})
|
||||
|
||||
# get shared_rp and cn_rp usages
|
||||
shared_rp_usages = self._get_provider_usages(shared_rp_uuid)
|
||||
cn_rp_usages = self._get_provider_usages(self.host_uuid)
|
||||
# Check if DISK_GB resource is allocated from shared_RP and the
|
||||
# remaining resources are allocated from host_uuid.
|
||||
self.assertEqual({'DISK_GB': 1}, shared_rp_usages)
|
||||
self.assertEqual({'MEMORY_MB': 512, 'VCPU': 1},
|
||||
cn_rp_usages)
|
||||
allocs = self._get_allocations_by_server_uuid(server['id'])
|
||||
self.assertIn(self.host_uuid, allocs)
|
||||
server = self.api.get_server(server['id'])
|
||||
self.assertEqual(rebuild_image_ref, server['image']['id'])
|
||||
|
||||
def test_rebuild_instance_with_image_traits_on_shared_rp_no_valid_host(
|
||||
self):
|
||||
shared_rp_uuid = self.create_shared_storage_rp()
|
||||
# add both cn_rp and shared_rp under one aggregate
|
||||
self._set_aggregate(shared_rp_uuid, uuids.shr_disk_agg)
|
||||
self._set_aggregate(self.host_uuid, uuids.shr_disk_agg)
|
||||
|
||||
self.assertIn("DISK_GB",
|
||||
self._get_provider_inventory(self.host_uuid))
|
||||
|
||||
# run update_available_resource periodic task after configuring shared
|
||||
# resource provider to update compute node resources
|
||||
self._run_periodics()
|
||||
|
||||
# we expect that the virt driver stops reporting DISK_GB on the compute
|
||||
# RP as soon as a shared RP with DISK_GB is created in the compute tree
|
||||
self.assertNotIn("DISK_GB",
|
||||
self._get_provider_inventory(self.host_uuid))
|
||||
org_image_id = '155d900f-4e14-4e4c-a73d-069cbf4541e6'
|
||||
server_req_body = {
|
||||
'server': {
|
||||
'imageRef': org_image_id,
|
||||
'flavorRef': '1',
|
||||
'name': 'test_shared_storage_rp_configuration_with_cn_rp',
|
||||
'networks': 'none'
|
||||
}
|
||||
}
|
||||
# create server
|
||||
server = self.api.post_server(server_req_body)
|
||||
self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||
|
||||
rebuild_image_ref = (
|
||||
nova.tests.unit.image.fake.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID)
|
||||
|
||||
with nova.utils.temporary_mutation(self.api, microversion='2.35'):
|
||||
self.api.api_put('/images/%s/metadata' % rebuild_image_ref,
|
||||
{'metadata': {
|
||||
'trait:CUSTOM_FOO': 'required'}})
|
||||
rebuild_req_body = {
|
||||
'rebuild': {
|
||||
'imageRef': rebuild_image_ref
|
||||
}
|
||||
}
|
||||
self.api.api_post('/servers/%s/action' % server['id'],
|
||||
rebuild_req_body)
|
||||
# Look for the failed rebuild action.
|
||||
self._wait_for_action_fail_completion(
|
||||
server, instance_actions.REBUILD, 'rebuild_server', self.admin_api)
|
||||
# Assert the server image_ref was rolled back on failure.
|
||||
server = self.api.get_server(server['id'])
|
||||
self.assertEqual(org_image_id, server['image']['id'])
|
||||
|
||||
# The server should be in ERROR state
|
||||
self.assertEqual('ERROR', server['status'])
|
||||
self.assertIn('No valid host', server['fault']['message'])
|
||||
|
||||
@@ -3595,6 +3595,10 @@ class TraitsBasedSchedulingTest(integrated_helpers.ProviderUsageBaseTestCase):
|
||||
self.flavor_with_trait['id'],
|
||||
{'extra_specs': {'trait:HW_CPU_X86_VMX': 'required'}})
|
||||
self.flavor_without_trait = flavors[1]
|
||||
self.flavor_with_forbidden_trait = flavors[2]
|
||||
self.admin_api.post_extra_spec(
|
||||
self.flavor_with_forbidden_trait['id'],
|
||||
{'extra_specs': {'trait:HW_CPU_X86_SGX': 'forbidden'}})
|
||||
|
||||
# Note that we're using v2.35 explicitly as the api returns 404
|
||||
# starting with 2.36
|
||||
@@ -3822,6 +3826,175 @@ class TraitsBasedSchedulingTest(integrated_helpers.ProviderUsageBaseTestCase):
|
||||
self.assertIn('fault', server)
|
||||
self.assertIn('No valid host', server['fault']['message'])
|
||||
|
||||
def test_rebuild_instance_with_image_traits(self):
|
||||
"""Rebuilds a server with a different image which has traits
|
||||
associated with it and which will run it through the scheduler to
|
||||
validate the image is still OK with the compute host that the
|
||||
instance is running on.
|
||||
"""
|
||||
# Decorate compute2 resource provider with both flavor and image trait.
|
||||
rp_uuid = self._get_provider_uuid_by_host(self.compute2.host)
|
||||
self._set_provider_traits(rp_uuid, ['HW_CPU_X86_VMX',
|
||||
'HW_CPU_X86_SGX'])
|
||||
# make sure we start with no usage on the compute node
|
||||
rp_usages = self._get_provider_usages(rp_uuid)
|
||||
self.assertEqual({'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, rp_usages)
|
||||
|
||||
# create a server without traits on image and with traits on flavour
|
||||
server = self._create_server_with_traits(
|
||||
self.flavor_with_trait['id'], self.image_id_without_trait)
|
||||
server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
|
||||
|
||||
# make the compute node full and ensure rebuild still succeed
|
||||
inv = {"resource_class": "VCPU",
|
||||
"total": 1}
|
||||
self._set_inventory(rp_uuid, inv)
|
||||
|
||||
# Now rebuild the server with a different image with traits
|
||||
rebuild_req_body = {
|
||||
'rebuild': {
|
||||
'imageRef': self.image_id_with_trait
|
||||
}
|
||||
}
|
||||
self.api.api_post('/servers/%s/action' % server['id'],
|
||||
rebuild_req_body)
|
||||
self._wait_for_server_parameter(
|
||||
self.api, server, {'OS-EXT-STS:task_state': None})
|
||||
|
||||
allocs = self._get_allocations_by_server_uuid(server['id'])
|
||||
self.assertIn(rp_uuid, allocs)
|
||||
|
||||
# Assert the server ended up on the expected compute host that has
|
||||
# the required trait.
|
||||
self.assertEqual(self.compute2.host, server['OS-EXT-SRV-ATTR:host'])
|
||||
|
||||
def test_rebuild_instance_with_image_traits_no_host(self):
|
||||
"""Rebuilding a server with a different image which has required
|
||||
traits on the image fails to valid the host that this server is
|
||||
currently running, cause the compute host resource provider is not
|
||||
associated with similar trait.
|
||||
"""
|
||||
# Decorate compute2 resource provider with traits on flavor
|
||||
rp_uuid = self._get_provider_uuid_by_host(self.compute2.host)
|
||||
self._set_provider_traits(rp_uuid, ['HW_CPU_X86_VMX'])
|
||||
|
||||
# make sure we start with no usage on the compute node
|
||||
rp_usages = self._get_provider_usages(rp_uuid)
|
||||
self.assertEqual({'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, rp_usages)
|
||||
|
||||
# create a server without traits on image and with traits on flavour
|
||||
server = self._create_server_with_traits(
|
||||
self.flavor_with_trait['id'], self.image_id_without_trait)
|
||||
server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
|
||||
|
||||
# Now rebuild the server with a different image with traits
|
||||
rebuild_req_body = {
|
||||
'rebuild': {
|
||||
'imageRef': self.image_id_with_trait
|
||||
}
|
||||
}
|
||||
|
||||
self.api.api_post('/servers/%s/action' % server['id'],
|
||||
rebuild_req_body)
|
||||
# Look for the failed rebuild action.
|
||||
self._wait_for_action_fail_completion(
|
||||
server, instance_actions.REBUILD, 'rebuild_server', self.admin_api)
|
||||
# Assert the server image_ref was rolled back on failure.
|
||||
server = self.api.get_server(server['id'])
|
||||
self.assertEqual(self.image_id_without_trait, server['image']['id'])
|
||||
|
||||
# The server should be in ERROR state
|
||||
self.assertEqual('ERROR', server['status'])
|
||||
self.assertEqual("No valid host was found. Image traits cannot be "
|
||||
"satisfied by the current resource providers. "
|
||||
"Please relaunch the instance.",
|
||||
server['fault']['message'])
|
||||
|
||||
def test_rebuild_instance_with_image_traits_no_image_change(self):
|
||||
"""Rebuilds a server with a same image which has traits
|
||||
associated with it and which will run it through the scheduler to
|
||||
validate the image is still OK with the compute host that the
|
||||
instance is running on.
|
||||
"""
|
||||
# Decorate compute2 resource provider with both flavor and image trait.
|
||||
rp_uuid = self._get_provider_uuid_by_host(self.compute2.host)
|
||||
self._set_provider_traits(rp_uuid, ['HW_CPU_X86_VMX',
|
||||
'HW_CPU_X86_SGX'])
|
||||
# make sure we start with no usage on the compute node
|
||||
rp_usages = self._get_provider_usages(rp_uuid)
|
||||
self.assertEqual({'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0},
|
||||
rp_usages)
|
||||
|
||||
# create a server with traits in both image and flavour
|
||||
server = self._create_server_with_traits(
|
||||
self.flavor_with_trait['id'], self.image_id_with_trait)
|
||||
server = self._wait_for_state_change(self.admin_api, server,
|
||||
'ACTIVE')
|
||||
|
||||
# Now rebuild the server with a different image with traits
|
||||
rebuild_req_body = {
|
||||
'rebuild': {
|
||||
'imageRef': self.image_id_with_trait
|
||||
}
|
||||
}
|
||||
self.api.api_post('/servers/%s/action' % server['id'],
|
||||
rebuild_req_body)
|
||||
self._wait_for_server_parameter(
|
||||
self.api, server, {'OS-EXT-STS:task_state': None})
|
||||
|
||||
allocs = self._get_allocations_by_server_uuid(server['id'])
|
||||
self.assertIn(rp_uuid, allocs)
|
||||
|
||||
# Assert the server ended up on the expected compute host that has
|
||||
# the required trait.
|
||||
self.assertEqual(self.compute2.host,
|
||||
server['OS-EXT-SRV-ATTR:host'])
|
||||
|
||||
def test_rebuild_instance_with_image_traits_and_forbidden_flavor_traits(
|
||||
self):
|
||||
"""Rebuilding a server with a different image which has required
|
||||
traits on the image fails to validate image traits because flavor
|
||||
associated with the current instance has the similar triat that is
|
||||
forbidden
|
||||
"""
|
||||
# Decorate compute2 resource provider with traits on flavor
|
||||
rp_uuid = self._get_provider_uuid_by_host(self.compute2.host)
|
||||
self._set_provider_traits(rp_uuid, ['HW_CPU_X86_VMX'])
|
||||
|
||||
# make sure we start with no usage on the compute node
|
||||
rp_usages = self._get_provider_usages(rp_uuid)
|
||||
self.assertEqual({'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, rp_usages)
|
||||
|
||||
# create a server with forbidden traits on flavor and no triats on
|
||||
# image
|
||||
server = self._create_server_with_traits(
|
||||
self.flavor_with_forbidden_trait['id'],
|
||||
self.image_id_without_trait)
|
||||
server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
|
||||
|
||||
# Now rebuild the server with a different image with traits
|
||||
rebuild_req_body = {
|
||||
'rebuild': {
|
||||
'imageRef': self.image_id_with_trait
|
||||
}
|
||||
}
|
||||
|
||||
self.api.api_post('/servers/%s/action' % server['id'],
|
||||
rebuild_req_body)
|
||||
# Look for the failed rebuild action.
|
||||
self._wait_for_action_fail_completion(
|
||||
server, instance_actions.REBUILD, 'rebuild_server', self.admin_api)
|
||||
# Assert the server image_ref was rolled back on failure.
|
||||
server = self.api.get_server(server['id'])
|
||||
self.assertEqual(self.image_id_without_trait, server['image']['id'])
|
||||
|
||||
# The server should be in ERROR state
|
||||
self.assertEqual('ERROR', server['status'])
|
||||
self.assertEqual("No valid host was found. Image traits are part of "
|
||||
"forbidden traits in flavor associated with the "
|
||||
"Image. Please relaunch the instance.",
|
||||
server['fault']['message'])
|
||||
|
||||
|
||||
class ServerTestV256Common(ServersTestBase):
|
||||
api_major_version = 'v2.1'
|
||||
|
||||
Reference in New Issue
Block a user