diff --git a/nova/objects/fields.py b/nova/objects/fields.py index 3849c342b0..ae64bbbce2 100644 --- a/nova/objects/fields.py +++ b/nova/objects/fields.py @@ -21,6 +21,7 @@ import six from nova import exception from nova.i18n import _ from nova.network import model as network_model +from nova import objects # Import field errors from oslo.versionedobjects @@ -469,6 +470,17 @@ class ResourceClass(StringField): V1_0 = (VCPU, MEMORY_MB, DISK_GB, PCI_DEVICE, SRIOV_NET_VF, NUMA_SOCKET, NUMA_CORE, NUMA_THREAD, NUMA_MEMORY_MB, IPV4_ADDRESS) + @staticmethod + def normalize_name(rc_name): + if rc_name is None: + return None + norm_name = rc_name.upper() + cust_prefix = objects.ResourceClass.CUSTOM_NAMESPACE + norm_name = cust_prefix + norm_name + # Replace some punctuation characters with underscores + norm_name = re.sub('[^0-9A-Z]+', '_', norm_name) + return norm_name + class RNGModel(BaseNovaEnum): diff --git a/nova/tests/functional/compute/__init__.py b/nova/tests/functional/compute/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/nova/tests/functional/compute/test_resource_tracker.py b/nova/tests/functional/compute/test_resource_tracker.py new file mode 100644 index 0000000000..80abb71c86 --- /dev/null +++ b/nova/tests/functional/compute/test_resource_tracker.py @@ -0,0 +1,383 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import mock +from wsgi_intercept import interceptor + +from nova.api.openstack.placement import deploy +from nova.compute import power_state +from nova.compute import resource_tracker +from nova.compute import task_states +from nova.compute import vm_states +from nova import conf +from nova import context +from nova import objects +from nova.objects import fields +from nova import test +from nova.tests.functional.api.openstack.placement import test_report_client +from nova.tests import uuidsentinel as uuids + +CONF = conf.CONF +VCPU = fields.ResourceClass.VCPU +MEMORY_MB = fields.ResourceClass.MEMORY_MB +DISK_GB = fields.ResourceClass.DISK_GB +COMPUTE_HOST = 'compute-host' + + +class IronicResourceTrackerTest(test.TestCase): + """Tests the behaviour of the resource tracker with regards to the + transitional period between adding support for custom resource classes in + the placement API and integrating inventory and allocation records for + Ironic baremetal nodes with those custom resource classes. 
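+
+    Rather than mocking the placement service, these tests run the report
+    client against a real placement WSGI application through wsgi_intercept,
+    so the inventory, allocation and resource class records asserted on
+    below are the ones the report client actually writes.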
+ """ + + FLAVOR_FIXTURES = { + 'CUSTOM_SMALL_IRON': objects.Flavor( + name='CUSTOM_SMALL_IRON', + flavorid=42, + vcpus=4, + memory_mb=4096, + root_gb=1024, + swap=0, + ephemeral_gb=0, + ), + 'CUSTOM_BIG_IRON': objects.Flavor( + name='CUSTOM_BIG_IRON', + flavorid=43, + vcpus=16, + memory_mb=65536, + root_gb=1024, + swap=0, + ephemeral_gb=0, + ), + } + + COMPUTE_NODE_FIXTURES = { + uuids.cn1: objects.ComputeNode( + uuid=uuids.cn1, + hypervisor_hostname='cn1', + hypervisor_type='ironic', + hypervisor_version=0, + cpu_info="", + host=COMPUTE_HOST, + vcpus=4, + vcpus_used=0, + cpu_allocation_ratio=1.0, + memory_mb=4096, + memory_mb_used=0, + ram_allocation_ratio=1.0, + local_gb=1024, + local_gb_used=0, + disk_allocation_ratio=1.0, + ), + uuids.cn2: objects.ComputeNode( + uuid=uuids.cn2, + hypervisor_hostname='cn2', + hypervisor_type='ironic', + hypervisor_version=0, + cpu_info="", + host=COMPUTE_HOST, + vcpus=4, + vcpus_used=0, + cpu_allocation_ratio=1.0, + memory_mb=4096, + memory_mb_used=0, + ram_allocation_ratio=1.0, + local_gb=1024, + local_gb_used=0, + disk_allocation_ratio=1.0, + ), + uuids.cn3: objects.ComputeNode( + uuid=uuids.cn3, + hypervisor_hostname='cn3', + hypervisor_type='ironic', + hypervisor_version=0, + cpu_info="", + host=COMPUTE_HOST, + vcpus=16, + vcpus_used=0, + cpu_allocation_ratio=1.0, + memory_mb=65536, + memory_mb_used=0, + ram_allocation_ratio=1.0, + local_gb=2048, + local_gb_used=0, + disk_allocation_ratio=1.0, + ), + } + + INSTANCE_FIXTURES = { + uuids.instance1: objects.Instance( + uuid=uuids.instance1, + flavor=FLAVOR_FIXTURES['CUSTOM_SMALL_IRON'], + vm_state=vm_states.BUILDING, + task_state=task_states.SPAWNING, + power_state=power_state.RUNNING, + project_id='project', + ), + } + + def setUp(self): + super(IronicResourceTrackerTest, self).setUp() + self.flags(auth_strategy='noauth2', group='api') + self.flags( + reserved_host_memory_mb=0, + cpu_allocation_ratio=1.0, + ram_allocation_ratio=1.0, + disk_allocation_ratio=1.0, + ) + + self.ctx = context.RequestContext('user', 'project') + self.app = lambda: deploy.loadapp(CONF) + self.report_client = test_report_client.NoAuthReportClient() + + driver = mock.MagicMock(autospec='nova.virt.driver.ComputeDriver') + driver.node_is_available.return_value = True + self.driver_mock = driver + self.rt = resource_tracker.ResourceTracker(COMPUTE_HOST, driver) + self.rt.scheduler_client.reportclient = self.report_client + self.url = 'http://localhost/placement' + self.create_fixtures() + + def create_fixtures(self): + for flavor in self.FLAVOR_FIXTURES.values(): + flavor._context = self.ctx + flavor.obj_set_defaults() + flavor.create() + + # We create some compute node records in the Nova cell DB to simulate + # data before adding integration for Ironic baremetal nodes with the + # placement API... 
+ for cn in self.COMPUTE_NODE_FIXTURES.values(): + cn._context = self.ctx + cn.obj_set_defaults() + cn.create() + + for instance in self.INSTANCE_FIXTURES.values(): + instance._context = self.ctx + instance.obj_set_defaults() + instance.create() + + def placement_get_inventory(self, rp_uuid): + url = '/resource_providers/%s/inventories' % rp_uuid + resp = self.report_client.get(url) + if 200 <= resp.status_code < 300: + return resp.json()['inventories'] + else: + return resp.status_code + + def placement_get_allocations(self, consumer_uuid): + url = '/allocations/%s' % consumer_uuid + resp = self.report_client.get(url) + if 200 <= resp.status_code < 300: + return resp.json()['allocations'] + else: + return resp.status_code + + def placement_get_custom_rcs(self): + url = '/resource_classes' + resp = self.report_client.get(url) + if 200 <= resp.status_code < 300: + all_rcs = resp.json()['resource_classes'] + return [rc['name'] for rc in all_rcs + if rc['name'] not in fields.ResourceClass.STANDARD] + + @mock.patch('nova.compute.utils.is_volume_backed_instance', + return_value=False) + @mock.patch('nova.objects.compute_node.ComputeNode.save') + @mock.patch('keystoneauth1.session.Session.get_auth_headers', + return_value={'x-auth-token': 'admin'}) + @mock.patch('keystoneauth1.session.Session.get_endpoint', + return_value='http://localhost/placement') + def test_ironic_ocata_to_pike(self, mock_vbi, mock_endpoint, mock_auth, + mock_cn): + """Check that when going from an Ocata installation with Ironic having + node's resource class attributes set, that we properly "auto-heal" the + inventory and allocation records in the placement API to account for + both the old-style VCPU/MEMORY_MB/DISK_GB resources as well as the new + custom resource class from Ironic's node.resource_class attribute. + """ + with interceptor.RequestsInterceptor( + app=self.app, url=self.url): + # Before the resource tracker is "initialized", we shouldn't have + # any compute nodes in the RT's cache... + self.assertEqual(0, len(self.rt.compute_nodes)) + + # There should not be any records in the placement API since we + # haven't yet run update_available_resource() in the RT. + for cn in self.COMPUTE_NODE_FIXTURES.values(): + self.assertEqual(404, self.placement_get_inventory(cn.uuid)) + + for inst in self.INSTANCE_FIXTURES.keys(): + self.assertEqual({}, self.placement_get_allocations(inst)) + + # Nor should there be any custom resource classes in the placement + # API, since we haven't had an Ironic node's resource class set yet + self.assertEqual(0, len(self.placement_get_custom_rcs())) + + # Now "initialize" the resource tracker as if the compute host is a + # Ocata host, with Ironic virt driver, but the admin has not yet + # added a resource_class attribute to the Ironic baremetal nodes in + # her system. + # NOTE(jaypipes): This is what nova.compute.manager.ComputeManager + # does when "initializing" the service... + for cn in self.COMPUTE_NODE_FIXTURES.values(): + nodename = cn.hypervisor_hostname + self.driver_mock.get_available_resource.return_value = { + 'hypervisor_hostname': nodename, + 'hypervisor_type': 'ironic', + 'hypervisor_version': 0, + 'vcpus': cn.vcpus, + 'vcpus_used': cn.vcpus_used, + 'memory_mb': cn.memory_mb, + 'memory_mb_used': cn.memory_mb_used, + 'local_gb': cn.local_gb, + 'local_gb_used': cn.local_gb_used, + 'numa_topology': None, + 'resource_class': None, # Act like admin hasn't set yet... 
+ } + self.driver_mock.get_inventory.return_value = { + VCPU: { + 'total': cn.vcpus, + 'reserved': 0, + 'min_unit': cn.vcpus, + 'max_unit': cn.vcpus, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + MEMORY_MB: { + 'total': cn.memory_mb, + 'reserved': 0, + 'min_unit': cn.memory_mb, + 'max_unit': cn.memory_mb, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + DISK_GB: { + 'total': cn.local_gb, + 'reserved': 0, + 'min_unit': cn.local_gb, + 'max_unit': cn.local_gb, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + } + self.rt.update_available_resource(self.ctx, nodename) + + self.assertEqual(3, len(self.rt.compute_nodes)) + # A canary just to make sure the assertion below about the custom + # resource class being added wasn't already added somehow... + crcs = self.placement_get_custom_rcs() + self.assertNotIn('CUSTOM_SMALL_IRON', crcs) + + # Verify that the placement API has the "old-style" resources in + # inventory and allocations + for cn in self.COMPUTE_NODE_FIXTURES.values(): + inv = self.placement_get_inventory(cn.uuid) + self.assertEqual(3, len(inv)) + + # Now "spawn" an instance to the first compute node by calling the + # RT's instance_claim(), which should, in the case of an Ironic + # instance, grab the full compute node for the instance and write + # allocation records for VCPU, MEMORY_MB, and DISK_GB + cn1_obj = self.COMPUTE_NODE_FIXTURES[uuids.cn1] + cn1_nodename = cn1_obj.hypervisor_hostname + inst = self.INSTANCE_FIXTURES[uuids.instance1] + with self.rt.instance_claim(self.ctx, inst, cn1_nodename): + pass + + allocs = self.placement_get_allocations(inst.uuid) + self.assertEqual(1, len(allocs)) + self.assertIn(uuids.cn1, allocs) + + resources = allocs[uuids.cn1]['resources'] + self.assertEqual(3, len(resources)) + for rc in (VCPU, MEMORY_MB, DISK_GB): + self.assertIn(rc, resources) + + # Now we emulate the operator setting ONE of the Ironic node's + # resource class attribute to the value of a custom resource class + # and re-run update_available_resource(). We will expect to see the + # inventory and allocations reset for the first compute node that + # had an instance on it. The new inventory and allocation records + # will be for VCPU, MEMORY_MB, DISK_GB, and also a new record for + # the custom resource class of the Ironic node. 
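+            # In the real Ironic driver, node.resource_class 'small-iron' is
+            # turned into 'CUSTOM_SMALL_IRON' by get_inventory() via
+            # fields.ResourceClass.normalize_name(); because the driver is
+            # mocked here, the get_inventory() stub below reports the
+            # already-normalized name directly.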
+ self.driver_mock.get_available_resource.return_value = { + 'hypervisor_hostname': cn1_obj.hypervisor_hostname, + 'hypervisor_type': 'ironic', + 'hypervisor_version': 0, + 'vcpus': cn1_obj.vcpus, + 'vcpus_used': cn1_obj.vcpus_used, + 'memory_mb': cn1_obj.memory_mb, + 'memory_mb_used': cn1_obj.memory_mb_used, + 'local_gb': cn1_obj.local_gb, + 'local_gb_used': cn1_obj.local_gb_used, + 'numa_topology': None, + 'resource_class': 'small-iron', + } + self.driver_mock.get_inventory.return_value = { + VCPU: { + 'total': cn1_obj.vcpus, + 'reserved': 0, + 'min_unit': cn1_obj.vcpus, + 'max_unit': cn1_obj.vcpus, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + MEMORY_MB: { + 'total': cn1_obj.memory_mb, + 'reserved': 0, + 'min_unit': cn1_obj.memory_mb, + 'max_unit': cn1_obj.memory_mb, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + DISK_GB: { + 'total': cn1_obj.local_gb, + 'reserved': 0, + 'min_unit': cn1_obj.local_gb, + 'max_unit': cn1_obj.local_gb, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + 'CUSTOM_SMALL_IRON': { + 'total': 1, + 'reserved': 0, + 'min_unit': 1, + 'max_unit': 1, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + } + self.rt.update_available_resource(self.ctx, cn1_nodename) + + # Verify the auto-creation of the custom resource class, normalized + # to what the placement API expects + self.assertIn('CUSTOM_SMALL_IRON', self.placement_get_custom_rcs()) + + allocs = self.placement_get_allocations(inst.uuid) + self.assertEqual(1, len(allocs)) + self.assertIn(uuids.cn1, allocs) + + resources = allocs[uuids.cn1]['resources'] + self.assertEqual(3, len(resources)) + for rc in (VCPU, MEMORY_MB, DISK_GB): + self.assertIn(rc, resources) + + # TODO(jaypipes): Check allocations include the CUSTOM_SMALL_IRON + # resource class. At the moment, we do not add an allocation record + # for the Ironic custom resource class. Once the flavor is updated + # to store a resources:$CUSTOM_RESOURCE_CLASS=1 extra_spec key and + # the scheduler is constructing the request_spec to actually + # request a single amount of that custom resource class, we will + # modify the allocation/claim to consume only the custom resource + # class and not the VCPU, MEMORY_MB and DISK_GB. diff --git a/nova/tests/unit/virt/ironic/test_driver.py b/nova/tests/unit/virt/ironic/test_driver.py index 890991ea74..a970397cd2 100644 --- a/nova/tests/unit/virt/ironic/test_driver.py +++ b/nova/tests/unit/virt/ironic/test_driver.py @@ -32,6 +32,7 @@ from nova import context as nova_context from nova import exception from nova import hash_ring from nova import objects +from nova.objects import fields from nova import servicegroup from nova import test from nova.tests.unit import fake_instance @@ -752,6 +753,123 @@ class IronicDriverTestCase(test.NoDBTestCase): expected_uuids = [n['uuid'] for n in node_dicts if n['expected']] self.assertEqual(sorted(expected_uuids), sorted(available_nodes)) + @mock.patch.object(ironic_driver.IronicDriver, '_node_resource') + @mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache') + def test_get_inventory_no_rc(self, mock_nfc, mock_nr): + """Ensure that when node.resource_class is missing, that we return the + legacy VCPU, MEMORY_MB and DISK_GB resources for inventory. 
+ """ + mock_nr.return_value = { + 'vcpus': 24, + 'memory_mb': 1024, + 'local_gb': 100, + 'resource_class': None, + } + + result = self.driver.get_inventory(mock.sentinel.nodename) + + expected = { + fields.ResourceClass.VCPU: { + 'total': 24, + 'reserved': 0, + 'min_unit': 24, + 'max_unit': 24, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + fields.ResourceClass.MEMORY_MB: { + 'total': 1024, + 'reserved': 0, + 'min_unit': 1024, + 'max_unit': 1024, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + fields.ResourceClass.DISK_GB: { + 'total': 100, + 'reserved': 0, + 'min_unit': 100, + 'max_unit': 100, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + } + mock_nfc.assert_called_once_with(mock.sentinel.nodename) + mock_nr.assert_called_once_with(mock_nfc.return_value) + self.assertEqual(expected, result) + + @mock.patch.object(ironic_driver.IronicDriver, '_node_resource') + @mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache') + def test_get_inventory_with_rc(self, mock_nfc, mock_nr): + """Ensure that when node.resource_class is present, that we return the + legacy VCPU, MEMORY_MB and DISK_GB resources for inventory in addition + to the custom resource class inventory record. + """ + mock_nr.return_value = { + 'vcpus': 24, + 'memory_mb': 1024, + 'local_gb': 100, + 'resource_class': 'iron-nfv', + } + + result = self.driver.get_inventory(mock.sentinel.nodename) + + expected = { + fields.ResourceClass.VCPU: { + 'total': 24, + 'reserved': 0, + 'min_unit': 24, + 'max_unit': 24, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + fields.ResourceClass.MEMORY_MB: { + 'total': 1024, + 'reserved': 0, + 'min_unit': 1024, + 'max_unit': 1024, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + fields.ResourceClass.DISK_GB: { + 'total': 100, + 'reserved': 0, + 'min_unit': 100, + 'max_unit': 100, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + 'CUSTOM_IRON_NFV': { + 'total': 1, + 'reserved': 0, + 'min_unit': 1, + 'max_unit': 1, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + } + mock_nfc.assert_called_once_with(mock.sentinel.nodename) + mock_nr.assert_called_once_with(mock_nfc.return_value) + self.assertEqual(expected, result) + + @mock.patch.object(ironic_driver.IronicDriver, '_node_resource') + @mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache') + def test_get_inventory_disabled_node(self, mock_nfc, mock_nr): + """Ensure that when vcpus == 0 (which happens when a node is disabled), + that get_inventory() returns an empty dict. + """ + mock_nr.return_value = { + 'vcpus': 0, + 'memory_mb': 0, + 'local_gb': 0, + 'resource_class': None, + } + + result = self.driver.get_inventory(mock.sentinel.nodename) + mock_nfc.assert_called_once_with(mock.sentinel.nodename) + mock_nr.assert_called_once_with(mock_nfc.return_value) + self.assertEqual({}, result) + @mock.patch.object(FAKE_CLIENT.node, 'get') @mock.patch.object(FAKE_CLIENT.node, 'list') @mock.patch.object(objects.InstanceList, 'get_uuids_by_host') diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index 0afcc037e4..bcd3f45616 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -609,6 +609,66 @@ class IronicDriver(virt_driver.ComputeDriver): return node_uuids + def get_inventory(self, nodename): + """Return a dict, keyed by resource class, of inventory information for + the supplied node. 
+ """ + node = self._node_from_cache(nodename) + info = self._node_resource(node) + # TODO(jaypipes): Completely remove the reporting of VCPU, MEMORY_MB, + # and DISK_GB resource classes in early Queens when Ironic nodes will + # *always* return the custom resource class that represents the + # baremetal node class in an atomic, singular unit. + if info['vcpus'] == 0: + # NOTE(jaypipes): The driver can return 0-valued vcpus when the + # node is "disabled". In the future, we should detach inventory + # accounting from the concept of a node being disabled or not. The + # two things don't really have anything to do with each other. + return {} + + result = { + obj_fields.ResourceClass.VCPU: { + 'total': info['vcpus'], + 'reserved': 0, + 'min_unit': info['vcpus'], + 'max_unit': info['vcpus'], + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + obj_fields.ResourceClass.MEMORY_MB: { + 'total': info['memory_mb'], + 'reserved': 0, + 'min_unit': info['memory_mb'], + 'max_unit': info['memory_mb'], + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + obj_fields.ResourceClass.DISK_GB: { + 'total': info['local_gb'], + 'reserved': 0, + 'min_unit': info['local_gb'], + 'max_unit': info['local_gb'], + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + } + rc_name = info.get('resource_class') + if rc_name is not None: + # TODO(jaypipes): Raise an exception in Queens if Ironic doesn't + # report a resource class for the node + norm_name = obj_fields.ResourceClass.normalize_name(rc_name) + if norm_name is not None: + result[norm_name] = { + 'total': 1, + 'reserved': 0, + 'min_unit': 1, + 'max_unit': 1, + 'step_size': 1, + 'allocation_ratio': 1.0, + } + + return result + def get_available_resource(self, nodename): """Retrieve resource information. @@ -627,16 +687,24 @@ class IronicDriver(virt_driver.ComputeDriver): # cache, let's try to populate it. self._refresh_cache() + node = self._node_from_cache(nodename) + return self._node_resource(node) + + def _node_from_cache(self, nodename): + """Returns a node from the cache, retrieving the node from Ironic API + if the node doesn't yet exist in the cache. + """ cache_age = time.time() - self.node_cache_time if nodename in self.node_cache: LOG.debug("Using cache for node %(node)s, age: %(age)s", {'node': nodename, 'age': cache_age}) - node = self.node_cache[nodename] + return self.node_cache[nodename] else: LOG.debug("Node %(node)s not found in cache, age: %(age)s", {'node': nodename, 'age': cache_age}) node = self._get_node(nodename) - return self._node_resource(node) + self.node_cache[nodename] = node + return node def get_info(self, instance): """Get the current state and resource usage for this instance.