From 0d9622f581e830e7b7bc9763aaa09ba02e99b8bb Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Fri, 20 Dec 2019 10:03:23 -0500 Subject: [PATCH] Handle cell failures in get_compute_nodes_by_host_or_node get_compute_nodes_by_host_or_node uses the scatter_gather_cells function but was not handling the case that a failure result was returned, which could be the called function raising some exception or the cell timing out. This causes issues when the caller of get_compute_nodes_by_host_or_node expects to get a ComputeNodeList back and can do something like len(nodes) on it which fails when the result is not iterable. To be clear, if a cell is down there are going to be problems which likely result in a NoValidHost error during scheduling, but this avoids an ugly TypeError traceback in the scheduler logs. Change-Id: Ia54b5adf0a125ae1f9b86887a07dd1d79821dd54 Closes-Bug: #1857139 --- nova/scheduler/host_manager.py | 5 +++-- nova/tests/unit/scheduler/test_host_manager.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py index 6fce36d748..0f16580da8 100644 --- a/nova/scheduler/host_manager.py +++ b/nova/scheduler/host_manager.py @@ -707,9 +707,10 @@ class HostManager(object): # Only one cell should have values for the compute nodes # so we get them here, or return an empty list if no cell - # has a value + # has a value; be sure to filter out cell failures. nodes = next( - (nodes for nodes in nodes_by_cell.values() if nodes), + (nodes for nodes in nodes_by_cell.values() + if nodes and not context_module.is_cell_failure_sentinel(nodes)), objects.ComputeNodeList()) return nodes diff --git a/nova/tests/unit/scheduler/test_host_manager.py b/nova/tests/unit/scheduler/test_host_manager.py index c6d333bd40..8e18fa6631 100644 --- a/nova/tests/unit/scheduler/test_host_manager.py +++ b/nova/tests/unit/scheduler/test_host_manager.py @@ -1195,6 +1195,24 @@ class HostManagerTestCase(test.NoDBTestCase): self.assertEqual(0, len(result)) + @mock.patch('nova.context.scatter_gather_cells', + side_effect=( # called twice, different return values + {uuids.cell1: test.TestingException('conn fail')}, + {uuids.cell1: nova_context.did_not_respond_sentinel})) + def test_get_compute_nodes_by_host_or_node_filter_errors(self, mock_sgc): + """Make sure get_compute_nodes_by_host_or_node filters out exception + and cell timeout results from scatter_gather_cells. + """ + ctxt = nova_context.get_context() + cell1 = objects.CellMapping(uuid=uuids.cell1) + for x in range(2): # run twice because we have two side effects + nodes = self.host_manager.get_compute_nodes_by_host_or_node( + ctxt, 'host1', None, cell=cell1) + self.assertEqual(0, len(nodes), nodes) + self.assertEqual(2, mock_sgc.call_count, mock_sgc.mock_calls) + mock_sgc.assert_has_calls([mock.call( + ctxt, [cell1], nova_context.CELL_TIMEOUT, mock.ANY)] * 2) + class HostManagerChangedNodesTestCase(test.NoDBTestCase): """Test case for HostManager class."""