diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index 6ff940419e..aed8df8396 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -14,6 +14,7 @@ """Handles database requests from other nova services.""" +import collections import contextlib import copy import eventlet @@ -1667,6 +1668,26 @@ class ComputeTaskManager(base.Base): 'cells': len(hosts_by_cell)}) clock.start() + stats = collections.defaultdict(lambda: (0, 0, 0, 0)) + failed_images = collections.defaultdict(int) + down_hosts = set() + + def wrap_cache_images(ctxt, host, image_ids): + result = self.compute_rpcapi.cache_images( + ctxt, host=host, image_ids=image_ids) + for image_id, status in result.items(): + cached, existing, error, unsupported = stats[image_id] + if status == 'error': + failed_images[image_id] += 1 + error += 1 + elif status == 'cached': + cached += 1 + elif status == 'existing': + existing += 1 + elif status == 'unsupported': + unsupported += 1 + stats[image_id] = (cached, existing, error, unsupported) + for cell_uuid, hosts in hosts_by_cell.items(): cell = cells_by_uuid[cell_uuid] with nova_context.target_cell(context, cell) as target_ctxt: @@ -1674,27 +1695,47 @@ class ComputeTaskManager(base.Base): service = objects.Service.get_by_compute_host(target_ctxt, host) if not self.servicegroup_api.service_is_up(service): + down_hosts.add(host) LOG.info( 'Skipping image pre-cache request to compute ' '%(host)r because it is not up', {'host': host}) continue - fetch_pool.spawn_n(self.compute_rpcapi.cache_images, - target_ctxt, - host=host, - image_ids=image_ids) + fetch_pool.spawn_n(wrap_cache_images, target_ctxt, host, + image_ids) # Wait until all those things finish fetch_pool.waitall() + overall_stats = {'cached': 0, 'existing': 0, 'error': 0, + 'unsupported': 0} + for cached, existing, error, unsupported in stats.values(): + overall_stats['cached'] += cached + overall_stats['existing'] += existing + overall_stats['error'] += error + overall_stats['unsupported'] += unsupported + clock.stop() - # FIXME(danms): Calculate some interesting statistics about the image - # download process to log for the admin. LOG.info('Image pre-cache operation for image(s) %(image_ids)s ' - 'completed in %(time).2f seconds', + 'completed in %(time).2f seconds; ' + '%(cached)i cached, %(existing)i existing, %(error)i errors, ' + '%(unsupported)i unsupported, %(skipped)i skipped (down) ' + 'hosts', {'image_ids': ','.join(image_ids), - 'time': clock.elapsed()}) + 'time': clock.elapsed(), + 'cached': overall_stats['cached'], + 'existing': overall_stats['existing'], + 'error': overall_stats['error'], + 'unsupported': overall_stats['unsupported'], + 'skipped': len(down_hosts), + }) + # Log error'd images specifically at warning level + for image_id, fails in failed_images.items(): + LOG.warning('Image pre-cache operation for image %(image)s ' + 'failed %(fails)i times', + {'image': image_id, + 'fails': fails}) compute_utils.notify_about_aggregate_action( context, aggregate, diff --git a/nova/tests/functional/compute/test_cache_image.py b/nova/tests/functional/compute/test_cache_image.py index 844ecae0bd..ada58ad603 100644 --- a/nova/tests/functional/compute/test_cache_image.py +++ b/nova/tests/functional/compute/test_cache_image.py @@ -74,3 +74,12 @@ class ImageCacheTest(test.TestCase): 'aggregate.cache_images.start') fake_notifier.wait_for_versioned_notifications( 'aggregate.cache_images.end') + + logtext = self.stdlog.logger.output + + self.assertIn( + '3 cached, 0 existing, 0 errors, 0 unsupported, 1 skipped', + logtext) + self.assertNotIn( + 'Image pre-cache operation for image an-image failed', + logtext) diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index 6ab271de03..a2f4e64502 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -3661,3 +3661,47 @@ class ConductorTaskAPITestCase(_BaseTaskTestCase, test_compute.BaseTestCase): mock_cache.assert_not_called() _test() + + @mock.patch('nova.objects.HostMapping.get_by_host') + @mock.patch('nova.context.target_cell') + @mock.patch('nova.objects.Service.get_by_compute_host') + def test_cache_images_failed_compute(self, mock_service, mock_target, + mock_gbh): + """Test the edge cases for cache_images(), specifically the + error, skip, and down situations. + """ + + fake_service = objects.Service(disabled=False, forced_down=False, + last_seen_up=timeutils.utcnow()) + fake_down_service = objects.Service(disabled=False, forced_down=True, + last_seen_up=None) + mock_service.side_effect = [fake_service, fake_service, + fake_down_service] + mock_target.__return_value.__enter__.return_value = self.context + fake_cell = objects.CellMapping(uuid=uuids.cell, + database_connection='', + transport_url='') + fake_mapping = objects.HostMapping(cell_mapping=fake_cell) + mock_gbh.return_value = fake_mapping + fake_agg = objects.Aggregate(name='agg', uuid=uuids.agg, + hosts=['host1', 'host2', 'host3']) + + @mock.patch.object(self.conductor_manager.compute_rpcapi, + 'cache_images') + def _test(mock_cache): + mock_cache.side_effect = [ + {'image1': 'unsupported'}, + {'image1': 'error'}, + ] + self.conductor_manager.cache_images(self.context, + fake_agg, + ['image1']) + + _test() + + logtext = self.stdlog.logger.output + self.assertIn( + '0 cached, 0 existing, 1 errors, 1 unsupported, 1 skipped', + logtext) + self.assertIn('host3\' because it is not up', logtext) + self.assertIn('image1 failed 1 times', logtext)