Merge "Compute manager to use thread pools selectively"

This commit is contained in:
Zuul
2026-01-16 21:03:28 +00:00
committed by Gerrit Code Review
6 changed files with 119 additions and 22 deletions
+18 -1
View File
@@ -47,7 +47,7 @@ Tunables for the native threading mode
As native threads are more expensive resources than greenthreads Nova provides
a set of configuration options to allow fine tuning the deployment based on
load and resource constraints. The default values are selected to support a
basic, small deployment without consuming substantially more memory resources,
basic, small deployment without consuming substantially more memory resources,
than the legacy Eventlet mode. Increasing the size of the below thread pools
means that the given service will consume more memory but will also allow more
tasks to be executed concurrently.
@@ -79,6 +79,23 @@ tasks to be executed concurrently.
This option is relevant to every nova service using ``nova.utils.spawn()``.
* :oslo.config:option:`sync_power_state_pool_size`: Used by the
nova-compute service to sync the power state of each instance on the host
between the hypervisor and the DB. Since nova 33.0.0 (2026.1 Gazpacho) the
default value of this option is changed from 1000 to 5 to have a sane default
in native threading mode. Increasing this value in native threading mode
increases the nova-compute memory consumption on a host that has many
instances.
* :oslo.config:option:`max_concurrent_live_migrations`: Used by the
nova-compute service to limit the number of outgoing concurrent live
migrations from the host. It is implemented via a thread pool. So increasing
the number of concurrent live migrations will increase the nova-compute
service memory consumption in native threading mode. It is almost always
a bad idea to change this config option from its default value, 1. If
more performant live migration is needed then enable
:oslo.config:option:`libvirt.live_migration_parallel_connections` instead.
Seeing the usage of the pools
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+37 -15
View File
@@ -39,7 +39,6 @@ import typing as ty
from cinderclient import exceptions as cinder_exception
from cursive import exception as cursive_exception
import futurist
from keystoneauth1 import exceptions as keystone_exception
from openstack import exceptions as sdk_exc
import os_traits
@@ -667,9 +666,10 @@ class ComputeManager(manager.Manager):
self.compute_task_api = conductor.ComputeTaskAPI()
self.query_client = query.SchedulerQueryClient()
self.instance_events = InstanceEvents()
self._sync_power_executor = futurist.GreenThreadPoolExecutor(
self._sync_power_executor = nova.utils.create_executor(
max_workers=CONF.sync_power_state_pool_size)
self._syncs_in_progress = {}
self._syncs_in_progress: set[str] = set()
self._syncs_in_progress_lock = threading.Lock()
self.send_instance_updates = (
CONF.filter_scheduler.track_instance_changes)
if CONF.max_concurrent_builds != 0:
@@ -683,11 +683,27 @@ class ComputeManager(manager.Manager):
else:
self._snapshot_semaphore = compute_utils.UnlimitedSemaphore()
if CONF.max_concurrent_live_migrations > 0:
self._live_migration_executor = futurist.GreenThreadPoolExecutor(
self._live_migration_executor = nova.utils.create_executor(
max_workers=CONF.max_concurrent_live_migrations)
else:
# CONF.max_concurrent_live_migrations is 0 (unlimited)
self._live_migration_executor = futurist.GreenThreadPoolExecutor()
# setting CONF.max_concurrent_live_migrations to 0 (unlimited)
# is deprecated but still supported, so we need to use a sane
# default value for each threading mode
LOG.warning("Nova compute deprecated the support of unlimited "
"parallel live migration so "
"[DEFAULT]max_concurrent_live_migrations configured "
"with value 0 is deprecated and will not be supported "
"in future releases. Please set an explicit positive "
"value to this config option instead.")
if utils.concurrency_mode_threading():
self._live_migration_executor = nova.utils.create_executor(
max_workers=5)
else:
# In eventlet mode we need to keep backward compatibility and
# 1000 greenthreads to emulate unlimited.
self._live_migration_executor = nova.utils.create_executor(
max_workers=1000)
# This is a dict, keyed by instance uuid, to a two-item tuple of
# migration object and Future for the queued live migration.
self._waiting_live_migrations = {}
@@ -706,6 +722,11 @@ class ComputeManager(manager.Manager):
self.rt = resource_tracker.ResourceTracker(
self.host, self.driver, reportclient=self.reportclient)
@contextlib.contextmanager
def syncs_in_progress(self) -> ty.Iterator[set[str]]:
    """Expose the set of instance UUIDs with an in-flight power-state
    sync, holding the guarding lock for the duration of the ``with``
    block so callers can read and mutate the set safely.
    """
    self._syncs_in_progress_lock.acquire()
    try:
        yield self._syncs_in_progress
    finally:
        # Always release, even if the caller's block raises.
        self._syncs_in_progress_lock.release()
def reset(self):
LOG.info('Reloading compute RPC API')
compute_rpcapi.reset_globals()
@@ -11031,20 +11052,21 @@ class ComputeManager(manager.Manager):
LOG.exception("Periodic sync_power_state task had an "
"error while processing an instance.",
instance=db_instance)
self._syncs_in_progress.pop(db_instance.uuid)
with self.syncs_in_progress() as syncs:
syncs.remove(db_instance.uuid)
for db_instance in db_instances:
# process syncs asynchronously - don't want instance locking to
# block entire periodic task thread
uuid = db_instance.uuid
if uuid in self._syncs_in_progress:
LOG.debug('Sync already in progress for %s', uuid)
else:
LOG.debug('Triggering sync for uuid %s', uuid)
self._syncs_in_progress[uuid] = True
nova.utils.spawn_on(
self._sync_power_executor, _sync, db_instance)
with self.syncs_in_progress() as syncs:
if uuid in syncs:
LOG.debug('Sync already in progress for %s', uuid)
else:
LOG.debug('Triggering sync for uuid %s', uuid)
syncs.add(uuid)
nova.utils.spawn_on(
self._sync_power_executor, _sync, db_instance)
def _query_driver_power_state_and_sync(self, context, db_instance):
if db_instance.task_state is not None:
+10 -5
View File
@@ -694,7 +694,12 @@ that doing so is safe and stable in your environment.
Possible values:
* 0 : treated as unlimited.
* ``0``: Deprecated since 33.0.0 (2026.1 Gazpacho). This value was previously
documented as meaning unlimited but the actual implementation used maximum
1000 greenthreads. Since this release, the implementation keeps using 1000
greenthreads in eventlet mode and will use 5 native threads in threading
mode. In a future release, when eventlet support is removed, 0 as a valid
value will also be removed.
* Any positive integer representing maximum number of live migrations
to run concurrently.
"""),
@@ -732,9 +737,9 @@ Related options:
checks
"""),
cfg.IntOpt('sync_power_state_pool_size',
default=1000,
default=5,
help="""
Number of greenthreads available for use to sync power states.
Number of threads available for use to sync instance power states.
This option can be used to reduce the number of concurrent requests
made to the hypervisor or system with real instance power states
@@ -742,8 +747,8 @@ for performance reasons, for example, with Ironic.
Possible values:
* Any positive integer representing greenthreads count.
""")
* Any positive integer representing threads count.
"""),
]
compute_group_opts = [
+9
View File
@@ -18,6 +18,8 @@
"""Tests for compute service."""
import datetime
import threading
import fixtures as std_fixtures
from itertools import chain
import operator
@@ -1661,7 +1663,14 @@ class ComputeTestCase(BaseTestCase,
def setUp(self):
super(ComputeTestCase, self).setUp()
self.compute._live_migration_executor = futurist.SynchronousExecutor()
# NOTE(gibi): the _sync_power_states periodic task in the
# ComputeManager spawns concurrent tasks and uses a lock to
# synchronize a shared data structure. As the spawn is made
# synchronous, the tasks run on the caller thread. This means
# a non-reentrant lock would deadlock in the unit test. Upgrade that
# lock to be reentrant so the test can pass with synchronous spawn.
self.useFixture(fixtures.SpawnIsSynchronousFixture())
self.compute._syncs_in_progress_lock = threading.RLock()
self.image_api = image_api.API()
self.default_flavor = objects.Flavor.get_by_name(self.context,
+17 -1
View File
@@ -72,6 +72,7 @@ from nova.tests.unit import fake_network_cache_model
from nova.tests.unit.objects import test_instance_fault
from nova.tests.unit.objects import test_instance_info_cache
from nova.tests.unit.objects import test_instance_numa
from nova import utils
from nova.virt.block_device import DriverVolumeBlockDevice as driver_bdm_volume
from nova.virt import driver as virt_driver
from nova.virt import event as virtevent
@@ -4288,6 +4289,18 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
power_state.NOSTATE,
use_slave=True)
def test_syncs_in_progress(self):
    """The syncs_in_progress context manager holds the guarding lock
    while the shared set is exposed and releases it on exit.
    """
    uuid = "fake-uuid"
    lock = self.compute._syncs_in_progress_lock
    self.assertFalse(lock.locked())
    self.compute._syncs_in_progress.add(uuid)
    with self.compute.syncs_in_progress() as in_flight:
        # Inside the context the lock is held and the live set is
        # visible and mutable.
        self.assertTrue(lock.locked())
        self.assertEqual({uuid}, in_flight)
        in_flight.remove(uuid)
    # On exit the lock is released and the mutation persisted.
    self.assertFalse(lock.locked())
    self.assertEqual(set(), self.compute._syncs_in_progress)
def test_cleanup_running_deleted_instances_virt_driver_not_ready(self):
"""Tests the scenario that the driver raises VirtDriverNotReady
when listing instances so the task returns early.
@@ -11743,7 +11756,10 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
def test_max_concurrent_live_semaphore_unlimited(self):
    """With max_concurrent_live_migrations=0 (deprecated "unlimited")
    the executor falls back to 5 workers in native threading mode and
    1000 in eventlet mode for backward compatibility.
    """
    self.flags(max_concurrent_live_migrations=0)
    mgr = manager.ComputeManager()
    expected = 5 if utils.concurrency_mode_threading() else 1000
    self.assertEqual(expected, mgr._live_migration_executor._max_workers)
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
@@ -0,0 +1,28 @@
---
upgrade:
- |
The meaning of the 0 value of the config option
``[DEFAULT]max_concurrent_live_migrations`` has been changed. In the past
the implementation of "unlimited" used a maximum of 1000
concurrent worker greenthreads. For eventlet mode this behavior is kept but
for the native threading mode it is now reduced to 5 native threads. It is
almost always a bad idea to change this config option from its default value, 1.
Please read the `concurrency
<https://docs.openstack.org/nova/latest/admin/concurrency.html>`__
guide for more details.
- |
The default value of the configuration option
``[DEFAULT]sync_power_state_pool_size`` is changed from 1000 to 5 to
have a value that is safe to use in native threading mode. If you are still
using the eventlet mode and relying on a higher value then configure that
higher value explicitly before the upgrade. Please read the
`concurrency <https://docs.openstack.org/nova/latest/admin/concurrency.html>`__
guide for more details.
deprecations:
- |
The possible 0 value of the configuration option
``[DEFAULT]max_concurrent_live_migrations`` is deprecated and will be
removed in a future release. It is almost always a bad idea to change the
default value, 1, of this config option. If more performant live migration
is needed, use the ``live_migration_parallel_connections`` config option
instead.