Merge "Compute manager to use thread pools selectively"
This commit is contained in:
@@ -47,7 +47,7 @@ Tunables for the native threading mode
|
||||
As native threads are more expensive resources than greenthreads Nova provides
|
||||
a set of configuration options to allow fine tuning the deployment based on
|
||||
load and resource constraints. The default values are selected to support a
|
||||
basic, small deployment without consuming substantially more memory resources,
|
||||
basic, small deployment without consuming substantially more memory resources,
|
||||
than the legacy Eventlet mode. Increasing the size of the below thread pools
|
||||
means that the given service will consume more memory but will also allow more
|
||||
tasks to be executed concurrently.
|
||||
@@ -79,6 +79,23 @@ tasks to be executed concurrently.
|
||||
|
||||
This option is relevant to every nova service using ``nova.utils.spawn()``.
|
||||
|
||||
* :oslo.config:option:`sync_power_state_pool_size`: Used by the
|
||||
nova-compute service to sync the power state of each instance on the host
|
||||
between the hypervisor and the DB. Since nova 33.0.0 (2026.1 Gazpacho) the
|
||||
default value of this option is changed from 1000 to 5 to have a sane default
|
||||
in native threading mode. Increasing this value in native threading mode
|
||||
increases the nova-compute memory consumption on a host that has many
|
||||
instances.
|
||||
|
||||
* :oslo.config:option:`max_concurrent_live_migrations`: Used by the
|
||||
nova-compute service to limit the number of outgoing concurrent live
|
||||
migrations from the host. It is implemented via a thread pool. So increasing
|
||||
the number of concurrent live migrations will increase the nova-compute
|
||||
service memory consumption in native threading mode. It is almost always
|
||||
a bad idea to change this config option from its default value, 1. If
|
||||
more performant live migration is needed then enable
|
||||
:oslo.config:option:`libvirt.live_migration_parallel_connections` instead.
|
||||
|
||||
Seeing the usage of the pools
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
||||
+37
-15
@@ -39,7 +39,6 @@ import typing as ty
|
||||
|
||||
from cinderclient import exceptions as cinder_exception
|
||||
from cursive import exception as cursive_exception
|
||||
import futurist
|
||||
from keystoneauth1 import exceptions as keystone_exception
|
||||
from openstack import exceptions as sdk_exc
|
||||
import os_traits
|
||||
@@ -667,9 +666,10 @@ class ComputeManager(manager.Manager):
|
||||
self.compute_task_api = conductor.ComputeTaskAPI()
|
||||
self.query_client = query.SchedulerQueryClient()
|
||||
self.instance_events = InstanceEvents()
|
||||
self._sync_power_executor = futurist.GreenThreadPoolExecutor(
|
||||
self._sync_power_executor = nova.utils.create_executor(
|
||||
max_workers=CONF.sync_power_state_pool_size)
|
||||
self._syncs_in_progress = {}
|
||||
self._syncs_in_progress: set[str] = set()
|
||||
self._syncs_in_progress_lock = threading.Lock()
|
||||
self.send_instance_updates = (
|
||||
CONF.filter_scheduler.track_instance_changes)
|
||||
if CONF.max_concurrent_builds != 0:
|
||||
@@ -683,11 +683,27 @@ class ComputeManager(manager.Manager):
|
||||
else:
|
||||
self._snapshot_semaphore = compute_utils.UnlimitedSemaphore()
|
||||
if CONF.max_concurrent_live_migrations > 0:
|
||||
self._live_migration_executor = futurist.GreenThreadPoolExecutor(
|
||||
self._live_migration_executor = nova.utils.create_executor(
|
||||
max_workers=CONF.max_concurrent_live_migrations)
|
||||
else:
|
||||
# CONF.max_concurrent_live_migrations is 0 (unlimited)
|
||||
self._live_migration_executor = futurist.GreenThreadPoolExecutor()
|
||||
# setting CONF.max_concurrent_live_migrations to 0 (unlimited)
|
||||
# is deprecated but still supported, so we need to use a sane
|
||||
# default value for each threading mode
|
||||
LOG.warning("Nova compute deprecated the support of unlimited "
|
||||
"parallel live migration so "
|
||||
"[DEFAULT]max_concurrent_live_migrations configured "
|
||||
"with value 0 is deprecated and will not be supported "
|
||||
"in future releases. Please set an explicit positive"
|
||||
"value to this config option instead.")
|
||||
if utils.concurrency_mode_threading():
|
||||
self._live_migration_executor = nova.utils.create_executor(
|
||||
max_workers=5)
|
||||
else:
|
||||
# In eventlet mode we need to keep backward compatibility and
|
||||
# 1000 greenthreads to emulate unlimited.
|
||||
self._live_migration_executor = nova.utils.create_executor(
|
||||
max_workers=1000)
|
||||
|
||||
# This is a dict, keyed by instance uuid, to a two-item tuple of
|
||||
# migration object and Future for the queued live migration.
|
||||
self._waiting_live_migrations = {}
|
||||
@@ -706,6 +722,11 @@ class ComputeManager(manager.Manager):
|
||||
self.rt = resource_tracker.ResourceTracker(
|
||||
self.host, self.driver, reportclient=self.reportclient)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def syncs_in_progress(self) -> ty.Iterator[set[str]]:
|
||||
with self._syncs_in_progress_lock:
|
||||
yield self._syncs_in_progress
|
||||
|
||||
def reset(self):
|
||||
LOG.info('Reloading compute RPC API')
|
||||
compute_rpcapi.reset_globals()
|
||||
@@ -11031,20 +11052,21 @@ class ComputeManager(manager.Manager):
|
||||
LOG.exception("Periodic sync_power_state task had an "
|
||||
"error while processing an instance.",
|
||||
instance=db_instance)
|
||||
|
||||
self._syncs_in_progress.pop(db_instance.uuid)
|
||||
with self.syncs_in_progress() as syncs:
|
||||
syncs.remove(db_instance.uuid)
|
||||
|
||||
for db_instance in db_instances:
|
||||
# process syncs asynchronously - don't want instance locking to
|
||||
# block entire periodic task thread
|
||||
uuid = db_instance.uuid
|
||||
if uuid in self._syncs_in_progress:
|
||||
LOG.debug('Sync already in progress for %s', uuid)
|
||||
else:
|
||||
LOG.debug('Triggering sync for uuid %s', uuid)
|
||||
self._syncs_in_progress[uuid] = True
|
||||
nova.utils.spawn_on(
|
||||
self._sync_power_executor, _sync, db_instance)
|
||||
with self.syncs_in_progress() as syncs:
|
||||
if uuid in syncs:
|
||||
LOG.debug('Sync already in progress for %s', uuid)
|
||||
else:
|
||||
LOG.debug('Triggering sync for uuid %s', uuid)
|
||||
syncs.add(uuid)
|
||||
nova.utils.spawn_on(
|
||||
self._sync_power_executor, _sync, db_instance)
|
||||
|
||||
def _query_driver_power_state_and_sync(self, context, db_instance):
|
||||
if db_instance.task_state is not None:
|
||||
|
||||
+10
-5
@@ -694,7 +694,12 @@ that doing so is safe and stable in your environment.
|
||||
|
||||
Possible values:
|
||||
|
||||
* 0 : treated as unlimited.
|
||||
* ``0``: Deprecated since 33.0.0 (2026.1 Gazpacho). This value was previously
|
||||
documented as meaning unlimited but the actual implementation used maximum
|
||||
1000 greenthreads. Since this release, the implementation keeps using 1000
|
||||
greenthreads in eventlet mode and will use 5 native threads in threading
|
||||
mode. In a future release when eventlet support is removed, 0 as a valid
|
||||
value will also be removed.
|
||||
* Any positive integer representing maximum number of live migrations
|
||||
to run concurrently.
|
||||
"""),
|
||||
@@ -732,9 +737,9 @@ Related options:
|
||||
checks
|
||||
"""),
|
||||
cfg.IntOpt('sync_power_state_pool_size',
|
||||
default=1000,
|
||||
default=5,
|
||||
help="""
|
||||
Number of greenthreads available for use to sync power states.
|
||||
Number of threads available for use to sync instance power states.
|
||||
|
||||
This option can be used to reduce the number of concurrent requests
|
||||
made to the hypervisor or system with real instance power states
|
||||
@@ -742,8 +747,8 @@ for performance reasons, for example, with Ironic.
|
||||
|
||||
Possible values:
|
||||
|
||||
* Any positive integer representing greenthreads count.
|
||||
""")
|
||||
* Any positive integer representing threads count.
|
||||
"""),
|
||||
]
|
||||
|
||||
compute_group_opts = [
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
"""Tests for compute service."""
|
||||
|
||||
import datetime
|
||||
import threading
|
||||
|
||||
import fixtures as std_fixtures
|
||||
from itertools import chain
|
||||
import operator
|
||||
@@ -1661,7 +1663,14 @@ class ComputeTestCase(BaseTestCase,
|
||||
def setUp(self):
|
||||
super(ComputeTestCase, self).setUp()
|
||||
self.compute._live_migration_executor = futurist.SynchronousExecutor()
|
||||
# NOTE(gibi): the _sync_power_states periodic task in the
|
||||
# ComputeManager spawns concurrent tasks and uses a lock to
|
||||
# synchronize a shared data structure. As the spawn is made
|
||||
# synchronous, meaning the tasks run on the caller thread. This means
|
||||
# the simple lock causes a deadlock in the unit test. Upgrade that lock
|
||||
# to be reentrant so the test can pass with synchronous spawn.
|
||||
self.useFixture(fixtures.SpawnIsSynchronousFixture())
|
||||
self.compute._syncs_in_progress_lock = threading.RLock()
|
||||
|
||||
self.image_api = image_api.API()
|
||||
self.default_flavor = objects.Flavor.get_by_name(self.context,
|
||||
|
||||
@@ -72,6 +72,7 @@ from nova.tests.unit import fake_network_cache_model
|
||||
from nova.tests.unit.objects import test_instance_fault
|
||||
from nova.tests.unit.objects import test_instance_info_cache
|
||||
from nova.tests.unit.objects import test_instance_numa
|
||||
from nova import utils
|
||||
from nova.virt.block_device import DriverVolumeBlockDevice as driver_bdm_volume
|
||||
from nova.virt import driver as virt_driver
|
||||
from nova.virt import event as virtevent
|
||||
@@ -4288,6 +4289,18 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
|
||||
power_state.NOSTATE,
|
||||
use_slave=True)
|
||||
|
||||
def test_syncs_in_progress(self):
|
||||
self.assertFalse(self.compute._syncs_in_progress_lock.locked())
|
||||
self.compute._syncs_in_progress.add("fake-uuid")
|
||||
|
||||
with self.compute.syncs_in_progress() as syncs:
|
||||
self.assertTrue(self.compute._syncs_in_progress_lock.locked())
|
||||
self.assertEqual({"fake-uuid"}, syncs)
|
||||
syncs.remove("fake-uuid")
|
||||
|
||||
self.assertFalse(self.compute._syncs_in_progress_lock.locked())
|
||||
self.assertEqual(set(), self.compute._syncs_in_progress)
|
||||
|
||||
def test_cleanup_running_deleted_instances_virt_driver_not_ready(self):
|
||||
"""Tests the scenario that the driver raises VirtDriverNotReady
|
||||
when listing instances so the task returns early.
|
||||
@@ -11743,7 +11756,10 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
|
||||
def test_max_concurrent_live_semaphore_unlimited(self):
|
||||
self.flags(max_concurrent_live_migrations=0)
|
||||
mgr = manager.ComputeManager()
|
||||
self.assertEqual(1000, mgr._live_migration_executor._max_workers)
|
||||
if utils.concurrency_mode_threading():
|
||||
self.assertEqual(5, mgr._live_migration_executor._max_workers)
|
||||
else:
|
||||
self.assertEqual(1000, mgr._live_migration_executor._max_workers)
|
||||
|
||||
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
|
||||
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
|
||||
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
---
|
||||
upgrade:
|
||||
- |
|
||||
The meaning of the 0 value of the config option
|
||||
``[DEFAULT]max_concurrent_live_migrations`` has been changed. In the past
|
||||
the implementation of the meaning of "unlimited" used maximum 1000
|
||||
concurrent worker greenthreads. For eventlet mode this behavior is kept but
|
||||
for the native threading mode it is now reduced to 5 native threads. It is
|
||||
almost always a bad idea to change this config option from its default value, 1.
|
||||
Please read the `concurrency
|
||||
<https://docs.openstack.org/nova/latest/admin/concurrency.html>`__
|
||||
guide for more details.
|
||||
- |
|
||||
The default value of the configuration option
|
||||
``[DEFAULT]sync_power_state_pool_size`` is changed from 1000 to 5 to
|
||||
have a value that is safe to use in native threading mode. If you are still
|
||||
using the eventlet mode and relying on a higher value then configure that
|
||||
higher value explicitly before the upgrade. Please read the
|
||||
`concurrency <https://docs.openstack.org/nova/latest/admin/concurrency.html>`__
|
||||
guide for more details.
|
||||
deprecations:
|
||||
- |
|
||||
The possible 0 value of the configuration option
|
||||
``[DEFAULT]max_concurrent_live_migrations`` is deprecated and will be
|
||||
removed in a future release. It is almost always a bad idea to change the
|
||||
default value, 1, of this config option. If more performant live migration
|
||||
is needed, use the ``live_migration_parallel_connections`` config option
|
||||
instead.
|
||||
Reference in New Issue
Block a user