diff --git a/nova/conf/libvirt.py b/nova/conf/libvirt.py index bb801eff2b..9e2cc08c7d 100644 --- a/nova/conf/libvirt.py +++ b/nova/conf/libvirt.py @@ -654,7 +654,11 @@ this environment. Possible cache modes: -* default: Same as writethrough. +* default: "It Depends" -- For Nova-managed disks, ``none``, if the host + file system is capable of Linux's 'O_DIRECT' semantics; otherwise + ``writeback``. For volume drivers, the default is driver-dependent: + ``none`` for everything except for SMBFS and Virtuozzo (which use + ``writeback``). * none: With caching mode set to none, the host page cache is disabled, but the disk write cache is enabled for the guest. In this mode, the write performance in the guest is optimal because write operations bypass the host @@ -667,25 +671,25 @@ Possible cache modes: writethrough mode. Shareable disk devices, like for a multi-attachable block storage volume, will have their cache mode set to 'none' regardless of configuration. -* writethrough: writethrough mode is the default caching mode. With - caching set to writethrough mode, the host page cache is enabled, but the - disk write cache is disabled for the guest. Consequently, this caching mode - ensures data integrity even if the applications and storage stack in the - guest do not transfer data to permanent storage properly (either through - fsync operations or file system barriers). Because the host page cache is - enabled in this mode, the read performance for applications running in the - guest is generally better. However, the write performance might be reduced - because the disk write cache is disabled. -* writeback: With caching set to writeback mode, both the host page cache - and the disk write cache are enabled for the guest. Because of this, the - I/O performance for applications running in the guest is good, but the data - is not protected in a power failure. 
As a result, this caching mode is - recommended only for temporary data where potential data loss is not a - concern. - NOTE: Certain backend disk mechanisms may provide safe writeback cache - semantics. Specifically those that bypass the host page cache, such as - QEMU's integrated RBD driver. Ceph documentation recommends setting this - to writeback for maximum performance while maintaining data safety. +* writethrough: With caching set to writethrough mode, the host page cache is + enabled, but the disk write cache is disabled for the guest. Consequently, + this caching mode ensures data integrity even if the applications and storage + stack in the guest do not transfer data to permanent storage properly (either + through fsync operations or file system barriers). Because the host page + cache is enabled in this mode, the read performance for applications running + in the guest is generally better. However, the write performance might be + reduced because the disk write cache is disabled. +* writeback: With caching set to writeback mode, both the host page + cache and the disk write cache are enabled for the guest. Because of + this, the I/O performance for applications running in the guest is + good, but the data is not protected in a power failure. As a result, + this caching mode is recommended only for temporary data where + potential data loss is not a concern. + NOTE: Certain backend disk mechanisms may provide safe + writeback cache semantics. Specifically those that bypass the host + page cache, such as QEMU's integrated RBD driver. Ceph documentation + recommends setting this to writeback for maximum performance while + maintaining data safety. * directsync: Like "writethrough", but it bypasses the host page cache. * unsafe: Caching mode of unsafe ignores cache transfer operations completely. 
As its name implies, this caching mode should be used only for diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index a3208facf2..8a1503c442 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -8522,7 +8522,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, tree = etree.fromstring(xml) disks = tree.findall('./devices/disk/driver') for guest_disk in disks: - self.assertEqual(guest_disk.get("cache"), "writethrough") + self.assertEqual(guest_disk.get("cache"), "writeback") def _check_xml_and_disk_bus(self, image_meta, block_device_info, wantConfig): @@ -16123,7 +16123,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, """Tests that when conf.shareable is True, the configuration is ignored and the driver_cache is forced to 'none'. """ - self.flags(disk_cachemodes=['block=writethrough'], group='libvirt') + self.flags(disk_cachemodes=['block=writeback'], group='libvirt') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) fake_conf = FakeConfigGuestDisk() fake_conf.shareable = True diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index a23b563624..6ce3877a24 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -407,16 +407,43 @@ class LibvirtDriver(driver.ComputeDriver): @property def disk_cachemode(self): + # It can be confusing to understand the QEMU cache mode + # behaviour, because each cache=$MODE is a convenient shorthand + # to toggle _three_ cache.* booleans. 
Consult the below table + # (quoting from the QEMU man page): + # + # | cache.writeback | cache.direct | cache.no-flush + # -------------------------------------------------------------- + # writeback | on | off | off + # none | on | on | off + # writethrough | off | off | off + # directsync | off | on | off + # unsafe | on | off | on + # + # Where: + # + # - 'cache.writeback=off' means: QEMU adds an automatic fsync() + # after each write request. + # + # - 'cache.direct=on' means: Use Linux's O_DIRECT, i.e. bypass + # the kernel page cache. Caches in any other layer (disk + # cache, QEMU metadata caches, etc.) can still be present. + # + # - 'cache.no-flush=on' means: Ignore flush requests, i.e. + # never call fsync(), even if the guest explicitly requested + # it. + # + # Use cache mode "none" (cache.writeback=on, cache.direct=on, + # cache.no-flush=off) for consistent performance and + # migration correctness. Some filesystems don't support + # O_DIRECT, though. For those we fall back to the next + # reasonable option that is "writeback" (cache.writeback=on, + # cache.direct=off, cache.no-flush=off). + if self._disk_cachemode is None: - # We prefer 'none' for consistent performance, host crash - # safety & migration correctness by avoiding host page cache. - # Some filesystems don't support O_DIRECT though. For those we - # fallback to 'writethrough' which gives host crash safety, and - # is safe for migration provided the filesystem is cache coherent - # (cluster filesystems typically are, but things like NFS are not). 
self._disk_cachemode = "none" if not nova.privsep.utils.supports_direct_io(CONF.instances_path): - self._disk_cachemode = "writethrough" + self._disk_cachemode = "writeback" return self._disk_cachemode def _set_cache_mode(self, conf): diff --git a/releasenotes/notes/writeback-cache-mode-for-guests-a7e4d2806c956164.yaml b/releasenotes/notes/writeback-cache-mode-for-guests-a7e4d2806c956164.yaml new file mode 100644 index 0000000000..e29addecb5 --- /dev/null +++ b/releasenotes/notes/writeback-cache-mode-for-guests-a7e4d2806c956164.yaml @@ -0,0 +1,19 @@ +--- +fixes: + - | + Update the way QEMU cache mode is configured for Nova guests: If the + file system hosting the directory with Nova instances is capable of + Linux's O_DIRECT, use ``none``; otherwise fall back to ``writeback`` + cache mode. This improves performance without compromising data + integrity. `Bug 1818847`_. + + Context: What makes ``writethrough`` so safe against host crashes is + that it never keeps data in a "write cache", but it calls fsync() + after *every* write. This is also what makes it horribly slow. But + cache mode ``none`` doesn't do this and therefore doesn't provide + this kind of safety. The guest OS must explicitly flush the cache + in the right places to make sure data is safe on the disk; and all + modern OSes flush data as needed. So if cache mode ``none`` is safe + enough for you, then ``writeback`` should be safe enough too. + + .. _Bug 1818847: https://bugs.launchpad.net/nova/+bug/1818847