From f985a11bc6b8703fc7d1bcf1d520f4450322a3af Mon Sep 17 00:00:00 2001 From: Bob Ball Date: Fri, 19 Jun 2015 17:53:11 +0100 Subject: [PATCH] XenAPI: Refactor rotate_xen_guest_logs to avoid races Moving console logs to a temporary file is a clear race when Nova tries to access a file it might not be there. We need to keep the readable file in place at all times. Atomic operations like mv are fine as if the file was opened before the mv operation Linux will keep the contents available until the file is closed by Nova's plugin. Futher, we need to be able to report the console logs from domains that have been shutdown but still exist on the host. Ensure we keep track of the domain that we last booted with Change-Id: Icfff9886d79f788be871bf28c8b729a57dcd94d2 Fixes-bug: 1466914 --- nova/tests/unit/virt/xenapi/test_vmops.py | 11 +++-- nova/tests/unit/virt/xenapi/test_xenapi.py | 6 +++ nova/virt/xenapi/vmops.py | 15 ++++++- tools/xenserver/rotate_xen_guest_logs.sh | 48 ++++++++-------------- 4 files changed, 43 insertions(+), 37 deletions(-) diff --git a/nova/tests/unit/virt/xenapi/test_vmops.py b/nova/tests/unit/virt/xenapi/test_vmops.py index 64578399a8..65f905f6b6 100644 --- a/nova/tests/unit/virt/xenapi/test_vmops.py +++ b/nova/tests/unit/virt/xenapi/test_vmops.py @@ -200,20 +200,20 @@ class InjectAutoDiskConfigTestCase(VMOpsTestBase): class GetConsoleOutputTestCase(VMOpsTestBase): def test_get_console_output_works(self): - self.mox.StubOutWithMock(self.vmops, '_get_dom_id') + self.mox.StubOutWithMock(self.vmops, '_get_last_dom_id') instance = {"name": "dummy"} - self.vmops._get_dom_id(instance, check_rescue=True).AndReturn(42) + self.vmops._get_last_dom_id(instance, check_rescue=True).AndReturn(42) self.mox.ReplayAll() self.assertEqual("dom_id: 42", self.vmops.get_console_output(instance)) def test_get_console_output_throws_nova_exception(self): - self.mox.StubOutWithMock(self.vmops, '_get_dom_id') + self.mox.StubOutWithMock(self.vmops, '_get_last_dom_id') instance = {"name": "dummy"} # dom_id=0 used to trigger exception in fake XenAPI - self.vmops._get_dom_id(instance, check_rescue=True).AndReturn(0) + self.vmops._get_last_dom_id(instance, check_rescue=True).AndReturn(0) self.mox.ReplayAll() self.assertRaises(exception.NovaException, @@ -278,6 +278,7 @@ class SpawnTestCase(VMOpsTestBase): self.mox.StubOutWithMock(self.vmops, '_remove_hostname') self.mox.StubOutWithMock(self.vmops.firewall_driver, 'apply_instance_filter') + self.mox.StubOutWithMock(self.vmops, '_update_last_dom_id') def _test_spawn(self, name_label_param=None, block_device_info_param=None, rescue=False, include_root_vdi=True, throw_exception=None, @@ -389,6 +390,7 @@ class SpawnTestCase(VMOpsTestBase): steps) self.vmops._start(instance, vm_ref) self.vmops._wait_for_instance_to_start(instance, vm_ref) + self.vmops._update_last_dom_id(vm_ref) step += 1 self.vmops._update_instance_progress(context, instance, step, steps) @@ -505,6 +507,7 @@ class SpawnTestCase(VMOpsTestBase): if power_on: self.vmops._start(instance, vm_ref) self.vmops._wait_for_instance_to_start(instance, vm_ref) + self.vmops._update_last_dom_id(vm_ref) self.vmops.firewall_driver.apply_instance_filter(instance, network_info) diff --git a/nova/tests/unit/virt/xenapi/test_xenapi.py b/nova/tests/unit/virt/xenapi/test_xenapi.py index 3817b77785..02f8b58d20 100644 --- a/nova/tests/unit/virt/xenapi/test_xenapi.py +++ b/nova/tests/unit/virt/xenapi/test_xenapi.py @@ -1258,6 +1258,12 @@ iface eth0 inet6 static self._test_spawn_fails_silently_with(exception.AgentError, value=error) + def test_spawn_sets_last_dom_id(self): + self._test_spawn(IMAGE_VHD, None, None, + os_type="linux", architecture="x86-64") + self.assertEqual(self.vm['domid'], + self.vm['other_config']['last_dom_id']) + def test_rescue(self): instance = self._create_instance(spawn=False, obj=True) xenapi_fake.create_vm(instance['name'], 'Running') diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index 6e3b11d693..37e852060d 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -394,6 +394,11 @@ class VMOps(object): return vdis + def _update_last_dom_id(self, vm_ref): + other_config = self._session.VM.get_other_config(vm_ref) + other_config['last_dom_id'] = self._session.VM.get_domid(vm_ref) + self._session.VM.set_other_config(vm_ref, other_config) + def spawn(self, context, instance, image_meta, injected_files, admin_password, network_info=None, block_device_info=None, name_label=None, rescue=False): @@ -553,6 +558,7 @@ class VMOps(object): if power_on: self._start(instance, vm_ref) self._wait_for_instance_to_start(instance, vm_ref) + self._update_last_dom_id(vm_ref) @step def configure_booted_instance_step(undo_mgr, vm_ref): @@ -1753,7 +1759,7 @@ class VMOps(object): def get_console_output(self, instance): """Return last few lines of instance console.""" - dom_id = self._get_dom_id(instance, check_rescue=True) + dom_id = self._get_last_dom_id(instance, check_rescue=True) try: raw_console_data = self._session.call_plugin('console', @@ -2001,6 +2007,13 @@ class VMOps(object): raise exception.InstanceNotFound(instance_id=instance['name']) return domid + def _get_last_dom_id(self, instance=None, vm_ref=None, check_rescue=False): + vm_ref = vm_ref or self._get_vm_opaque_ref(instance, check_rescue) + other_config = self._session.call_xenapi("VM.get_other_config", vm_ref) + if 'last_dom_id' not in other_config: + raise exception.InstanceNotFound(instance_id=instance['name']) + return other_config['last_dom_id'] + def _add_to_param_xenstore(self, vm_ref, key, val): """Takes a key/value pair and adds it to the xenstore parameter record for the given vm instance. If the key exists in xenstore, diff --git a/tools/xenserver/rotate_xen_guest_logs.sh b/tools/xenserver/rotate_xen_guest_logs.sh index e0b48730fe..08bdcbf246 100755 --- a/tools/xenserver/rotate_xen_guest_logs.sh +++ b/tools/xenserver/rotate_xen_guest_logs.sh @@ -18,48 +18,32 @@ log_dir="/var/log/xen/guest" kb=1024 max_size_bytes=$(($kb*$kb)) truncated_size_bytes=$((5*$kb)) -list_domains=/opt/xensource/bin/list_domains log_file_base="${log_dir}/console." -tmp_file_base="${log_dir}/tmp.console." # Ensure logging is setup correctly for all domains xenstore-write /local/logconsole/@ "${log_file_base}%d" -# Move logs we want to keep -domains=$($list_domains | sed '/^id*/d' | sed 's/|.*|.*$//g' | xargs) -for i in $domains; do - log="${log_file_base}$i" - tmp="${tmp_file_base}$i" - mv $log $tmp || true +# Ensure the last_dom_id is set + updated for all running VMs +for vm in $(xe vm-list power-state=running --minimal | tr ',' ' '); do + xe vm-param-set uuid=$vm other-config:last_dom_id=$(xe vm-param-get uuid=$vm param-name=dom-id) done -# Delete all console logs, -# mostly to remove logs from recently killed domains -rm -f ${log_dir}/console.* +# Get the last_dom_id for all VMs +valid_last_dom_ids=$(xe vm-list params=other-config --minimal | tr ';,' '\n\n' | grep last_dom_id | sed -e 's/last_dom_id: //g' | xargs) -# Reload domain list, in case it changed -# (note we may have just deleted a new console log) -domains=$($list_domains | sed '/^id*/d' | sed 's/|.*|.*$//g' | xargs) -for i in $domains; do - log="${log_file_base}$i" - tmp="${tmp_file_base}$i" +# Remove console files that do not correspond to valid last_dom_id's +allowed_consoles=".*console.\(${valid_last_dom_ids// /\\|}\)$" +find $log_dir -type f -not -regex $allowed_consoles -delete - if [ -e "$tmp" ]; then - size=$(stat -c%s "$tmp") - - # Trim the log if required - if [ "$size" -gt "$max_size_bytes" ]; then - tail -c $truncated_size_bytes $tmp > $log || true - else - mv $tmp $log || true - fi - fi +# Truncate all remaining logs +for log in `find $log_dir -type f -regex '.*console.*' -size +${max_size_bytes}c`; do + tmp="$log.tmp" + tail -c $truncated_size_bytes $log > $tmp + mv -f $tmp $log # Notify xen that it needs to reload the file - xenstore-write /local/logconsole/$i $log - xenstore-rm /local/logconsole/$i + domid=${log##*.} + xenstore-write /local/logconsole/$domid $log + xenstore-rm /local/logconsole/$domid done - -# Delete all the tmp files -rm -f ${tmp_file_base}* || true