996c4ff9e8
During graceful shutdown, the compute service keeps a 2nd RPC
server active, which can be used to finish in-progress
operations. Like live migration, resize and cold migration
also make RPC calls between the source and destination computes.
For those operations too, we can use the 2nd RPC server to make
sure they are completed during graceful shutdown.
A quick overview of which RPC methods are involved in
resize/cold migration and which of them will use the 2nd RPC server:
Resize/cold migration
- prep_resize: No, resize/migration is not started yet.
- resize_instance: Yes, here the resize/migration starts.
- finish_resize: Yes
- cross cell resize case:
- prep_snapshot_based_resize_at_dest: NO, this is an initial check and
the migration has not started yet
- prep_snapshot_based_resize_at_source: Yes, this starts the migration
Confirm resize: NO
- confirm_resize: NO
- cross cell confirm resize case:
- confirm_snapshot_based_resize - NO
Revert resize:
- revert_resize - NO
- check_instance_shared_storage: YES. This is called from dest to source,
so we need the source to respond to it so that the revert can continue.
- finish_revert_resize on source - YES, at this stage, the revert resize is
in progress and abandoning it here can leave the migration in an unrecoverable
state.
- cross cell revert case:
- revert_snapshot_based_resize_at_dest: NO
- finish_revert_snapshot_based_resize_at_source: YES
Partial implement blueprint nova-services-graceful-shutdown-part1
Change-Id: If08b698d012a75b587144501d829403ec616f685
Signed-off-by: Ghanshyam Maan <gmaan.os14@gmail.com>
107 lines
4.3 KiB
YAML
107 lines
4.3 KiB
YAML
# Scenario: start a live migration of server-lm1, send SIGTERM to the source
# compute service (devstack@n-cpu on compute1) while the migration is in
# progress, then verify the migration still completes and the service stops.
- name: Graceful shutdown source compute live migration
  block:
    # Return codes 0 and 2 are both accepted; rc 2 is converted into the
    # "completed or timed out before SIGTERM" flag by the next task.
    - name: Start live migrations of test servers
      become: true
      become_user: stack
      script: "start_live_migration.sh server-lm1"
      environment:
        SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"
        CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
      register: start_live_migrations_result
      failed_when: start_live_migrations_result.rc not in [0, 2]

    - name: Set fact if migrations completed or timed out before SIGTERM to source compute
      set_fact:
        live_migrations_completed_or_timeout: "{{ start_live_migrations_result.rc == 2 }}"

    # Everything below only makes sense while the migration is still running.
    - name: Run graceful shutdown tests
      when: not live_migrations_completed_or_timeout
      block:
        # systemd reports MainPID=0 when the unit has no running main process,
        # and "kill -15 0" would signal the caller's whole process group
        # instead of nova-compute. Validate the PID and fail loudly instead.
        - name: Send SIGTERM to source compute to start the source compute graceful shutdown
          delegate_to: compute1
          become: true
          shell: |
            pid="$(systemctl show devstack@n-cpu -p MainPID --value)"
            case "$pid" in
              ''|0|*[!0-9]*)
                echo "devstack@n-cpu has no running main process (MainPID='$pid')" >&2
                exit 1
                ;;
            esac
            kill -15 "$pid"

        - name: Verify live migration is completed during graceful shutdown
          become: true
          become_user: stack
          script: "verify_live_migration.sh server-lm1"
          environment:
            CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"

        # Sleep for 180 sec: default graceful_shutdown_timeout
        - name: Sleep for 180 seconds to allow source compute graceful shutdown to complete
          pause:
            seconds: 180

        - name: Verify compute service is stopped after graceful shutdown
          become: true
          become_user: stack
          script: "start_and_verify_compute_service.sh {{ hostvars['compute1']['ansible_hostname'] }} inactive"

        # NOTE(review): nesting was reconstructed from a flattened listing;
        # confirm this restart belongs inside the conditional block (it undoes
        # the shutdown triggered above) rather than at the outer block level.
        - name: Start and verify subnode compute service is running
          become: true
          become_user: stack
          script: "start_and_verify_compute_service.sh {{ hostvars['compute1']['ansible_hostname'] }}"

    # NOTE(review): placed at the outer block level so the test server is
    # removed even when the shutdown tests were skipped; confirm against the
    # original indentation. Cleanup is best-effort (errors are ignored).
    - name: Cleanup test servers
      become: true
      become_user: stack
      script: "cleanup_test_servers.sh server-lm1"
      ignore_errors: true

# Scenario: start a cold migration of server-cm1, send SIGTERM to the source
# compute service (devstack@n-cpu on compute1) while the migration is in
# progress, then verify the migration still completes and the service stops.
- name: Graceful shutdown source compute cold migration
  block:
    # Return codes 0 and 2 are both accepted; rc 2 is converted into the
    # "completed or timed out before SIGTERM" flag by the next task.
    - name: Start cold migrations of test servers
      become: true
      become_user: stack
      script: "start_cold_migration.sh server-cm1"
      environment:
        SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"
        CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
      register: start_cold_migrations_result
      failed_when: start_cold_migrations_result.rc not in [0, 2]

    - name: Set fact if migration is completed or timed out before SIGTERM to source compute
      set_fact:
        cold_migrations_completed_or_timeout: "{{ start_cold_migrations_result.rc == 2 }}"

    # Everything below only makes sense while the migration is still running.
    - name: Run graceful shutdown tests
      when: not cold_migrations_completed_or_timeout
      block:
        # systemd reports MainPID=0 when the unit has no running main process,
        # and "kill -15 0" would signal the caller's whole process group
        # instead of nova-compute. Validate the PID and fail loudly instead.
        - name: Send SIGTERM to source compute to start the source compute graceful shutdown
          delegate_to: compute1
          become: true
          shell: |
            pid="$(systemctl show devstack@n-cpu -p MainPID --value)"
            case "$pid" in
              ''|0|*[!0-9]*)
                echo "devstack@n-cpu has no running main process (MainPID='$pid')" >&2
                exit 1
                ;;
            esac
            kill -15 "$pid"

        # NOTE(review): the live-migration verify step in this file passes
        # CONTROLLER_HOSTNAME in its environment and this one does not;
        # confirm verify_cold_migration.sh does not need it.
        - name: Verify cold migration is completed during graceful shutdown
          become: true
          become_user: stack
          script: "verify_cold_migration.sh server-cm1"

        # Sleep for 180 sec: default graceful_shutdown_timeout
        - name: Sleep for 180 seconds to allow source compute graceful shutdown to complete
          pause:
            seconds: 180

        - name: Verify compute service is stopped after graceful shutdown
          become: true
          become_user: stack
          script: "start_and_verify_compute_service.sh {{ hostvars['compute1']['ansible_hostname'] }} inactive"

        # NOTE(review): nesting was reconstructed from a flattened listing;
        # confirm this restart belongs inside the conditional block (it undoes
        # the shutdown triggered above) rather than at the outer block level.
        - name: Start and verify subnode compute service is running
          become: true
          become_user: stack
          script: "start_and_verify_compute_service.sh {{ hostvars['compute1']['ansible_hostname'] }}"

    # NOTE(review): placed at the outer block level so the test server is
    # removed even when the shutdown tests were skipped; confirm against the
    # original indentation. Cleanup is best-effort (errors are ignored).
    - name: Cleanup test servers
      become: true
      become_user: stack
      script: "cleanup_test_servers.sh server-cm1"
      ignore_errors: true

# Final gate: a scenario whose migration completed or timed out before SIGTERM
# was sent never ran its shutdown checks, so fail the run rather than let a
# skipped scenario pass silently.
- name: Fail if any test is skipped
  when: live_migrations_completed_or_timeout or cold_migrations_completed_or_timeout
  fail:
    msg: >-
      One or more test is skipped due to operation is either completed
      or timed out before SIGTERM signal.