From bf760801804e55b045aed54bf9b1d0b0131be3f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kl=C3=B6tzke?= Date: Tue, 16 Apr 2019 16:45:20 +0200 Subject: [PATCH] core: let user define start-/stop-timeout behaviour The usual behaviour when a timeout expires is to terminate/kill the service. This is what users usually want in production systems. To debug services that fail to start/stop (especially sporadic failures) it might be necessary to trigger the watchdog machinery and write core dumps, though. Likewise, it is usually just a waste of time to gracefully stop a stuck service. Instead it might save time to go directly into kill mode. This commit adds two new options to services: TimeoutStartFailureMode= and TimeoutStopFailureMode=. Both take the same values and tweak the behavior of systemd when a start/stop timeout expires: * 'terminate': is the default behaviour as it has always been, * 'abort': triggers the watchdog machinery and will send SIGABRT (unless WatchdogSignal was changed) and * 'kill' will directly send SIGKILL. To handle the stop failure mode in stop-post state too, a new final-watchdog state needs to be introduced. --- man/systemd.service.xml | 41 +++++-- src/basic/unit-def.c | 1 + src/basic/unit-def.h | 1 + src/core/dbus-service.c | 10 ++ src/core/load-fragment-gperf.gperf.m4 | 2 + src/core/load-fragment.c | 2 + src/core/load-fragment.h | 1 + src/core/service.c | 167 +++++++++++++++++++++----- src/core/service.h | 13 ++ src/shared/bus-unit-util.c | 4 +- 10 files changed, 200 insertions(+), 42 deletions(-) diff --git a/man/systemd.service.xml b/man/systemd.service.xml index e8c869244a2..a4a49f39af2 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -560,16 +560,12 @@ TimeoutStartSec= - Configures the time to wait for start-up. If a - daemon service does not signal start-up completion within the - configured time, the service will be considered failed and - will be shut down again. 
Takes a unit-less value in seconds, - or a time span value such as "5min 20s". Pass - infinity to disable the timeout logic. Defaults to - DefaultTimeoutStartSec= from the manager - configuration file, except when - Type=oneshot is used, in which case the - timeout is disabled by default (see + Configures the time to wait for start-up. If a daemon service does not signal start-up + completion within the configured time, the service will be considered failed and will be shut down again. The + precise action depends on the TimeoutStartFailureMode= option. Takes a unit-less value in + seconds, or a time span value such as "5min 20s". Pass infinity to disable the timeout logic. + Defaults to DefaultTimeoutStartSec= from the manager configuration file, except when + Type=oneshot is used, in which case the timeout is disabled by default (see systemd-system.conf(5)). @@ -588,7 +584,8 @@ This option serves two purposes. First, it configures the time to wait for each ExecStop= command. If any of them times out, subsequent ExecStop= commands are skipped and the service will be terminated by SIGTERM. If no ExecStop= - commands are specified, the service gets the SIGTERM immediately. Second, it configures the time + commands are specified, the service gets the SIGTERM immediately. This default behavior + can be changed by the TimeoutStopFailureMode= option. Second, it configures the time to wait for the service itself to stop. If it doesn't terminate in the specified time, it will be forcibly terminated by SIGKILL (see KillMode= in systemd.kill(5)). @@ -646,6 +643,28 @@ + + TimeoutStartFailureMode= + TimeoutStopFailureMode= + + These options configure the action that is taken in case a daemon service does not signal + start-up within its configured TimeoutStartSec=, respectively if it does not stop within + TimeoutStopSec=. Takes one of terminate, abort and + kill. Both options default to terminate. 
+ + If terminate is set the service will be gracefully terminated by sending the signal + specified in KillSignal= (defaults to SIGTERM, see + systemd.kill(5)). If the + service does not terminate the FinalKillSignal= is sent after + TimeoutStopSec=. If abort is set, WatchdogSignal= is sent + instead and TimeoutAbortSec= applies before sending FinalKillSignal=. + This setting may be used to analyze services that fail to start-up or shut-down intermittently. + By using kill the service is immediately terminated by sending + FinalKillSignal= without any further timeout. This setting can be used to expedite the + shutdown of failing services. + + + RuntimeMaxSec= diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c index 64b2b2dd7e1..94ec1f3d198 100644 --- a/src/basic/unit-def.c +++ b/src/basic/unit-def.c @@ -185,6 +185,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = { [SERVICE_STOP_SIGTERM] = "stop-sigterm", [SERVICE_STOP_SIGKILL] = "stop-sigkill", [SERVICE_STOP_POST] = "stop-post", + [SERVICE_FINAL_WATCHDOG] = "final-watchdog", [SERVICE_FINAL_SIGTERM] = "final-sigterm", [SERVICE_FINAL_SIGKILL] = "final-sigkill", [SERVICE_FAILED] = "failed", diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h index a7d67819884..53419ecd8a1 100644 --- a/src/basic/unit-def.h +++ b/src/basic/unit-def.h @@ -127,6 +127,7 @@ typedef enum ServiceState { SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_FINAL_WATCHDOG, /* In case the STOP_POST executable needs to be aborted. 
*/ SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */ SERVICE_FINAL_SIGKILL, SERVICE_FAILED, diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 11680f0d69c..5d4f4ef5068 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -29,6 +29,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, N static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction); static BUS_DEFINE_PROPERTY_GET(property_get_timeout_abort_usec, "t", Service, service_timeout_abort_usec); static BUS_DEFINE_PROPERTY_GET(property_get_watchdog_usec, "t", Service, service_get_watchdog_usec); +static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode); static int property_get_exit_status_set( sd_bus *bus, @@ -101,6 +102,8 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("TimeoutStartUSec", "t", bus_property_get_usec, offsetof(Service, timeout_start_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Service, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TimeoutAbortUSec", "t", property_get_timeout_abort_usec, 0, 0), + SD_BUS_PROPERTY("TimeoutStartFailureMode", "s", property_get_timeout_failure_mode, offsetof(Service, timeout_start_failure_mode), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("TimeoutStopFailureMode", "s", property_get_timeout_failure_mode, offsetof(Service, timeout_stop_failure_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("WatchdogUSec", "t", property_get_watchdog_usec, 0, 0), BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0), @@ -259,6 +262,7 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(service_type, 
ServiceType, service_type_fr static BUS_DEFINE_SET_TRANSIENT_PARSE(service_restart, ServiceRestart, service_restart_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(oom_policy, OOMPolicy, oom_policy_from_string); static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(bus_name, sd_bus_service_name_is_valid); +static BUS_DEFINE_SET_TRANSIENT_PARSE(timeout_failure_mode, ServiceTimeoutFailureMode, service_timeout_failure_mode_from_string); static int bus_service_set_transient_property( Service *s, @@ -316,6 +320,12 @@ static int bus_service_set_transient_property( return r; } + if (streq(name, "TimeoutStartFailureMode")) + return bus_set_transient_timeout_failure_mode(u, name, &s->timeout_start_failure_mode, message, flags, error); + + if (streq(name, "TimeoutStopFailureMode")) + return bus_set_transient_timeout_failure_mode(u, name, &s->timeout_stop_failure_mode, message, flags, error); + if (streq(name, "RuntimeMaxUSec")) return bus_set_transient_usec(u, name, &s->runtime_max_usec, message, flags, error); diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index c76d08b3a6d..69598e8430c 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -322,6 +322,8 @@ Service.TimeoutSec, config_parse_service_timeout, 0, Service.TimeoutStartSec, config_parse_service_timeout, 0, 0 Service.TimeoutStopSec, config_parse_sec_fix_0, 0, offsetof(Service, timeout_stop_usec) Service.TimeoutAbortSec, config_parse_service_timeout_abort, 0, 0 +Service.TimeoutStartFailureMode, config_parse_service_timeout_failure_mode, 0, offsetof(Service, timeout_start_failure_mode) +Service.TimeoutStopFailureMode, config_parse_service_timeout_failure_mode, 0, offsetof(Service, timeout_stop_failure_mode) Service.RuntimeMaxSec, config_parse_sec, 0, offsetof(Service, runtime_max_usec) Service.WatchdogSec, config_parse_sec, 0, offsetof(Service, watchdog_usec) m4_dnl The following five only exist for compatibility, they moved into 
Unit, see above diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 517813986b9..a2eede4ccea 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -123,6 +123,7 @@ DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_system, protect_system, ProtectSys DEFINE_CONFIG_PARSE_ENUM(config_parse_runtime_preserve_mode, exec_preserve_mode, ExecPreserveMode, "Failed to parse runtime directory preserve mode"); DEFINE_CONFIG_PARSE_ENUM(config_parse_service_type, service_type, ServiceType, "Failed to parse service type"); DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceRestart, "Failed to parse service restart specifier"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_service_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode, "Failed to parse timeout failure mode"); DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value"); DEFINE_CONFIG_PARSE_ENUM(config_parse_oom_policy, oom_policy, OOMPolicy, "Failed to parse OOM policy"); DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_ip_tos, ip_tos, int, -1, "Failed to parse IP TOS value"); @@ -4941,6 +4942,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_exec, "PATH [ARGUMENT [...]]" }, { config_parse_service_type, "SERVICETYPE" }, { config_parse_service_restart, "SERVICERESTART" }, + { config_parse_service_timeout_failure_mode, "TIMEOUTMODE" }, { config_parse_kill_mode, "KILLMODE" }, { config_parse_signal, "SIGNAL" }, { config_parse_socket_listen, "SOCKET [...]" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index bc72ef77451..9c30b6f882b 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -30,6 +30,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_exec_coredump_filter); CONFIG_PARSER_PROTOTYPE(config_parse_exec); CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout); 
CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout_abort); +CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout_failure_mode); CONFIG_PARSER_PROTOTYPE(config_parse_service_type); CONFIG_PARSER_PROTOTYPE(config_parse_service_restart); CONFIG_PARSER_PROTOTYPE(config_parse_socket_bindtodevice); diff --git a/src/core/service.c b/src/core/service.c index 340b6550591..8b3fd2e5a44 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -56,6 +56,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = { [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING, [SERVICE_STOP_POST] = UNIT_DEACTIVATING, + [SERVICE_FINAL_WATCHDOG] = UNIT_DEACTIVATING, [SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING, [SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING, [SERVICE_FAILED] = UNIT_FAILED, @@ -79,6 +80,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING, [SERVICE_STOP_POST] = UNIT_DEACTIVATING, + [SERVICE_FINAL_WATCHDOG] = UNIT_DEACTIVATING, [SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING, [SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING, [SERVICE_FAILED] = UNIT_FAILED, @@ -857,10 +859,14 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { fprintf(f, "%sRestartSec: %s\n" "%sTimeoutStartSec: %s\n" - "%sTimeoutStopSec: %s\n", + "%sTimeoutStopSec: %s\n" + "%sTimeoutStartFailureMode: %s\n" + "%sTimeoutStopFailureMode: %s\n", prefix, format_timespan(buf_restart, sizeof(buf_restart), s->restart_usec, USEC_PER_SEC), prefix, format_timespan(buf_start, sizeof(buf_start), s->timeout_start_usec, USEC_PER_SEC), - prefix, format_timespan(buf_stop, sizeof(buf_stop), s->timeout_stop_usec, USEC_PER_SEC)); + prefix, format_timespan(buf_stop, sizeof(buf_stop), s->timeout_stop_usec, USEC_PER_SEC), + prefix, service_timeout_failure_mode_to_string(s->timeout_start_failure_mode), + prefix, 
service_timeout_failure_mode_to_string(s->timeout_stop_failure_mode)); if (s->timeout_abort_set) fprintf(f, @@ -1072,7 +1078,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, - SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, + SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_AUTO_RESTART, SERVICE_CLEANING)) s->timer_event_source = sd_event_source_unref(s->timer_event_source); @@ -1081,7 +1087,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, - SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) { + SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) { service_unwatch_main_pid(s); s->main_command = NULL; } @@ -1090,7 +1096,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, - SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, + SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_CLEANING)) { service_unwatch_control_pid(s); s->control_command = NULL; @@ -1106,7 +1112,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, - SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) && + SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) && !(state == SERVICE_DEAD && UNIT(s)->job)) service_close_socket_fd(s); @@ -1154,6 +1160,7 @@ static usec_t 
service_coldplug_timeout(Service *s) { return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->timeout_stop_usec); case SERVICE_STOP_WATCHDOG: + case SERVICE_FINAL_WATCHDOG: return usec_add(UNIT(s)->state_change_timestamp.monotonic, service_timeout_abort_usec(s)); case SERVICE_AUTO_RESTART: @@ -1187,7 +1194,7 @@ static int service_coldplug(Unit *u) { SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, - SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) { + SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) { r = unit_watch_pid(UNIT(s), s->main_pid, false); if (r < 0) return r; @@ -1199,7 +1206,7 @@ static int service_coldplug(Unit *u) { SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, - SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, + SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_CLEANING)) { r = unit_watch_pid(UNIT(s), s->control_pid, false); if (r < 0) @@ -1859,6 +1866,7 @@ static int state_to_kill_operation(Service *s, ServiceState state) { switch (state) { case SERVICE_STOP_WATCHDOG: + case SERVICE_FINAL_WATCHDOG: return KILL_WATCHDOG; case SERVICE_STOP_SIGTERM: @@ -1879,7 +1887,7 @@ static int state_to_kill_operation(Service *s, ServiceState state) { } static void service_enter_signal(Service *s, ServiceState state, ServiceResult f) { - int r; + int kill_operation, r; assert(s); @@ -1893,10 +1901,11 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f * died now */ (void) unit_enqueue_rewatch_pids(UNIT(s)); + kill_operation = state_to_kill_operation(s, state); r = unit_kill_context( UNIT(s), &s->kill_context, - state_to_kill_operation(s, state), + kill_operation, s->main_pid, s->control_pid, s->main_pid_alien); @@ -1905,7 
+1914,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f if (r > 0) { r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), - state == SERVICE_STOP_WATCHDOG ? service_timeout_abort_usec(s) : s->timeout_stop_usec)); + kill_operation == KILL_WATCHDOG ? service_timeout_abort_usec(s) : s->timeout_stop_usec)); if (r < 0) goto fail; @@ -1914,7 +1923,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_SUCCESS); else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL)) service_enter_stop_post(s, SERVICE_SUCCESS); - else if (state == SERVICE_FINAL_SIGTERM && s->kill_context.send_sigkill) + else if (IN_SET(state, SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM) && s->kill_context.send_sigkill) service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_SUCCESS); else service_enter_dead(s, SERVICE_SUCCESS, true); @@ -2444,7 +2453,7 @@ static int service_start(Unit *u) { * please! */ if (IN_SET(s->state, SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, - SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_CLEANING)) + SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_CLEANING)) return -EAGAIN; /* Already on it! */ @@ -2515,7 +2524,7 @@ static int service_stop(Unit *u) { /* Already on it */ if (IN_SET(s->state, SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, - SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) + SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) return 0; /* A restart will be scheduled or is in progress. 
*/ @@ -3321,6 +3330,7 @@ static void service_notify_cgroup_empty_event(Unit *u) { break; case SERVICE_STOP_POST: + case SERVICE_FINAL_WATCHDOG: case SERVICE_FINAL_SIGTERM: case SERVICE_FINAL_SIGKILL: if (main_pid_good(s) <= 0 && control_pid_good(s) <= 0) @@ -3521,6 +3531,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { break; + case SERVICE_FINAL_WATCHDOG: case SERVICE_FINAL_SIGTERM: case SERVICE_FINAL_SIGKILL: @@ -3674,6 +3685,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { service_enter_signal(s, SERVICE_FINAL_SIGTERM, f); break; + case SERVICE_FINAL_WATCHDOG: case SERVICE_FINAL_SIGTERM: case SERVICE_FINAL_SIGKILL: if (main_pid_good(s) <= 0) @@ -3720,13 +3732,32 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us case SERVICE_CONDITION: case SERVICE_START_PRE: case SERVICE_START: - log_unit_warning(UNIT(s), "%s operation timed out. Terminating.", service_state_to_string(s->state)); - service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); - break; - case SERVICE_START_POST: - log_unit_warning(UNIT(s), "Start-post operation timed out. Stopping."); - service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); + switch (s->timeout_start_failure_mode) { + + case SERVICE_TIMEOUT_TERMINATE: + log_unit_warning(UNIT(s), "%s operation timed out. Terminating.", service_state_to_string(s->state)); + service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); + break; + + case SERVICE_TIMEOUT_ABORT: + log_unit_warning(UNIT(s), "%s operation timed out. Aborting.", service_state_to_string(s->state)); + service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_TIMEOUT); + break; + + case SERVICE_TIMEOUT_KILL: + if (s->kill_context.send_sigkill) { + log_unit_warning(UNIT(s), "%s operation timed out. 
Killing.", service_state_to_string(s->state)); + service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT); + } else { + log_unit_warning(UNIT(s), "%s operation timed out. Skipping SIGKILL.", service_state_to_string(s->state)); + service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT); + } + break; + + default: + assert_not_reached("unknown timeout mode"); + } break; case SERVICE_RUNNING: @@ -3742,17 +3773,48 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us break; case SERVICE_STOP: - log_unit_warning(UNIT(s), "Stopping timed out. Terminating."); - service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); + switch (s->timeout_stop_failure_mode) { + + case SERVICE_TIMEOUT_TERMINATE: + log_unit_warning(UNIT(s), "Stopping timed out. Terminating."); + service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); + break; + + case SERVICE_TIMEOUT_ABORT: + log_unit_warning(UNIT(s), "Stopping timed out. Aborting."); + service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_TIMEOUT); + break; + + case SERVICE_TIMEOUT_KILL: + if (s->kill_context.send_sigkill) { + log_unit_warning(UNIT(s), "Stopping timed out. Killing."); + service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT); + } else { + log_unit_warning(UNIT(s), "Stopping timed out. Skipping SIGKILL."); + service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT); + } + break; + + default: + assert_not_reached("unknown timeout mode"); + } break; case SERVICE_STOP_WATCHDOG: - log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. Terminating."); - service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); + if (s->kill_context.send_sigkill) { + log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. Killing."); + service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT); + } else { + log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. 
Skipping SIGKILL."); + service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT); + } break; case SERVICE_STOP_SIGTERM: - if (s->kill_context.send_sigkill) { + if (s->timeout_stop_failure_mode == SERVICE_TIMEOUT_ABORT) { + log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Aborting."); + service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_TIMEOUT); + } else if (s->kill_context.send_sigkill) { log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Killing."); service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT); } else { @@ -3772,16 +3834,52 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us break; case SERVICE_STOP_POST: - log_unit_warning(UNIT(s), "State 'stop-post' timed out. Terminating."); - service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_TIMEOUT); + switch (s->timeout_stop_failure_mode) { + + case SERVICE_TIMEOUT_TERMINATE: + log_unit_warning(UNIT(s), "State 'stop-post' timed out. Terminating."); + service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_TIMEOUT); + break; + + case SERVICE_TIMEOUT_ABORT: + log_unit_warning(UNIT(s), "State 'stop-post' timed out. Aborting."); + service_enter_signal(s, SERVICE_FINAL_WATCHDOG, SERVICE_FAILURE_TIMEOUT); + break; + + case SERVICE_TIMEOUT_KILL: + if (s->kill_context.send_sigkill) { + log_unit_warning(UNIT(s), "State 'stop-post' timed out. Killing."); + service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT); + } else { + log_unit_warning(UNIT(s), "State 'stop-post' timed out. Skipping SIGKILL. Entering failed mode."); + service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false); + } + break; + + default: + assert_not_reached("unknown timeout mode"); + } + break; + + case SERVICE_FINAL_WATCHDOG: + if (s->kill_context.send_sigkill) { + log_unit_warning(UNIT(s), "State 'final-watchdog' timed out. 
Killing."); + service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT); + } else { + log_unit_warning(UNIT(s), "State 'final-watchdog' timed out. Skipping SIGKILL. Entering failed mode."); + service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false); + } break; case SERVICE_FINAL_SIGTERM: - if (s->kill_context.send_sigkill) { - log_unit_warning(UNIT(s), "State 'stop-final-sigterm' timed out. Killing."); + if (s->timeout_stop_failure_mode == SERVICE_TIMEOUT_ABORT) { + log_unit_warning(UNIT(s), "State 'final-sigterm' timed out. Aborting."); + service_enter_signal(s, SERVICE_FINAL_WATCHDOG, SERVICE_FAILURE_TIMEOUT); + } else if (s->kill_context.send_sigkill) { + log_unit_warning(UNIT(s), "State 'final-sigterm' timed out. Killing."); service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT); } else { - log_unit_warning(UNIT(s), "State 'stop-final-sigterm' timed out. Skipping SIGKILL. Entering failed mode."); + log_unit_warning(UNIT(s), "State 'final-sigterm' timed out. Skipping SIGKILL. 
Entering failed mode."); service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false); } @@ -4263,6 +4361,7 @@ static bool service_needs_console(Unit *u) { SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL); } @@ -4417,6 +4516,14 @@ static const char* const service_result_table[_SERVICE_RESULT_MAX] = { DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult); +static const char* const service_timeout_failure_mode_table[_SERVICE_TIMEOUT_FAILURE_MODE_MAX] = { + [SERVICE_TIMEOUT_TERMINATE] = "terminate", + [SERVICE_TIMEOUT_ABORT] = "abort", + [SERVICE_TIMEOUT_KILL] = "kill", +}; + +DEFINE_STRING_TABLE_LOOKUP(service_timeout_failure_mode, ServiceTimeoutFailureMode); + const UnitVTable service_vtable = { .object_size = sizeof(Service), .exec_context_offset = offsetof(Service, exec_context), diff --git a/src/core/service.h b/src/core/service.h index 3a84db14c47..4423f893bb7 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -74,6 +74,14 @@ typedef enum ServiceResult { _SERVICE_RESULT_INVALID = -1 } ServiceResult; +typedef enum ServiceTimeoutFailureMode { + SERVICE_TIMEOUT_TERMINATE, + SERVICE_TIMEOUT_ABORT, + SERVICE_TIMEOUT_KILL, + _SERVICE_TIMEOUT_FAILURE_MODE_MAX, + _SERVICE_TIMEOUT_FAILURE_MODE_INVALID = -1 +} ServiceTimeoutFailureMode; + struct ServiceFDStore { Service *service; @@ -103,6 +111,8 @@ struct Service { usec_t timeout_abort_usec; bool timeout_abort_set; usec_t runtime_max_usec; + ServiceTimeoutFailureMode timeout_start_failure_mode; + ServiceTimeoutFailureMode timeout_stop_failure_mode; dual_timestamp watchdog_timestamp; usec_t watchdog_usec; /* the requested watchdog timeout in the unit file */ @@ -228,6 +238,9 @@ NotifyState notify_state_from_string(const char *s) _pure_; const char* service_result_to_string(ServiceResult i) _const_; ServiceResult service_result_from_string(const char *s) _pure_; +const char* 
service_timeout_failure_mode_to_string(ServiceTimeoutFailureMode i) _const_; +ServiceTimeoutFailureMode service_timeout_failure_mode_from_string(const char *s) _pure_; + DEFINE_CAST(SERVICE, Service); #define STATUS_TEXT_MAX (16U*1024U) diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 9a5730f3eae..69f79ea1761 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -1490,7 +1490,9 @@ static int bus_append_service_property(sd_bus_message *m, const char *field, con "NotifyAccess", "USBFunctionDescriptors", "USBFunctionStrings", - "OOMPolicy")) + "OOMPolicy", + "TimeoutStartFailureMode", + "TimeoutStopFailureMode")) return bus_append_string(m, field, eq); if (STR_IN_SET(field, "PermissionsStartOnly",