From 3673040722b75c0a4d06fbeb272f917c7d1ea7c4 Mon Sep 17 00:00:00 2001 From: William Hubbs Date: Tue, 9 May 2017 18:20:52 -0500 Subject: [PATCH] supervise-daemon: add a --respawn-limit option Allow limiting the number of times supervise-daemon will attempt to respawn a daemon once it has died to prevent infinite respawning. Also, set a reasonable default limit (10 times in a 5 second period). This is for issue #126. --- man/openrc-run.8 | 6 ++++++ man/supervise-daemon.8 | 20 ++++++++++++++++++++ sh/supervise-daemon.sh | 1 + src/rc/supervise-daemon.c | 37 ++++++++++++++++++++++++++++++++++++- 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/man/openrc-run.8 b/man/openrc-run.8 index 25ec5b91..c7ac2ac1 100644 --- a/man/openrc-run.8 +++ b/man/openrc-run.8 @@ -167,6 +167,12 @@ Display name used for the above defined command. Process name to match when signaling the daemon. .It Ar stopsig Signal to send when stopping the daemon. +.It Ar respawn_limit +Respawn limit +.Xr supervise-daemon 8 +will use for this daemon. See +.Xr supervise-daemon 8 +for more information about this setting. .It Ar retry Retry schedule to use when stopping the daemon. It can either be a timeout in seconds or multiple signal/timeout pairs (like SIGTERM/5). diff --git a/man/supervise-daemon.8 b/man/supervise-daemon.8 index 06087675..43e74ef7 100644 --- a/man/supervise-daemon.8 +++ b/man/supervise-daemon.8 @@ -34,6 +34,8 @@ .Ar user .Fl r , -chroot .Ar chrootpath +.Fl R , -respawn-limit +.Ar limit .Fl 1 , -stdout .Ar logfile .Fl 2 , -stderr @@ -99,6 +101,24 @@ Modifies the scheduling priority of the daemon. .It Fl r , -chroot Ar path chroot to this directory before starting the daemon. All other paths, such as the path to the daemon, chdir and pidfile, should be relative to the chroot. +.It Fl R , -respawn-limit Ar limit +Control how aggressively +.Nm +will try to respawn a daemon when it fails to start. 
The limit argument +can be a pair of integers separated by a colon or the string unlimited. +.Pp +If a pair of integers is given, the first is a maximum number of respawn +attempts and the second is a time period. It should be interpreted as: +If the daemon dies and has to be respawned more than (first integer) +times in any time period of (second integer) seconds, exit and give up. +.Pp +For example, the default is 10:5. +This means if the supervisor respawns a daemon more than ten times +in any 5 second period, it gives up and exits. +.Pp +If unlimited is given as the limit, it means that the supervisor will +not exit or give up, no matter how many times the daemon it is +supervising needs to be respawned. .It Fl u , -user Ar user Start the daemon as the specified user. .It Fl 1 , -stdout Ar logfile diff --git a/sh/supervise-daemon.sh b/sh/supervise-daemon.sh index bff68a4c..c6130edb 100644 --- a/sh/supervise-daemon.sh +++ b/sh/supervise-daemon.sh @@ -25,6 +25,7 @@ supervise_start() eval supervise-daemon --start \ ${chroot:+--chroot} $chroot \ ${pidfile:+--pidfile} $pidfile \ + ${respawn_limit:+--respawn-limit} $respawn_limit \ ${command_user+--user} $command_user \ $supervise_daemon_args \ $command \ diff --git a/src/rc/supervise-daemon.c b/src/rc/supervise-daemon.c index 2080e4a6..bd24d782 100644 --- a/src/rc/supervise-daemon.c +++ b/src/rc/supervise-daemon.c @@ -66,7 +66,7 @@ static struct pam_conv conv = { NULL, NULL}; const char *applet = NULL; const char *extraopts = NULL; -const char *getoptstring = "d:e:g:I:Kk:N:p:r:Su:1:2:" \ +const char *getoptstring = "d:e:g:I:Kk:N:p:r:R:Su:1:2:" \ getoptstring_COMMON; const struct option longopts[] = { { "chdir", 1, NULL, 'd'}, @@ -79,6 +79,7 @@ const struct option longopts[] = { { "pidfile", 1, NULL, 'p'}, { "user", 1, NULL, 'u'}, { "chroot", 1, NULL, 'r'}, + { "respawn-limit", 1, NULL, 'R'}, { "start", 0, NULL, 'S'}, { "stdout", 1, NULL, '1'}, { "stderr", 1, NULL, '2'}, @@ -95,6 +96,7 @@ const char * const longopts_help[] = { "Match pid found in 
this file", "Change the process user", "Chroot to this directory", + "set a respawn limit", "Start daemon", "Redirect stdout to file", "Redirect stderr to file", @@ -424,7 +426,13 @@ int main(int argc, char **argv) char *p; char *token; int i; + int n; char exec_file[PATH_MAX]; + int respawn_count = 0; + int respawn_max = 10; + int respawn_period = 5; + time_t respawn_now= 0; + time_t first_spawn= 0; struct passwd *pw; struct group *gr; FILE *fp; @@ -519,6 +527,17 @@ int main(int argc, char **argv) ch_root = optarg; break; + case 'R': /* --respawn-limit unlimited|count:period */ + if (strcasecmp(optarg, "unlimited") == 0) { + respawn_max = 0; + respawn_period = 0; + } else { + n = sscanf(optarg, "%d:%d", &respawn_max, &respawn_period); + if (n != 2 || respawn_max < 1 || respawn_period < 1) + eerrorx("Invalid respawn-limit setting '%s'", optarg); + } + break; + case 'u': /* --user | */ { p = optarg; @@ -713,6 +732,22 @@ int main(int argc, char **argv) syslog(LOG_INFO, "stopping %s, pid %d", exec, child_pid); kill(child_pid, SIGTERM); } else { + if (respawn_max > 0 && respawn_period > 0) { + respawn_now = time(NULL); + if (first_spawn == 0) + first_spawn = respawn_now; + if (respawn_now - first_spawn > respawn_period) { + respawn_count = 0; + first_spawn = 0; + } else + respawn_count++; + if (respawn_count >= respawn_max) { + syslog(LOG_INFO, "respawned \"%s\" too many times, " + "exiting", exec); + exiting = true; + continue; + } + } if (WIFEXITED(i)) syslog(LOG_INFO, "%s, pid %d, exited with return code %d", exec, child_pid, WEXITSTATUS(i));