diff options
Diffstat (limited to 'src/libnm-systemd-shared/src/basic/process-util.c')
-rw-r--r-- | src/libnm-systemd-shared/src/basic/process-util.c | 284 |
1 files changed, 88 insertions, 196 deletions
diff --git a/src/libnm-systemd-shared/src/basic/process-util.c b/src/libnm-systemd-shared/src/basic/process-util.c index fb0b38fa49..70aa15f060 100644 --- a/src/libnm-systemd-shared/src/basic/process-util.c +++ b/src/libnm-systemd-shared/src/basic/process-util.c @@ -8,7 +8,6 @@ #include <stdbool.h> #include <stdio.h> #include <stdlib.h> -#include <sys/mman.h> #include <sys/mount.h> #include <sys/personality.h> #include <sys/prctl.h> @@ -22,19 +21,25 @@ #include "alloc-util.h" #include "architecture.h" +#include "argv-util.h" +#include "env-file.h" #include "env-util.h" #include "errno-util.h" #include "escape.h" #include "fd-util.h" #include "fileio.h" #include "fs-util.h" +#include "hostname-util.h" #include "locale-util.h" #include "log.h" #include "macro.h" #include "memory-util.h" #include "missing_sched.h" #include "missing_syscall.h" +#include "mountpoint-util.h" #include "namespace-util.h" +#include "nulstr-util.h" +#include "parse-util.h" #include "path-util.h" #include "process-util.h" #include "raw-clone.h" @@ -253,149 +258,45 @@ int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags return 0; } -static int update_argv(const char name[], size_t l) { - static int can_do = -1; - - if (can_do == 0) - return 0; - can_do = false; /* We'll set it to true only if the whole process works */ - - /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the - * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is - * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if - * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but - * mmap() is not. 
*/ - if (geteuid() != 0) - return log_debug_errno(SYNTHETIC_ERRNO(EPERM), - "Skipping PR_SET_MM, as we don't have privileges."); - - static size_t mm_size = 0; - static char *mm = NULL; +int container_get_leader(const char *machine, pid_t *pid) { + _cleanup_free_ char *s = NULL, *class = NULL; + const char *p; + pid_t leader; int r; - if (mm_size < l+1) { - size_t nn_size; - char *nn; - - nn_size = PAGE_ALIGN(l+1); - nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (nn == MAP_FAILED) - return log_debug_errno(errno, "mmap() failed: %m"); - - strncpy(nn, name, nn_size); - - /* Now, let's tell the kernel about this new memory */ - if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) { - if (ERRNO_IS_PRIVILEGE(errno)) - return log_debug_errno(errno, "PR_SET_MM_ARG_START failed: %m"); - - /* HACK: prctl() API is kind of dumb on this point. The existing end address may already be - * below the desired start address, in which case the kernel may have kicked this back due - * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in - * action). The proper solution would be to have a prctl() API that could set both start+end - * simultaneously, or at least let us query the existing address to anticipate this condition - * and respond accordingly. For now, we can only guess at the cause of this failure and try - * a workaround--which will briefly expand the arg space to something potentially huge before - * resizing it to what we want. 
*/ - log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m"); - - if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) { - r = log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m"); - (void) munmap(nn, nn_size); - return r; - } - - if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) - return log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m"); - } else { - /* And update the end pointer to the new end, too. If this fails, we don't really know what - * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure, - * and continue. */ - if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) - log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m"); - } + assert(machine); + assert(pid); - if (mm) - (void) munmap(mm, mm_size); - - mm = nn; - mm_size = nn_size; - } else { - strncpy(mm, name, mm_size); - - /* Update the end pointer, continuing regardless of any failure. */ - if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0) - log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m"); + if (streq(machine, ".host")) { + *pid = 1; + return 0; } - can_do = true; - return 0; -} - -int rename_process(const char name[]) { - bool truncated = false; - - /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's - * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in - * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded; - * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be - * truncated. - * - * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. 
*/ - - if (isempty(name)) - return -EINVAL; /* let's not confuse users unnecessarily with an empty name */ - - if (!is_main_thread()) - return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we - * cache things without locking, and we make assumptions that PR_SET_NAME sets the - * process name that isn't correct on any other threads */ - - size_t l = strlen(name); - - /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we - * can use PR_SET_NAME, which sets the thread name for the calling thread. */ - if (prctl(PR_SET_NAME, name) < 0) - log_debug_errno(errno, "PR_SET_NAME failed: %m"); - if (l >= TASK_COMM_LEN) /* Linux userspace process names can be 15 chars at max */ - truncated = true; - - /* Second step, change glibc's ID of the process name. */ - if (program_invocation_name) { - size_t k; - - k = strlen(program_invocation_name); - strncpy(program_invocation_name, name, k); - if (l > k) - truncated = true; - } + if (!hostname_is_valid(machine, 0)) + return -EINVAL; - /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but - * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at - * the end. This is the best option for changing /proc/self/cmdline. 
*/ - (void) update_argv(name, l); - - /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if - * it still looks here */ - if (saved_argc > 0) { - if (saved_argv[0]) { - size_t k; - - k = strlen(saved_argv[0]); - strncpy(saved_argv[0], name, k); - if (l > k) - truncated = true; - } + p = strjoina("/run/systemd/machines/", machine); + r = parse_env_file(NULL, p, + "LEADER", &s, + "CLASS", &class); + if (r == -ENOENT) + return -EHOSTDOWN; + if (r < 0) + return r; + if (!s) + return -EIO; - for (int i = 1; i < saved_argc; i++) { - if (!saved_argv[i]) - break; + if (!streq_ptr(class, "container")) + return -EIO; - memzero(saved_argv[i], strlen(saved_argv[i])); - } - } + r = parse_pid(s, &leader); + if (r < 0) + return r; + if (leader <= 1) + return -EIO; - return !truncated; + *pid = leader; + return 0; } int is_kernel_thread(pid_t pid) { @@ -864,6 +765,23 @@ void sigterm_wait(pid_t pid) { (void) wait_for_terminate(pid, NULL); } +void sigkill_nowait(pid_t pid) { + assert(pid > 1); + + (void) kill(pid, SIGKILL); +} + +void sigkill_nowaitp(pid_t *pid) { + PROTECT_ERRNO; + + if (!pid) + return; + if (*pid <= 1) + return; + + sigkill_nowait(*pid); +} + int kill_and_sigcont(pid_t pid, int sig) { int r; @@ -1352,15 +1270,26 @@ int safe_fork_full( } if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) { - /* Optionally, make sure we never propagate mounts to the host. */ - if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) { log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m"); _exit(EXIT_FAILURE); } } + if (FLAGS_SET(flags, FORK_PRIVATE_TMP)) { + assert(FLAGS_SET(flags, FORK_NEW_MOUNTNS)); + + /* Optionally, overmount new tmpfs instance on /tmp/. 
*/ + r = mount_nofollow("tmpfs", "/tmp", "tmpfs", + MS_NOSUID|MS_NODEV, + "mode=01777" TMPFS_LIMITS_RUN); + if (r < 0) { + log_full_errno(prio, r, "Failed to overmount /tmp/: %m"); + _exit(EXIT_FAILURE); + } + } + if (flags & FORK_CLOSE_ALL_FDS) { /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */ log_close(); @@ -1372,6 +1301,14 @@ int safe_fork_full( } } + if (flags & FORK_CLOEXEC_OFF) { + r = fd_cloexec_many(except_fds, n_except_fds, false); + if (r < 0) { + log_full_errno(prio, r, "Failed to turn off O_CLOEXEC on file descriptors: %m"); + _exit(EXIT_FAILURE); + } + } + /* When we were asked to reopen the logs, do so again now */ if (flags & FORK_REOPEN_LOG) { log_open(); @@ -1519,6 +1456,20 @@ int pidfd_get_pid(int fd, pid_t *ret) { return parse_pid(p, ret); } +int pidfd_verify_pid(int pidfd, pid_t pid) { + pid_t current_pid; + int r; + + assert(pidfd >= 0); + assert(pid > 0); + + r = pidfd_get_pid(pidfd, &current_pid); + if (r < 0) + return r; + + return current_pid != pid ? -ESRCH : 0; +} + static int rlimit_to_nice(rlim_t limit) { if (limit <= 1) return PRIO_MAX-1; /* i.e. 19 */ @@ -1575,40 +1526,6 @@ int setpriority_closest(int priority) { return 0; } -bool invoked_as(char *argv[], const char *token) { - if (!argv || isempty(argv[0])) - return false; - - if (isempty(token)) - return false; - - return strstr(last_path_component(argv[0]), token); -} - -bool invoked_by_systemd(void) { - int r; - - /* If the process is directly executed by PID1 (e.g. ExecStart= or generator), systemd-importd, - * or systemd-homed, then $SYSTEMD_EXEC_PID= is set, and read the command line. */ - const char *e = getenv("SYSTEMD_EXEC_PID"); - if (!e) - return false; - - if (streq(e, "*")) - /* For testing. */ - return true; - - pid_t p; - r = parse_pid(e, &p); - if (r < 0) { - /* We know that systemd sets the variable correctly. Something else must have set it. 
*/ - log_debug_errno(r, "Failed to parse \"SYSTEMD_EXEC_PID=%s\", ignoring: %m", e); - return false; - } - - return getpid_cached() == p; -} - _noreturn_ void freeze(void) { log_close(); @@ -1630,31 +1547,6 @@ _noreturn_ void freeze(void) { pause(); } -bool argv_looks_like_help(int argc, char **argv) { - char **l; - - /* Scans the command line for indications the user asks for help. This is supposed to be called by - * tools that do not implement getopt() style command line parsing because they are not primarily - * user-facing. Detects four ways of asking for help: - * - * 1. Passing zero arguments - * 2. Passing "help" as first argument - * 3. Passing --help as any argument - * 4. Passing -h as any argument - */ - - if (argc <= 1) - return true; - - if (streq_ptr(argv[1], "help")) - return true; - - l = strv_skip(argv, 1); - - return strv_contains(l, "--help") || - strv_contains(l, "-h"); -} - static const char *const sigchld_code_table[] = { [CLD_EXITED] = "exited", [CLD_KILLED] = "killed", |