summaryrefslogtreecommitdiff
path: root/src/libnm-systemd-shared/src/basic/process-util.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libnm-systemd-shared/src/basic/process-util.c')
-rw-r--r--src/libnm-systemd-shared/src/basic/process-util.c284
1 files changed, 88 insertions, 196 deletions
diff --git a/src/libnm-systemd-shared/src/basic/process-util.c b/src/libnm-systemd-shared/src/basic/process-util.c
index fb0b38fa49..70aa15f060 100644
--- a/src/libnm-systemd-shared/src/basic/process-util.c
+++ b/src/libnm-systemd-shared/src/basic/process-util.c
@@ -8,7 +8,6 @@
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
-#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/personality.h>
#include <sys/prctl.h>
@@ -22,19 +21,25 @@
#include "alloc-util.h"
#include "architecture.h"
+#include "argv-util.h"
+#include "env-file.h"
#include "env-util.h"
#include "errno-util.h"
#include "escape.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
+#include "hostname-util.h"
#include "locale-util.h"
#include "log.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_sched.h"
#include "missing_syscall.h"
+#include "mountpoint-util.h"
#include "namespace-util.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "raw-clone.h"
@@ -253,149 +258,45 @@ int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags
return 0;
}
-static int update_argv(const char name[], size_t l) {
- static int can_do = -1;
-
- if (can_do == 0)
- return 0;
- can_do = false; /* We'll set it to true only if the whole process works */
-
- /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
- * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
- * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
- * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
- * mmap() is not. */
- if (geteuid() != 0)
- return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
- "Skipping PR_SET_MM, as we don't have privileges.");
-
- static size_t mm_size = 0;
- static char *mm = NULL;
+int container_get_leader(const char *machine, pid_t *pid) {
+ _cleanup_free_ char *s = NULL, *class = NULL;
+ const char *p;
+ pid_t leader;
int r;
- if (mm_size < l+1) {
- size_t nn_size;
- char *nn;
-
- nn_size = PAGE_ALIGN(l+1);
- nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- if (nn == MAP_FAILED)
- return log_debug_errno(errno, "mmap() failed: %m");
-
- strncpy(nn, name, nn_size);
-
- /* Now, let's tell the kernel about this new memory */
- if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
- if (ERRNO_IS_PRIVILEGE(errno))
- return log_debug_errno(errno, "PR_SET_MM_ARG_START failed: %m");
-
- /* HACK: prctl() API is kind of dumb on this point. The existing end address may already be
- * below the desired start address, in which case the kernel may have kicked this back due
- * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in
- * action). The proper solution would be to have a prctl() API that could set both start+end
- * simultaneously, or at least let us query the existing address to anticipate this condition
- * and respond accordingly. For now, we can only guess at the cause of this failure and try
- * a workaround--which will briefly expand the arg space to something potentially huge before
- * resizing it to what we want. */
- log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m");
-
- if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) {
- r = log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m");
- (void) munmap(nn, nn_size);
- return r;
- }
-
- if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0)
- return log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m");
- } else {
- /* And update the end pointer to the new end, too. If this fails, we don't really know what
- * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure,
- * and continue. */
- if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
- log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
- }
+ assert(machine);
+ assert(pid);
- if (mm)
- (void) munmap(mm, mm_size);
-
- mm = nn;
- mm_size = nn_size;
- } else {
- strncpy(mm, name, mm_size);
-
- /* Update the end pointer, continuing regardless of any failure. */
- if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
- log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
+ if (streq(machine, ".host")) {
+ *pid = 1;
+ return 0;
}
- can_do = true;
- return 0;
-}
-
-int rename_process(const char name[]) {
- bool truncated = false;
-
- /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
- * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
- * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
- * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
- * truncated.
- *
- * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
-
- if (isempty(name))
- return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
-
- if (!is_main_thread())
- return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
- * cache things without locking, and we make assumptions that PR_SET_NAME sets the
- * process name that isn't correct on any other threads */
-
- size_t l = strlen(name);
-
- /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
- * can use PR_SET_NAME, which sets the thread name for the calling thread. */
- if (prctl(PR_SET_NAME, name) < 0)
- log_debug_errno(errno, "PR_SET_NAME failed: %m");
- if (l >= TASK_COMM_LEN) /* Linux userspace process names can be 15 chars at max */
- truncated = true;
-
- /* Second step, change glibc's ID of the process name. */
- if (program_invocation_name) {
- size_t k;
-
- k = strlen(program_invocation_name);
- strncpy(program_invocation_name, name, k);
- if (l > k)
- truncated = true;
- }
+ if (!hostname_is_valid(machine, 0))
+ return -EINVAL;
- /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
- * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
- * the end. This is the best option for changing /proc/self/cmdline. */
- (void) update_argv(name, l);
-
- /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
- * it still looks here */
- if (saved_argc > 0) {
- if (saved_argv[0]) {
- size_t k;
-
- k = strlen(saved_argv[0]);
- strncpy(saved_argv[0], name, k);
- if (l > k)
- truncated = true;
- }
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p,
+ "LEADER", &s,
+ "CLASS", &class);
+ if (r == -ENOENT)
+ return -EHOSTDOWN;
+ if (r < 0)
+ return r;
+ if (!s)
+ return -EIO;
- for (int i = 1; i < saved_argc; i++) {
- if (!saved_argv[i])
- break;
+ if (!streq_ptr(class, "container"))
+ return -EIO;
- memzero(saved_argv[i], strlen(saved_argv[i]));
- }
- }
+ r = parse_pid(s, &leader);
+ if (r < 0)
+ return r;
+ if (leader <= 1)
+ return -EIO;
- return !truncated;
+ *pid = leader;
+ return 0;
}
int is_kernel_thread(pid_t pid) {
@@ -864,6 +765,23 @@ void sigterm_wait(pid_t pid) {
(void) wait_for_terminate(pid, NULL);
}
+void sigkill_nowait(pid_t pid) {
+ assert(pid > 1);
+
+ (void) kill(pid, SIGKILL);
+}
+
+void sigkill_nowaitp(pid_t *pid) {
+ PROTECT_ERRNO;
+
+ if (!pid)
+ return;
+ if (*pid <= 1)
+ return;
+
+ sigkill_nowait(*pid);
+}
+
int kill_and_sigcont(pid_t pid, int sig) {
int r;
@@ -1352,15 +1270,26 @@ int safe_fork_full(
}
if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
-
/* Optionally, make sure we never propagate mounts to the host. */
-
if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
_exit(EXIT_FAILURE);
}
}
+ if (FLAGS_SET(flags, FORK_PRIVATE_TMP)) {
+ assert(FLAGS_SET(flags, FORK_NEW_MOUNTNS));
+
+ /* Optionally, overmount new tmpfs instance on /tmp/. */
+ r = mount_nofollow("tmpfs", "/tmp", "tmpfs",
+ MS_NOSUID|MS_NODEV,
+ "mode=01777" TMPFS_LIMITS_RUN);
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to overmount /tmp/: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
if (flags & FORK_CLOSE_ALL_FDS) {
/* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
log_close();
@@ -1372,6 +1301,14 @@ int safe_fork_full(
}
}
+ if (flags & FORK_CLOEXEC_OFF) {
+ r = fd_cloexec_many(except_fds, n_except_fds, false);
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to turn off O_CLOEXEC on file descriptors: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
/* When we were asked to reopen the logs, do so again now */
if (flags & FORK_REOPEN_LOG) {
log_open();
@@ -1519,6 +1456,20 @@ int pidfd_get_pid(int fd, pid_t *ret) {
return parse_pid(p, ret);
}
+int pidfd_verify_pid(int pidfd, pid_t pid) {
+ pid_t current_pid;
+ int r;
+
+ assert(pidfd >= 0);
+ assert(pid > 0);
+
+ r = pidfd_get_pid(pidfd, &current_pid);
+ if (r < 0)
+ return r;
+
+ return current_pid != pid ? -ESRCH : 0;
+}
+
static int rlimit_to_nice(rlim_t limit) {
if (limit <= 1)
return PRIO_MAX-1; /* i.e. 19 */
@@ -1575,40 +1526,6 @@ int setpriority_closest(int priority) {
return 0;
}
-bool invoked_as(char *argv[], const char *token) {
- if (!argv || isempty(argv[0]))
- return false;
-
- if (isempty(token))
- return false;
-
- return strstr(last_path_component(argv[0]), token);
-}
-
-bool invoked_by_systemd(void) {
- int r;
-
- /* If the process is directly executed by PID1 (e.g. ExecStart= or generator), systemd-importd,
- * or systemd-homed, then $SYSTEMD_EXEC_PID= is set, and read the command line. */
- const char *e = getenv("SYSTEMD_EXEC_PID");
- if (!e)
- return false;
-
- if (streq(e, "*"))
- /* For testing. */
- return true;
-
- pid_t p;
- r = parse_pid(e, &p);
- if (r < 0) {
- /* We know that systemd sets the variable correctly. Something else must have set it. */
- log_debug_errno(r, "Failed to parse \"SYSTEMD_EXEC_PID=%s\", ignoring: %m", e);
- return false;
- }
-
- return getpid_cached() == p;
-}
-
_noreturn_ void freeze(void) {
log_close();
@@ -1630,31 +1547,6 @@ _noreturn_ void freeze(void) {
pause();
}
-bool argv_looks_like_help(int argc, char **argv) {
- char **l;
-
- /* Scans the command line for indications the user asks for help. This is supposed to be called by
- * tools that do not implement getopt() style command line parsing because they are not primarily
- * user-facing. Detects four ways of asking for help:
- *
- * 1. Passing zero arguments
- * 2. Passing "help" as first argument
- * 3. Passing --help as any argument
- * 4. Passing -h as any argument
- */
-
- if (argc <= 1)
- return true;
-
- if (streq_ptr(argv[1], "help"))
- return true;
-
- l = strv_skip(argv, 1);
-
- return strv_contains(l, "--help") ||
- strv_contains(l, "-h");
-}
-
static const char *const sigchld_code_table[] = {
[CLD_EXITED] = "exited",
[CLD_KILLED] = "killed",