diff options
81 files changed, 2919 insertions, 879 deletions
diff --git a/Makefile.am b/Makefile.am index acd2de317c..f20cbaa8d7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2321,6 +2321,7 @@ src_libnm_systemd_shared_libnm_systemd_shared_la_SOURCES = \ src/libnm-systemd-shared/sd-adapt-shared/blockdev-util.h \ src/libnm-systemd-shared/sd-adapt-shared/build.h \ src/libnm-systemd-shared/sd-adapt-shared/chase-symlinks.h \ + src/libnm-systemd-shared/sd-adapt-shared/chase.h \ src/libnm-systemd-shared/sd-adapt-shared/copy.h \ src/libnm-systemd-shared/sd-adapt-shared/dhcp-server-internal.h \ src/libnm-systemd-shared/sd-adapt-shared/dirent-util.h \ @@ -2331,7 +2332,9 @@ src_libnm_systemd_shared_libnm_systemd_shared_la_SOURCES = \ src/libnm-systemd-shared/sd-adapt-shared/hmac.h \ src/libnm-systemd-shared/sd-adapt-shared/idn-util.h \ src/libnm-systemd-shared/sd-adapt-shared/ioprio.h \ + src/libnm-systemd-shared/sd-adapt-shared/mallinfo-util.h \ src/libnm-systemd-shared/sd-adapt-shared/memfd-util.h \ + src/libnm-systemd-shared/sd-adapt-shared/memstream-util.h \ src/libnm-systemd-shared/sd-adapt-shared/missing_fs.h \ src/libnm-systemd-shared/sd-adapt-shared/missing_ioprio.h \ src/libnm-systemd-shared/sd-adapt-shared/missing_keyctl.h \ @@ -2347,6 +2350,7 @@ src_libnm_systemd_shared_libnm_systemd_shared_la_SOURCES = \ src/libnm-systemd-shared/sd-adapt-shared/nm-sd-adapt-shared.h \ src/libnm-systemd-shared/sd-adapt-shared/nulstr-util.h \ src/libnm-systemd-shared/sd-adapt-shared/os-util.h \ + src/libnm-systemd-shared/sd-adapt-shared/psi-util.h \ src/libnm-systemd-shared/sd-adapt-shared/raw-clone.h \ src/libnm-systemd-shared/sd-adapt-shared/rlimit-util.h \ src/libnm-systemd-shared/sd-adapt-shared/sync-util.h \ @@ -2396,11 +2400,13 @@ src_libnm_systemd_shared_libnm_systemd_shared_la_SOURCES = \ src/libnm-systemd-shared/src/basic/inotify-util.h \ src/libnm-systemd-shared/src/basic/io-util.c \ src/libnm-systemd-shared/src/basic/io-util.h \ + src/libnm-systemd-shared/src/basic/label.c \ + src/libnm-systemd-shared/src/basic/label.h \ 
src/libnm-systemd-shared/src/basic/list.h \ src/libnm-systemd-shared/src/basic/locale-util.c \ src/libnm-systemd-shared/src/basic/locale-util.h \ + src/libnm-systemd-shared/src/basic/lock-util.h \ src/libnm-systemd-shared/src/basic/log.h \ - src/libnm-systemd-shared/src/basic/logarithm.h \ src/libnm-systemd-shared/src/basic/macro.h \ src/libnm-systemd-shared/src/basic/memory-util.c \ src/libnm-systemd-shared/src/basic/memory-util.h \ @@ -2411,9 +2417,11 @@ src_libnm_systemd_shared_libnm_systemd_shared_la_SOURCES = \ src/libnm-systemd-shared/src/basic/missing_socket.h \ src/libnm-systemd-shared/src/basic/missing_stat.h \ src/libnm-systemd-shared/src/basic/missing_syscall.h \ + src/libnm-systemd-shared/src/basic/missing_threads.h \ src/libnm-systemd-shared/src/basic/missing_type.h \ src/libnm-systemd-shared/src/basic/ordered-set.c \ src/libnm-systemd-shared/src/basic/ordered-set.h \ + src/libnm-systemd-shared/src/basic/origin-id.h \ src/libnm-systemd-shared/src/basic/parse-util.c \ src/libnm-systemd-shared/src/basic/parse-util.h \ src/libnm-systemd-shared/src/basic/path-util.c \ @@ -2453,6 +2461,7 @@ src_libnm_systemd_shared_libnm_systemd_shared_la_SOURCES = \ src/libnm-systemd-shared/src/basic/user-util.h \ src/libnm-systemd-shared/src/basic/utf8.c \ src/libnm-systemd-shared/src/basic/utf8.h \ + src/libnm-systemd-shared/src/fundamental/logarithm.h \ src/libnm-systemd-shared/src/fundamental/macro-fundamental.h \ src/libnm-systemd-shared/src/fundamental/memory-util-fundamental.h \ src/libnm-systemd-shared/src/fundamental/sha256.c \ @@ -2500,6 +2509,7 @@ src_libnm_systemd_core_libnm_systemd_core_la_SOURCES = \ src/libnm-systemd-core/sd-adapt-core/nm-sd-adapt-core.c \ src/libnm-systemd-core/sd-adapt-core/nm-sd-adapt-core.h \ src/libnm-systemd-core/sd-adapt-core/sd-daemon.h \ + src/libnm-systemd-core/sd-adapt-core/sd-messages.h \ src/libnm-systemd-core/sd-adapt-core/udev-util.h \ src/libnm-systemd-core/src/libsystemd-network/dhcp-identifier.c \ 
src/libnm-systemd-core/src/libsystemd-network/dhcp-identifier.h \ diff --git a/config.h.meson b/config.h.meson index bda542035b..a08ce2e0ad 100644 --- a/config.h.meson +++ b/config.h.meson @@ -46,6 +46,9 @@ /* Define to 1 if you have the <sys/auxv.h> header file. */ #mesondefine HAVE_SYS_AUXV_H +/* Define to 1 if you have the <threads.h> header file. */ +#mesondefine HAVE_THREADS_H + /* Define if you have Linux Wireless Extensions support */ #mesondefine HAVE_WEXT diff --git a/configure.ac b/configure.ac index 5d746ef81c..8c9b0f1dc5 100644 --- a/configure.ac +++ b/configure.ac @@ -120,6 +120,9 @@ AC_CHECK_DECLS([ ]]) AC_CHECK_HEADERS(sys/auxv.h) +AC_CHECK_HEADERS(threads.h, + [], + [AC_DEFINE([HAVE_THREADS_H], [0], [Define to 1 if you have the <threads.h> header file.])]) AC_CHECK_DECLS([getrandom], [AC_DEFINE([USE_SYS_RANDOM_H], [1], [sys/random.h is usable]) diff --git a/meson.build b/meson.build index c108c3bfb5..c4b459ceb8 100644 --- a/meson.build +++ b/meson.build @@ -104,6 +104,7 @@ endforeach # headers config_h.set10('HAVE_SYS_AUXV_H', cc.has_header('sys/auxv.h')) +config_h.set10('HAVE_THREADS_H', cc.has_header('threads.h')) use_sys_random = cc.has_function('getrandom', prefix: '#include <sys/random.h>') config_h.set10('USE_SYS_RANDOM_H', use_sys_random) diff --git a/src/libnm-systemd-core/sd-adapt-core/sd-messages.h b/src/libnm-systemd-core/sd-adapt-core/sd-messages.h new file mode 100644 index 0000000000..637892c2d6 --- /dev/null +++ b/src/libnm-systemd-core/sd-adapt-core/sd-messages.h @@ -0,0 +1,3 @@ +#pragma once + +/* dummy header */ diff --git a/src/libnm-systemd-core/src/libsystemd-network/sd-dhcp6-client.c b/src/libnm-systemd-core/src/libsystemd-network/sd-dhcp6-client.c index 3fafde8485..a2605b718c 100644 --- a/src/libnm-systemd-core/src/libsystemd-network/sd-dhcp6-client.c +++ b/src/libnm-systemd-core/src/libsystemd-network/sd-dhcp6-client.c @@ -1286,7 +1286,6 @@ static int client_receive_message( .msg_control = &control, .msg_controllen = 
sizeof(control), }; - struct cmsghdr *cmsg; triple_timestamp t = {}; _cleanup_free_ DHCP6Message *message = NULL; struct in6_addr *server_address = NULL; @@ -1330,12 +1329,9 @@ static int client_receive_message( server_address = &sa.in6.sin6_addr; } - CMSG_FOREACH(cmsg, &msg) { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SO_TIMESTAMP && - cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval))) - triple_timestamp_from_realtime(&t, timeval_load(CMSG_TYPED_DATA(cmsg, struct timeval))); - } + struct timeval *tv = CMSG_FIND_AND_COPY_DATA(&msg, SOL_SOCKET, SCM_TIMESTAMP, struct timeval); + if (tv) + triple_timestamp_from_realtime(&t, timeval_load(tv)); if (client->transaction_id != (message->transaction_id & htobe32(0x00ffffff))) return 0; diff --git a/src/libnm-systemd-core/src/libsystemd/sd-event/event-source.h b/src/libnm-systemd-core/src/libsystemd/sd-event/event-source.h index 6092652d0f..f4e38d78d0 100644 --- a/src/libnm-systemd-core/src/libsystemd/sd-event/event-source.h +++ b/src/libnm-systemd-core/src/libsystemd/sd-event/event-source.h @@ -27,6 +27,7 @@ typedef enum EventSourceType { SOURCE_EXIT, SOURCE_WATCHDOG, SOURCE_INOTIFY, + SOURCE_MEMORY_PRESSURE, _SOURCE_EVENT_SOURCE_TYPE_MAX, _SOURCE_EVENT_SOURCE_TYPE_INVALID = -EINVAL, } EventSourceType; @@ -129,6 +130,17 @@ struct sd_event_source { struct inode_data *inode_data; LIST_FIELDS(sd_event_source, by_inode_data); } inotify; + struct { + int fd; + sd_event_handler_t callback; + void *write_buffer; + size_t write_buffer_size; + uint32_t events, revents; + LIST_FIELDS(sd_event_source, write_list); + bool registered:1; + bool locked:1; + bool in_write_list:1; + } memory_pressure; }; }; diff --git a/src/libnm-systemd-core/src/libsystemd/sd-event/event-util.c b/src/libnm-systemd-core/src/libsystemd/sd-event/event-util.c index da275aa95c..663296f408 100644 --- a/src/libnm-systemd-core/src/libsystemd/sd-event/event-util.c +++ b/src/libnm-systemd-core/src/libsystemd/sd-event/event-util.c @@ -48,7 +48,7 
@@ int event_reset_time( return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "sd-event: Current clock id %i of event source \"%s\" is different from specified one %i.", (int)c, - strna((*s)->description ? : description), + strna((*s)->description ?: description), (int)clock); r = sd_event_source_set_time(*s, usec); diff --git a/src/libnm-systemd-core/src/libsystemd/sd-event/sd-event.c b/src/libnm-systemd-core/src/libsystemd/sd-event/sd-event.c index e6680e90d4..5c7ba1821b 100644 --- a/src/libnm-systemd-core/src/libsystemd/sd-event/sd-event.c +++ b/src/libnm-systemd-core/src/libsystemd/sd-event/sd-event.c @@ -9,6 +9,7 @@ #include "sd-daemon.h" #include "sd-event.h" #include "sd-id128.h" +#include "sd-messages.h" #include "alloc-util.h" #include "env-util.h" @@ -17,15 +18,24 @@ #include "fs-util.h" #include "glyph-util.h" #include "hashmap.h" +#include "hexdecoct.h" #include "list.h" #include "logarithm.h" #include "macro.h" +#include "mallinfo-util.h" #include "memory-util.h" +#include "missing_magic.h" #include "missing_syscall.h" +#include "missing_threads.h" +#include "origin-id.h" +#include "path-util.h" #include "prioq.h" #include "process-util.h" +#include "psi-util.h" #include "set.h" #include "signal-util.h" +#include "socket-util.h" +#include "stat-util.h" #include "string-table.h" #include "string-util.h" #include "strxcpyx.h" @@ -54,7 +64,7 @@ static bool event_source_is_offline(sd_event_source *s) { static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = { [SOURCE_IO] = "io", [SOURCE_TIME_REALTIME] = "realtime", - [SOURCE_TIME_BOOTTIME] = "bootime", + [SOURCE_TIME_BOOTTIME] = "boottime", [SOURCE_TIME_MONOTONIC] = "monotonic", [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm", [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm", @@ -65,6 +75,7 @@ static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] [SOURCE_EXIT] = "exit", [SOURCE_WATCHDOG] = "watchdog", [SOURCE_INOTIFY] = "inotify", + [SOURCE_MEMORY_PRESSURE] = 
"memory-pressure", }; DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int); @@ -87,7 +98,8 @@ DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int); SOURCE_TIME_BOOTTIME_ALARM, \ SOURCE_SIGNAL, \ SOURCE_DEFER, \ - SOURCE_INOTIFY) + SOURCE_INOTIFY, \ + SOURCE_MEMORY_PRESSURE) /* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put(). * Time sources and ratelimited sources can be passed, so effectively this is the same as the @@ -132,7 +144,10 @@ struct sd_event { /* A list of inotify objects that already have events buffered which aren't processed yet */ LIST_HEAD(struct inotify_data, buffered_inotify_data_list); - pid_t original_pid; + /* A list of memory pressure event sources that still need their subscription string written */ + LIST_HEAD(sd_event_source, memory_pressure_write_list); + + uint64_t origin_id; uint64_t iteration; triple_timestamp timestamp; @@ -162,6 +177,8 @@ struct sd_event { unsigned delays[sizeof(usec_t) * 8]; }; +DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_event, event); + static thread_local sd_event *default_event = NULL; static void source_disconnect(sd_event_source *s); @@ -398,7 +415,7 @@ _public_ int sd_event_new(sd_event** ret) { .boottime_alarm.fd = -EBADF, .boottime_alarm.next = USEC_INFINITY, .perturb = USEC_INFINITY, - .original_pid = getpid_cached(), + .origin_id = origin_id_query(), }; r = prioq_ensure_allocated(&e->pending, pending_prioq_compare); @@ -427,7 +444,31 @@ fail: return r; } -DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free); +/* Define manually so we can add the origin check */ +_public_ sd_event *sd_event_ref(sd_event *e) { + if (!e) + return NULL; + if (event_origin_changed(e)) + return NULL; + + e->n_ref++; + + return e; +} + +_public_ sd_event* sd_event_unref(sd_event *e) { + if (!e) + return NULL; + if (event_origin_changed(e)) + return NULL; + + assert(e->n_ref > 0); + if (--e->n_ref > 0) + return NULL; + + return 
event_free(e); +} + #define PROTECT_EVENT(e) \ _unused_ _cleanup_(sd_event_unrefp) sd_event *_ref = sd_event_ref(e); @@ -437,20 +478,11 @@ _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) { return sd_event_source_unref(s); } -static bool event_pid_changed(sd_event *e) { - assert(e); - - /* We don't support people creating an event loop and keeping - * it around over a fork(). Let's complain. */ - - return e->original_pid != getpid_cached(); -} - static void source_io_unregister(sd_event_source *s) { assert(s); assert(s->type == SOURCE_IO); - if (event_pid_changed(s->event)) + if (event_origin_changed(s->event)) return; if (!s->io.registered) @@ -491,7 +523,7 @@ static void source_child_pidfd_unregister(sd_event_source *s) { assert(s); assert(s->type == SOURCE_CHILD); - if (event_pid_changed(s->event)) + if (event_origin_changed(s->event)) return; if (!s->child.registered) @@ -526,6 +558,67 @@ static int source_child_pidfd_register(sd_event_source *s, int enabled) { return 0; } +static void source_memory_pressure_unregister(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (event_origin_changed(s->event)) + return; + + if (!s->memory_pressure.registered) + return; + + if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->memory_pressure.fd, NULL) < 0) + log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m", + strna(s->description), event_source_type_to_string(s->type)); + + s->memory_pressure.registered = false; +} + +static int source_memory_pressure_register(sd_event_source *s, int enabled) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + assert(enabled != SD_EVENT_OFF); + + struct epoll_event ev = { + .events = s->memory_pressure.write_buffer_size > 0 ? EPOLLOUT : + (s->memory_pressure.events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0)), + .data.ptr = s, + }; + + if (epoll_ctl(s->event->epoll_fd, + s->memory_pressure.registered ? 
EPOLL_CTL_MOD : EPOLL_CTL_ADD, + s->memory_pressure.fd, &ev) < 0) + return -errno; + + s->memory_pressure.registered = true; + return 0; +} + +#if 0 /* NM_IGNORED */ +static void source_memory_pressure_add_to_write_list(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (s->memory_pressure.in_write_list) + return; + + LIST_PREPEND(memory_pressure.write_list, s->event->memory_pressure_write_list, s); + s->memory_pressure.in_write_list = true; +} +#endif /* NM_IGNORED */ + +static void source_memory_pressure_remove_from_write_list(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (!s->memory_pressure.in_write_list) + return; + + LIST_REMOVE(memory_pressure.write_list, s->event->memory_pressure_write_list, s); + s->memory_pressure.in_write_list = false; +} + static clockid_t event_source_type_to_clock(EventSourceType t) { switch (t) { @@ -623,7 +716,7 @@ static int event_make_signal_data( assert(e); - if (event_pid_changed(e)) + if (event_origin_changed(e)) return -ECHILD; if (e->signal_sources && e->signal_sources[sig]) @@ -720,7 +813,7 @@ static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig return; } - if (event_pid_changed(e)) + if (event_origin_changed(e)) return; assert(d->fd >= 0); @@ -883,7 +976,7 @@ static void source_disconnect(sd_event_source *s) { break; case SOURCE_CHILD: - if (event_pid_changed(s->event)) + if (event_origin_changed(s->event)) s->child.process_owned = false; if (s->child.pid > 0) { @@ -949,6 +1042,11 @@ static void source_disconnect(sd_event_source *s) { break; } + case SOURCE_MEMORY_PRESSURE: + source_memory_pressure_remove_from_write_list(s); + source_memory_pressure_unregister(s); + break; + default: assert_not_reached(); } @@ -1019,6 +1117,11 @@ static sd_event_source* source_free(sd_event_source *s) { s->child.pidfd = safe_close(s->child.pidfd); } + if (s->type == SOURCE_MEMORY_PRESSURE) { + s->memory_pressure.fd = 
safe_close(s->memory_pressure.fd); + s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer); + } + if (s->destroy_callback) s->destroy_callback(s->userdata); @@ -1077,22 +1180,48 @@ static int source_set_pending(sd_event_source *s, bool b) { } static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) { + + /* Let's allocate exactly what we need. Note that the difference of the smallest event source + * structure to the largest is 144 bytes on x86-64 at the time of writing, i.e. more than two cache + * lines. */ + static const size_t size_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = { + [SOURCE_IO] = endoffsetof_field(sd_event_source, io), + [SOURCE_TIME_REALTIME] = endoffsetof_field(sd_event_source, time), + [SOURCE_TIME_BOOTTIME] = endoffsetof_field(sd_event_source, time), + [SOURCE_TIME_MONOTONIC] = endoffsetof_field(sd_event_source, time), + [SOURCE_TIME_REALTIME_ALARM] = endoffsetof_field(sd_event_source, time), + [SOURCE_TIME_BOOTTIME_ALARM] = endoffsetof_field(sd_event_source, time), + [SOURCE_SIGNAL] = endoffsetof_field(sd_event_source, signal), + [SOURCE_CHILD] = endoffsetof_field(sd_event_source, child), + [SOURCE_DEFER] = endoffsetof_field(sd_event_source, defer), + [SOURCE_POST] = endoffsetof_field(sd_event_source, post), + [SOURCE_EXIT] = endoffsetof_field(sd_event_source, exit), + [SOURCE_INOTIFY] = endoffsetof_field(sd_event_source, inotify), + [SOURCE_MEMORY_PRESSURE] = endoffsetof_field(sd_event_source, memory_pressure), + }; + sd_event_source *s; assert(e); + assert(type >= 0); + assert(type < _SOURCE_EVENT_SOURCE_TYPE_MAX); + assert(size_table[type] > 0); - s = new(sd_event_source, 1); + s = malloc0(size_table[type]); if (!s) return NULL; - - *s = (struct sd_event_source) { - .n_ref = 1, - .event = e, - .floating = floating, - .type = type, - .pending_index = PRIOQ_IDX_NULL, - .prepare_index = PRIOQ_IDX_NULL, - }; + /* We use expand_to_usable() here to tell gcc that it should consider this an object of the 
full + * size, even if we only allocate the initial part we need. */ + s = expand_to_usable(s, sizeof(sd_event_source)); + + /* Note: we cannot use compound initialization here, because sizeof(sd_event_source) is likely larger + * than what we allocated here. */ + s->n_ref = 1; + s->event = e; + s->floating = floating; + s->type = type; + s->pending_index = PRIOQ_IDX_NULL; + s->prepare_index = PRIOQ_IDX_NULL; if (!floating) sd_event_ref(e); @@ -1125,7 +1254,7 @@ _public_ int sd_event_add_io( assert_return(fd >= 0, -EBADF); assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (!callback) callback = io_exit_callback; @@ -1153,22 +1282,21 @@ _public_ int sd_event_add_io( } static void initialize_perturb(sd_event *e) { - sd_id128_t bootid = {}; + sd_id128_t id = {}; - /* When we sleep for longer, we try to realign the wakeup to - the same time within each minute/second/250ms, so that - events all across the system can be coalesced into a single - CPU wakeup. However, let's take some system-specific - randomness for this value, so that in a network of systems - with synced clocks timer events are distributed a - bit. Here, we calculate a perturbation usec offset from the - boot ID. */ + /* When we sleep for longer, we try to realign the wakeup to the same time within each + * minute/second/250ms, so that events all across the system can be coalesced into a single CPU + * wakeup. However, let's take some system-specific randomness for this value, so that in a network + * of systems with synced clocks timer events are distributed a bit. Here, we calculate a + * perturbation usec offset from the boot ID (or machine ID if failed, e.g. /proc is not mounted). 
*/ if (_likely_(e->perturb != USEC_INFINITY)) return; - if (sd_id128_get_boot(&bootid) >= 0) - e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE; + if (sd_id128_get_boot(&id) >= 0 || sd_id128_get_machine(&id) >= 0) + e->perturb = (id.qwords[0] ^ id.qwords[1]) % USEC_PER_MINUTE; + else + e->perturb = 0; /* This is a super early process without /proc and /etc ?? */ } static int event_setup_timer_fd( @@ -1273,7 +1401,7 @@ _public_ int sd_event_add_time( assert_return(e = event_resolve(e), -ENOPKG); assert_return(accuracy != UINT64_MAX, -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */ return -EOPNOTSUPP; @@ -1361,7 +1489,7 @@ _public_ int sd_event_add_signal( assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); /* Let's make sure our special flag stays outside of the valid signal range */ assert_cc(_NSIG < SD_EVENT_SIGNAL_PROCMASK); @@ -1471,7 +1599,7 @@ _public_ int sd_event_add_child( assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL); assert_return(options != 0, -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (!callback) callback = child_exit_callback; @@ -1569,7 +1697,7 @@ _public_ int sd_event_add_child_pidfd( assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL); assert_return(options != 0, -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (!callback) callback = child_exit_callback; @@ -1651,7 +1779,7 @@ _public_ 
int sd_event_add_defer( assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (!callback) callback = generic_exit_callback; @@ -1687,7 +1815,7 @@ _public_ int sd_event_add_post( assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (!callback) callback = generic_exit_callback; @@ -1725,7 +1853,7 @@ _public_ int sd_event_add_exit( assert_return(e = event_resolve(e), -ENOPKG); assert_return(callback, -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); r = prioq_ensure_allocated(&e->exit, exit_prioq_compare); if (r < 0) @@ -1751,6 +1879,262 @@ _public_ int sd_event_add_exit( return 0; } +#if 0 /* NM_IGNORED */ +_public_ int sd_event_trim_memory(void) { + int r; + + /* A default implementation of a memory pressure callback. Simply releases our own allocation caches + * and glibc's. This is automatically used when people call sd_event_add_memory_pressure() with a + * NULL callback parameter. 
*/ + + log_debug("Memory pressure event, trimming malloc() memory."); + +#if HAVE_GENERIC_MALLINFO + generic_mallinfo before_mallinfo = generic_mallinfo_get(); +#endif + + usec_t before_timestamp = now(CLOCK_MONOTONIC); + hashmap_trim_pools(); + r = malloc_trim(0); + usec_t after_timestamp = now(CLOCK_MONOTONIC); + + if (r > 0) + log_debug("Successfully trimmed some memory."); + else + log_debug("Couldn't trim any memory."); + + usec_t period = after_timestamp - before_timestamp; + +#if HAVE_GENERIC_MALLINFO + generic_mallinfo after_mallinfo = generic_mallinfo_get(); + size_t l = LESS_BY((size_t) before_mallinfo.hblkhd, (size_t) after_mallinfo.hblkhd) + + LESS_BY((size_t) before_mallinfo.arena, (size_t) after_mallinfo.arena); + log_struct(LOG_DEBUG, + LOG_MESSAGE("Memory trimming took %s, returned %s to OS.", + FORMAT_TIMESPAN(period, 0), + FORMAT_BYTES(l)), + "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR, + "TRIMMED_BYTES=%zu", l, + "TRIMMED_USEC=" USEC_FMT, period); +#else + log_struct(LOG_DEBUG, + LOG_MESSAGE("Memory trimming took %s.", + FORMAT_TIMESPAN(period, 0)), + "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR, + "TRIMMED_USEC=" USEC_FMT, period); +#endif + + return 0; +} + +static int memory_pressure_callback(sd_event_source *s, void *userdata) { + assert(s); + + sd_event_trim_memory(); + return 0; +} + +_public_ int sd_event_add_memory_pressure( + sd_event *e, + sd_event_source **ret, + sd_event_handler_t callback, + void *userdata) { + + _cleanup_free_ char *w = NULL; + _cleanup_(source_freep) sd_event_source *s = NULL; + _cleanup_close_ int path_fd = -EBADF, fd = -EBADF; + _cleanup_free_ void *write_buffer = NULL; + const char *watch, *watch_fallback = NULL, *env; + size_t write_buffer_size = 0; + struct stat st; + uint32_t events; + bool locked; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!callback) + 
callback = memory_pressure_callback; + + s = source_new(e, !ret, SOURCE_MEMORY_PRESSURE); + if (!s) + return -ENOMEM; + + s->wakeup = WAKEUP_EVENT_SOURCE; + s->memory_pressure.callback = callback; + s->userdata = userdata; + s->enabled = SD_EVENT_ON; + s->memory_pressure.fd = -EBADF; + + env = secure_getenv("MEMORY_PRESSURE_WATCH"); + if (env) { + if (isempty(env) || path_equal(env, "/dev/null")) + return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN), + "Memory pressure logic is explicitly disabled via $MEMORY_PRESSURE_WATCH."); + + if (!path_is_absolute(env) || !path_is_normalized(env)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "$MEMORY_PRESSURE_WATCH set to invalid path: %s", env); + + watch = env; + + env = secure_getenv("MEMORY_PRESSURE_WRITE"); + if (env) { + r = unbase64mem(env, SIZE_MAX, &write_buffer, &write_buffer_size); + if (r < 0) + return r; + } + + locked = true; + } else { + + r = is_pressure_supported(); + if (r < 0) + return r; + if (r == 0) + return -EOPNOTSUPP; + + /* By default we want to watch memory pressure on the local cgroup, but we'll fall back on + * the system wide pressure if for some reason we cannot (which could be: memory controller + * not delegated to us, or PSI simply not available in the kernel). On legacy cgroupv1 we'll + * only use the system-wide logic. 
*/ + r = cg_all_unified(); + if (r < 0) + return r; + if (r == 0) + watch = "/proc/pressure/memory"; + else { + _cleanup_free_ char *cg = NULL; + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cg); + if (r < 0) + return r; + + w = path_join("/sys/fs/cgroup", cg, "memory.pressure"); + if (!w) + return -ENOMEM; + + watch = w; + watch_fallback = "/proc/pressure/memory"; + } + + /* Android uses three levels in its userspace low memory killer logic: + * some 70000 1000000 + * some 100000 1000000 + * full 70000 1000000 + * + * GNOME's low memory monitor uses: + * some 70000 1000000 + * some 100000 1000000 + * full 100000 1000000 + * + * We'll default to the middle level that both agree on. Except we do it on a 2s window + * (i.e. 200ms per 2s, rather than 100ms per 1s), because that's the window duration the + * kernel will allow us to do unprivileged, also in the future. */ + if (asprintf((char**) &write_buffer, + "%s " USEC_FMT " " USEC_FMT, + MEMORY_PRESSURE_DEFAULT_TYPE, + MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC, + MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0) + return -ENOMEM; + + write_buffer_size = strlen(write_buffer) + 1; + locked = false; + } + + path_fd = open(watch, O_PATH|O_CLOEXEC); + if (path_fd < 0) { + if (errno != ENOENT) + return -errno; + + /* We got ENOENT. Three options now: try the fallback if we have one, or return the error as + * is (if based on user/env config), or return -EOPNOTSUPP (because we picked the path, and + * the PSI service apparently is not supported) */ + if (!watch_fallback) + return locked ? -ENOENT : -EOPNOTSUPP; + + path_fd = open(watch_fallback, O_PATH|O_CLOEXEC); + if (path_fd < 0) { + if (errno == ENOENT) /* PSI is not available in the kernel even under the fallback path? 
*/ + return -EOPNOTSUPP; + return -errno; + } + } + + if (fstat(path_fd, &st) < 0) + return -errno; + + if (S_ISSOCK(st.st_mode)) { + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return -errno; + + r = connect_unix_path(fd, path_fd, NULL); + if (r < 0) + return r; + + events = EPOLLIN; + + } else if (S_ISREG(st.st_mode) || S_ISFIFO(st.st_mode) || S_ISCHR(st.st_mode)) { + fd = fd_reopen(path_fd, (write_buffer_size > 0 ? O_RDWR : O_RDONLY) |O_CLOEXEC|O_NONBLOCK|O_NOCTTY); + if (fd < 0) + return fd; + + if (S_ISREG(st.st_mode)) { + struct statfs sfs; + + /* If this is a regular file validate this is a procfs or cgroupfs file, where we look for EPOLLPRI */ + + if (fstatfs(fd, &sfs) < 0) + return -errno; + + if (!is_fs_type(&sfs, PROC_SUPER_MAGIC) && + !is_fs_type(&sfs, CGROUP2_SUPER_MAGIC)) + return -ENOTTY; + + events = EPOLLPRI; + } else + /* For fifos and char devices just watch for EPOLLIN */ + events = EPOLLIN; + + } else if (S_ISDIR(st.st_mode)) + return -EISDIR; + else + return -EBADF; + + s->memory_pressure.fd = TAKE_FD(fd); + s->memory_pressure.write_buffer = TAKE_PTR(write_buffer); + s->memory_pressure.write_buffer_size = write_buffer_size; + s->memory_pressure.events = events; + s->memory_pressure.locked = locked; + + /* So here's the thing: if we are talking to PSI we need to write the watch string before adding the + * fd to epoll (if we ignore this, then the watch won't work). Hence we'll not actually register the + * fd with the epoll right-away. Instead, we just add the event source to a list of memory pressure + * event sources on which writes must be executed before the first event loop iteration is + * executed. (We could also write the data here, right away, but we want to give the caller the + * freedom to call sd_event_source_set_memory_pressure_type() and + * sd_event_source_set_memory_pressure_rate() before we write it. 
*/ + + if (s->memory_pressure.write_buffer_size > 0) + source_memory_pressure_add_to_write_list(s); + else { + r = source_memory_pressure_register(s, s->enabled); + if (r < 0) + return r; + } + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} +#endif /* NM_IGNORED */ + static void event_free_inotify_data(sd_event *e, struct inotify_data *d) { assert(e); @@ -1769,7 +2153,7 @@ static void event_free_inotify_data(sd_event *e, struct inotify_data *d) { assert_se(hashmap_remove(e->inotify_data, &d->priority) == d); if (d->fd >= 0) { - if (!event_pid_changed(e) && + if (!event_origin_changed(e) && epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0) log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m"); @@ -1880,7 +2264,7 @@ static void event_free_inode_data( if (d->inotify_data) { if (d->wd >= 0) { - if (d->inotify_data->fd >= 0 && !event_pid_changed(e)) { + if (d->inotify_data->fd >= 0 && !event_origin_changed(e)) { /* So here's a problem. At the time this runs the watch descriptor might already be * invalidated, because an IN_IGNORED event might be queued right the moment we enter * the syscall. 
Hence, whenever we get EINVAL, ignore it entirely, since it's a very @@ -2087,7 +2471,7 @@ static int event_add_inotify_fd_internal( assert_return(e = event_resolve(e), -ENOPKG); assert_return(fd >= 0, -EBADF); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (!callback) callback = inotify_exit_callback; @@ -2220,7 +2604,7 @@ DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_sou _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) { assert_return(s, -EINVAL); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); return free_and_strdup(&s->description, description); } @@ -2228,7 +2612,6 @@ _public_ int sd_event_source_set_description(sd_event_source *s, const char *des _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) { assert_return(s, -EINVAL); assert_return(description, -EINVAL); - assert_return(!event_pid_changed(s->event), -ECHILD); if (!s->description) return -ENXIO; @@ -2239,6 +2622,7 @@ _public_ int sd_event_source_get_description(sd_event_source *s, const char **de _public_ sd_event *sd_event_source_get_event(sd_event_source *s) { assert_return(s, NULL); + assert_return(!event_origin_changed(s->event), NULL); return s->event; } @@ -2247,7 +2631,7 @@ _public_ int sd_event_source_get_pending(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type != SOURCE_EXIT, -EDOM); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); return s->pending; } @@ -2255,7 +2639,7 @@ _public_ int sd_event_source_get_pending(sd_event_source *s) { _public_ int sd_event_source_get_io_fd(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_IO, 
-EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); return s->io.fd; } @@ -2266,7 +2650,7 @@ _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) { assert_return(s, -EINVAL); assert_return(fd >= 0, -EBADF); assert_return(s->type == SOURCE_IO, -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); if (s->io.fd == fd) return 0; @@ -2299,6 +2683,7 @@ _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) { _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); return s->io.owned; } @@ -2306,6 +2691,7 @@ _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) { _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); s->io.owned = own; return 0; @@ -2315,7 +2701,7 @@ _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) assert_return(s, -EINVAL); assert_return(events, -EINVAL); assert_return(s->type == SOURCE_IO, -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); *events = s->io.events; return 0; @@ -2328,7 +2714,7 @@ _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) assert_return(s->type == SOURCE_IO, -EDOM); assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); /* edge-triggered updates are never skipped, so we can reset edges */ if (s->io.events == events && !(events 
& EPOLLET)) @@ -2354,7 +2740,7 @@ _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revent assert_return(revents, -EINVAL); assert_return(s->type == SOURCE_IO, -EDOM); assert_return(s->pending, -ENODATA); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); *revents = s->io.revents; return 0; @@ -2363,14 +2749,14 @@ _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revent _public_ int sd_event_source_get_signal(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_SIGNAL, -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); return s->signal.sig; } _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) { assert_return(s, -EINVAL); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); *priority = s->priority; return 0; @@ -2384,7 +2770,7 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) assert_return(s, -EINVAL); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); if (s->priority == priority) return 0; @@ -2484,7 +2870,7 @@ _public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) { return false; assert_return(s, -EINVAL); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); if (ret) *ret = s->enabled; @@ -2542,6 +2928,10 @@ static int event_source_offline( prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index); break; + case SOURCE_MEMORY_PRESSURE: + source_memory_pressure_unregister(s); + break; + case SOURCE_TIME_REALTIME: case SOURCE_TIME_BOOTTIME: case SOURCE_TIME_MONOTONIC: @@ -2629,6 +3019,13 @@ static int event_source_online( 
s->event->n_online_child_sources++; break; + case SOURCE_MEMORY_PRESSURE: + r = source_memory_pressure_register(s, enabled); + if (r < 0) + return r; + + break; + case SOURCE_TIME_REALTIME: case SOURCE_TIME_BOOTTIME: case SOURCE_TIME_MONOTONIC: @@ -2667,7 +3064,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { return 0; assert_return(s, -EINVAL); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */ if (s->event->state == SD_EVENT_FINISHED) @@ -2699,7 +3096,7 @@ _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) { assert_return(s, -EINVAL); assert_return(usec, -EINVAL); assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); *usec = s->time.next; return 0; @@ -2711,7 +3108,7 @@ _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) { assert_return(s, -EINVAL); assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); r = source_set_pending(s, false); if (r < 0) @@ -2729,6 +3126,7 @@ _public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec assert_return(s, -EINVAL); assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); if (usec == USEC_INFINITY) return sd_event_source_set_time(s, USEC_INFINITY); @@ -2748,7 +3146,7 @@ _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *use assert_return(s, -EINVAL); assert_return(usec, -EINVAL); assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + 
assert_return(!event_origin_changed(s->event), -ECHILD); *usec = s->time.accuracy; return 0; @@ -2761,7 +3159,7 @@ _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec assert_return(usec != UINT64_MAX, -EINVAL); assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); r = source_set_pending(s, false); if (r < 0) @@ -2780,7 +3178,7 @@ _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock assert_return(s, -EINVAL); assert_return(clock, -EINVAL); assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); *clock = event_source_type_to_clock(s->type); return 0; @@ -2790,7 +3188,7 @@ _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) { assert_return(s, -EINVAL); assert_return(pid, -EINVAL); assert_return(s->type == SOURCE_CHILD, -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); *pid = s->child.pid; return 0; @@ -2799,7 +3197,7 @@ _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) { _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_CHILD, -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); if (s->child.pidfd < 0) return -EOPNOTSUPP; @@ -2810,7 +3208,7 @@ _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) { _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_CHILD, -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + 
assert_return(!event_origin_changed(s->event), -ECHILD); assert_return(SIGNAL_VALID(sig), -EINVAL); /* If we already have seen indication the process exited refuse sending a signal early. This way we @@ -2855,6 +3253,7 @@ _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, cons _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); if (s->child.pidfd < 0) return -EOPNOTSUPP; @@ -2865,6 +3264,7 @@ _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) { _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); if (s->child.pidfd < 0) return -EOPNOTSUPP; @@ -2876,6 +3276,7 @@ _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) { _public_ int sd_event_source_get_child_process_own(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); return s->child.process_owned; } @@ -2883,6 +3284,7 @@ _public_ int sd_event_source_get_child_process_own(sd_event_source *s) { _public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); s->child.process_owned = own; return 0; @@ -2892,7 +3294,7 @@ _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask assert_return(s, -EINVAL); assert_return(mask, -EINVAL); assert_return(s->type == SOURCE_INOTIFY, -EDOM); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); *mask = s->inotify.mask; return 0; @@ -2904,7 +3306,7 @@ _public_ int 
sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t assert_return(s, -EINVAL); assert_return(s->type != SOURCE_EXIT, -EDOM); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(s->event), -ECHILD); + assert_return(!event_origin_changed(s->event), -ECHILD); if (s->prepare == callback) return 0; @@ -2932,6 +3334,7 @@ _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t _public_ void* sd_event_source_get_userdata(sd_event_source *s) { assert_return(s, NULL); + assert_return(!event_origin_changed(s->event), NULL); return s->userdata; } @@ -2940,6 +3343,7 @@ _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) void *ret; assert_return(s, NULL); + assert_return(!event_origin_changed(s->event), NULL); ret = s->userdata; s->userdata = userdata; @@ -3610,6 +4014,115 @@ static int process_inotify(sd_event *e) { return done; } +static int process_memory_pressure(sd_event_source *s, uint32_t revents) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (s->pending) + s->memory_pressure.revents |= revents; + else + s->memory_pressure.revents = revents; + + return source_set_pending(s, true); +} + +static int source_memory_pressure_write(sd_event_source *s) { + ssize_t n; + int r; + + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + /* once we start writing, the buffer is locked, we allow no further changes. */ + s->memory_pressure.locked = true; + + if (s->memory_pressure.write_buffer_size > 0) { + n = write(s->memory_pressure.fd, s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size); + if (n < 0) { + if (!ERRNO_IS_TRANSIENT(errno)) { + /* If kernel is built with CONFIG_PSI_DEFAULT_DISABLED it will expose PSI + * files, but then generates EOPNOSUPP on read() and write() (instead of on + * open()!). This sucks hard, since we can only detect this kind of failure + * so late. 
Let's make the best of it, and turn off the event source like we + * do for failed event source handlers. */ + + log_debug_errno(errno, "Writing memory pressure settings to kernel failed, disabling memory pressure event source: %m"); + assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0); + return 0; + } + + n = 0; + } + } else + n = 0; + + assert(n >= 0); + + if ((size_t) n == s->memory_pressure.write_buffer_size) { + s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer); + + if (n > 0) { + s->memory_pressure.write_buffer_size = 0; + + /* Update epoll events mask, since we have now written everything and don't care for EPOLLOUT anymore */ + r = source_memory_pressure_register(s, s->enabled); + if (r < 0) + return r; + } + } else if (n > 0) { + _cleanup_free_ void *c = NULL; + + assert((size_t) n < s->memory_pressure.write_buffer_size); + + c = memdup((uint8_t*) s->memory_pressure.write_buffer + n, s->memory_pressure.write_buffer_size - n); + if (!c) + return -ENOMEM; + + free_and_replace(s->memory_pressure.write_buffer, c); + s->memory_pressure.write_buffer_size -= n; + return 1; + } + + return 0; +} + +static int source_memory_pressure_initiate_dispatch(sd_event_source *s) { + int r; + + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + r = source_memory_pressure_write(s); + if (r < 0) + return r; + if (r > 0) + return 1; /* if we wrote something, then don't continue with dispatching user dispatch + * function. Instead, shortcut it so that we wait for next EPOLLOUT immediately. */ + + /* No pending incoming IO? 
Then let's not continue further */ + if ((s->memory_pressure.revents & (EPOLLIN|EPOLLPRI)) == 0) { + + /* Treat IO errors on the notifier the same ways errors returned from a callback */ + if ((s->memory_pressure.revents & (EPOLLHUP|EPOLLERR|EPOLLRDHUP)) != 0) + return -EIO; + + return 1; /* leave dispatch, we already processed everything */ + } + + if (s->memory_pressure.revents & EPOLLIN) { + uint8_t pipe_buf[PIPE_BUF]; + ssize_t n; + + /* If the fd is readable, then flush out anything that might be queued */ + + n = read(s->memory_pressure.fd, pipe_buf, sizeof(pipe_buf)); + if (n < 0 && !ERRNO_IS_TRANSIENT(errno)) + return -errno; + } + + return 0; /* go on, dispatch to user callback */ +} + static int source_dispatch(sd_event_source *s) { EventSourceType saved_type; sd_event *saved_event; @@ -3658,6 +4171,16 @@ static int source_dispatch(sd_event_source *s) { } } + if (s->type == SOURCE_MEMORY_PRESSURE) { + r = source_memory_pressure_initiate_dispatch(s); + if (r == -EIO) /* handle EIO errors similar to callback errors */ + goto finish; + if (r < 0) + return r; + if (r > 0) /* already handled */ + return 1; + } + if (s->enabled == SD_EVENT_ONESHOT) { r = sd_event_source_set_enabled(s, SD_EVENT_OFF); if (r < 0) @@ -3744,6 +4267,10 @@ static int source_dispatch(sd_event_source *s) { break; } + case SOURCE_MEMORY_PRESSURE: + r = s->memory_pressure.callback(s, s->userdata); + break; + case SOURCE_WATCHDOG: case _SOURCE_EVENT_SOURCE_TYPE_MAX: case _SOURCE_EVENT_SOURCE_TYPE_INVALID: @@ -3752,6 +4279,7 @@ static int source_dispatch(sd_event_source *s) { s->dispatching = false; +finish: if (r < 0) { log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m", strna(s->description), @@ -3902,12 +4430,36 @@ static void event_close_inode_data_fds(sd_event *e) { } } +static int event_memory_pressure_write_list(sd_event *e) { + int r; + + assert(e); + + for (;;) { + sd_event_source *s; + + s = LIST_POP(memory_pressure.write_list, e->memory_pressure_write_list); + 
if (!s) + break; + + assert(s->type == SOURCE_MEMORY_PRESSURE); + assert(s->memory_pressure.write_buffer_size > 0); + s->memory_pressure.in_write_list = false; + + r = source_memory_pressure_write(s); + if (r < 0) + return r; + } + + return 0; +} + _public_ int sd_event_prepare(sd_event *e) { int r; assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(e->state == SD_EVENT_INITIAL, -EBUSY); @@ -3930,6 +4482,10 @@ _public_ int sd_event_prepare(sd_event *e) { if (r < 0) return r; + r = event_memory_pressure_write_list(e); + if (r < 0) + return r; + r = event_arm_timer(e, &e->realtime); if (r < 0) return r; @@ -4095,6 +4651,10 @@ static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t r = process_pidfd(e, s, e->event_queue[i].events); break; + case SOURCE_MEMORY_PRESSURE: + r = process_memory_pressure(s, e->event_queue[i].events); + break; + default: assert_not_reached(); } @@ -4138,7 +4698,7 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(e->state == SD_EVENT_ARMED, -EBUSY); @@ -4241,7 +4801,7 @@ _public_ int sd_event_dispatch(sd_event *e) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(e->state == SD_EVENT_PENDING, -EBUSY); @@ -4281,7 +4841,7 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - 
assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(e->state == SD_EVENT_INITIAL, -EBUSY); @@ -4329,9 +4889,10 @@ _public_ int sd_event_loop(sd_event *e) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); assert_return(e->state == SD_EVENT_INITIAL, -EBUSY); + PROTECT_EVENT(e); while (e->state != SD_EVENT_FINISHED) { @@ -4346,7 +4907,7 @@ _public_ int sd_event_loop(sd_event *e) { _public_ int sd_event_get_fd(sd_event *e) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); return e->epoll_fd; } @@ -4354,7 +4915,7 @@ _public_ int sd_event_get_fd(sd_event *e) { _public_ int sd_event_get_state(sd_event *e) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); return e->state; } @@ -4363,7 +4924,7 @@ _public_ int sd_event_get_exit_code(sd_event *e, int *code) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); assert_return(code, -EINVAL); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (!e->exit_requested) return -ENODATA; @@ -4376,7 +4937,7 @@ _public_ int sd_event_exit(sd_event *e, int code) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); e->exit_requested = true; e->exit_code = code; @@ -4388,7 +4949,7 @@ _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) { assert_return(e, -EINVAL); assert_return(e = 
event_resolve(e), -ENOPKG); assert_return(usec, -EINVAL); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock)) return -EOPNOTSUPP; @@ -4432,7 +4993,7 @@ _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); assert_return(tid, -EINVAL); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (e->tid != 0) { *tid = e->tid; @@ -4447,7 +5008,7 @@ _public_ int sd_event_set_watchdog(sd_event *e, int b) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); if (e->watchdog == !!b) return e->watchdog; @@ -4497,7 +5058,7 @@ fail: _public_ int sd_event_get_watchdog(sd_event *e) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); return e->watchdog; } @@ -4505,7 +5066,7 @@ _public_ int sd_event_get_watchdog(sd_event *e) { _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) { assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(!event_pid_changed(e), -ECHILD); + assert_return(!event_origin_changed(e), -ECHILD); *ret = e->iteration; return 0; @@ -4513,6 +5074,8 @@ _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) { _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) { assert_return(s, -EINVAL); + assert_return(s->event, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); s->destroy_callback = callback; return 0; @@ -4520,6 +5083,7 @@ _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_d _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) 
{ assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); if (ret) *ret = s->destroy_callback; @@ -4529,6 +5093,7 @@ _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_d _public_ int sd_event_source_get_floating(sd_event_source *s) { assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); return s->floating; } @@ -4536,6 +5101,7 @@ _public_ int sd_event_source_get_floating(sd_event_source *s) { _public_ int sd_event_source_set_floating(sd_event_source *s, int b) { assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); if (s->floating == !!b) return 0; @@ -4560,6 +5126,7 @@ _public_ int sd_event_source_set_floating(sd_event_source *s, int b) { _public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type != SOURCE_EXIT, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); return s->exit_on_failure; } @@ -4567,6 +5134,7 @@ _public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) { _public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) { assert_return(s, -EINVAL); assert_return(s->type != SOURCE_EXIT, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); if (s->exit_on_failure == !!b) return 0; @@ -4579,6 +5147,7 @@ _public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval int r; assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); /* Turning on ratelimiting on event source types that don't support it, is a loggable offense. Doing * so is a programming error. 
*/ @@ -4596,6 +5165,7 @@ _public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval _public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) { assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); s->ratelimit_expire_callback = callback; return 0; @@ -4603,6 +5173,7 @@ _public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, s _public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) { assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence * don't use assert_return(). Unlike turning on ratelimiting it's not really a programming error. */ @@ -4622,6 +5193,7 @@ _public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_int _public_ int sd_event_source_is_ratelimited(sd_event_source *s) { assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type)) return false; @@ -4632,6 +5204,27 @@ _public_ int sd_event_source_is_ratelimited(sd_event_source *s) { return s->ratelimited; } +_public_ int sd_event_source_leave_ratelimit(sd_event_source *s) { + int r; + + assert_return(s, -EINVAL); + + if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type)) + return 0; + + if (!ratelimit_configured(&s->rate_limit)) + return 0; + + if (!s->ratelimited) + return 0; + + r = event_source_leave_ratelimit(s, /* run_callback */ false); + if (r < 0) + return r; + + return 1; /* tell caller that we indeed just left the ratelimit state */ +} + _public_ int sd_event_set_signal_exit(sd_event *e, int b) { bool change = false; int r; @@ -4683,4 +5276,94 @@ _public_ int sd_event_set_signal_exit(sd_event *e, int b) { return change; } + +_public_ int sd_event_source_set_memory_pressure_type(sd_event_source *s, const char *ty) { + 
_cleanup_free_ char *b = NULL; + _cleanup_free_ void *w = NULL; + + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM); + assert_return(ty, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (!STR_IN_SET(ty, "some", "full")) + return -EINVAL; + + if (s->memory_pressure.locked) /* Refuse adjusting parameters, if caller told us how to watch for events */ + return -EBUSY; + + char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size); + if (!space) + return -EINVAL; + + size_t l = (char*) space - (char*) s->memory_pressure.write_buffer; + b = memdup_suffix0(s->memory_pressure.write_buffer, l); + if (!b) + return -ENOMEM; + if (!STR_IN_SET(b, "some", "full")) + return -EINVAL; + + if (streq(b, ty)) + return 0; + + size_t nl = strlen(ty) + (s->memory_pressure.write_buffer_size - l); + w = new(char, nl); + if (!w) + return -ENOMEM; + + memcpy(stpcpy(w, ty), space, (s->memory_pressure.write_buffer_size - l)); + + free_and_replace(s->memory_pressure.write_buffer, w); + s->memory_pressure.write_buffer_size = nl; + s->memory_pressure.locked = false; + + return 1; +} + +_public_ int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) { + _cleanup_free_ char *b = NULL; + _cleanup_free_ void *w = NULL; + + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (threshold_usec <= 0 || threshold_usec >= UINT64_MAX) + return -ERANGE; + if (window_usec <= 0 || window_usec >= UINT64_MAX) + return -ERANGE; + if (threshold_usec > window_usec) + return -EINVAL; + + if (s->memory_pressure.locked) /* Refuse adjusting parameters, if caller told us how to watch for events */ + return -EBUSY; + + char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size); + if (!space) + return -EINVAL; + + size_t l = 
(char*) space - (char*) s->memory_pressure.write_buffer; + b = memdup_suffix0(s->memory_pressure.write_buffer, l); + if (!b) + return -ENOMEM; + if (!STR_IN_SET(b, "some", "full")) + return -EINVAL; + + if (asprintf((char**) &w, + "%s " USEC_FMT " " USEC_FMT "", + b, + threshold_usec, + window_usec) < 0) + return -EINVAL; + + l = strlen(w) + 1; + if (memcmp_nn(s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size, w, l) == 0) + return 0; + + free_and_replace(s->memory_pressure.write_buffer, w); + s->memory_pressure.write_buffer_size = l; + s->memory_pressure.locked = false; + + return 1; +} #endif /* NM_IGNORED */ diff --git a/src/libnm-systemd-core/src/libsystemd/sd-id128/id128-util.c b/src/libnm-systemd-core/src/libsystemd/sd-id128/id128-util.c index 82564970ac..c20f7325ad 100644 --- a/src/libnm-systemd-core/src/libsystemd/sd-id128/id128-util.c +++ b/src/libnm-systemd-core/src/libsystemd/sd-id128/id128-util.c @@ -7,6 +7,7 @@ #include <unistd.h> #include "fd-util.h" +#include "fs-util.h" #include "hexdecoct.h" #include "id128-util.h" #include "io-util.h" @@ -44,8 +45,9 @@ bool id128_is_valid(const char *s) { } #endif /* NM_IGNORED */ -int id128_read_fd(int fd, Id128FormatFlag f, sd_id128_t *ret) { +int id128_read_fd(int fd, Id128Flag f, sd_id128_t *ret) { char buffer[SD_ID128_UUID_STRING_MAX + 1]; /* +1 is for trailing newline */ + sd_id128_t id; ssize_t l; int r; @@ -101,22 +103,35 @@ int id128_read_fd(int fd, Id128FormatFlag f, sd_id128_t *ret) { return -EUCLEAN; } - r = sd_id128_from_string(buffer, ret); - return r == -EINVAL ? 
-EUCLEAN : r; + r = sd_id128_from_string(buffer, &id); + if (r == -EINVAL) + return -EUCLEAN; + if (r < 0) + return r; + + if (FLAGS_SET(f, ID128_REFUSE_NULL) && sd_id128_is_null(id)) + return -ENOMEDIUM; + + if (ret) + *ret = id; + return 0; } -int id128_read(const char *p, Id128FormatFlag f, sd_id128_t *ret) { +int id128_read_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t *ret) { _cleanup_close_ int fd = -EBADF; - fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY); + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + + fd = xopenat(dir_fd, path, O_RDONLY|O_CLOEXEC|O_NOCTTY, /* xopen_flags = */ 0, /* mode = */ 0); if (fd < 0) - return -errno; + return fd; return id128_read_fd(fd, f, ret); } #if 0 /* NM_IGNORED */ -int id128_write_fd(int fd, Id128FormatFlag f, sd_id128_t id) { +int id128_write_fd(int fd, Id128Flag f, sd_id128_t id) { char buffer[SD_ID128_UUID_STRING_MAX + 1]; /* +1 is for trailing newline */ size_t sz; int r; @@ -124,6 +139,9 @@ int id128_write_fd(int fd, Id128FormatFlag f, sd_id128_t id) { assert(fd >= 0); assert(IN_SET((f & ID128_FORMAT_ANY), ID128_FORMAT_PLAIN, ID128_FORMAT_UUID)); + if (FLAGS_SET(f, ID128_REFUSE_NULL) && sd_id128_is_null(id)) + return -ENOMEDIUM; + if (FLAGS_SET(f, ID128_FORMAT_PLAIN)) { assert_se(sd_id128_to_string(id, buffer)); sz = SD_ID128_STRING_MAX; @@ -146,12 +164,15 @@ int id128_write_fd(int fd, Id128FormatFlag f, sd_id128_t id) { return 0; } -int id128_write(const char *p, Id128FormatFlag f, sd_id128_t id) { +int id128_write_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t id) { _cleanup_close_ int fd = -EBADF; - fd = open(p, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_TRUNC, 0444); + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + + fd = xopenat(dir_fd, path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_TRUNC, /* xopen_flags = */ 0, 0444); if (fd < 0) - return -errno; + return fd; return id128_write_fd(fd, f, id); } diff --git a/src/libnm-systemd-core/src/libsystemd/sd-id128/id128-util.h 
b/src/libnm-systemd-core/src/libsystemd/sd-id128/id128-util.h index e094de6441..7bcbd8e558 100644 --- a/src/libnm-systemd-core/src/libsystemd/sd-id128/id128-util.h +++ b/src/libnm-systemd-core/src/libsystemd/sd-id128/id128-util.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #pragma once +#include <fcntl.h> #include <stdbool.h> #include "sd-id128.h" @@ -10,19 +11,29 @@ bool id128_is_valid(const char *s) _pure_; -typedef enum Id128FormatFlag { - ID128_FORMAT_PLAIN = 1 << 0, /* formatted as 32 hex chars as-is */ - ID128_FORMAT_UUID = 1 << 1, /* formatted as 36 character uuid string */ - ID128_FORMAT_ANY = ID128_FORMAT_PLAIN | ID128_FORMAT_UUID, +typedef enum Id128Flag { + ID128_FORMAT_PLAIN = 1 << 0, /* formatted as 32 hex chars as-is */ + ID128_FORMAT_UUID = 1 << 1, /* formatted as 36 character uuid string */ + ID128_FORMAT_ANY = ID128_FORMAT_PLAIN | ID128_FORMAT_UUID, ID128_SYNC_ON_WRITE = 1 << 2, /* Sync the file after write. Used only when writing an ID. */ -} Id128FormatFlag; - -int id128_read_fd(int fd, Id128FormatFlag f, sd_id128_t *ret); -int id128_read(const char *p, Id128FormatFlag f, sd_id128_t *ret); - -int id128_write_fd(int fd, Id128FormatFlag f, sd_id128_t id); -int id128_write(const char *p, Id128FormatFlag f, sd_id128_t id); + ID128_REFUSE_NULL = 1 << 3, /* Refuse all zero ID with -ENOMEDIUM. 
*/ +} Id128Flag; + +int id128_read_fd(int fd, Id128Flag f, sd_id128_t *ret); +int id128_read_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t *ret); +static inline int id128_read(const char *path, Id128Flag f, sd_id128_t *ret) { + return id128_read_at(AT_FDCWD, path, f, ret); +} + +int id128_write_fd(int fd, Id128Flag f, sd_id128_t id); +int id128_write_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t id); +static inline int id128_write(const char *path, Id128Flag f, sd_id128_t id) { + return id128_write_at(AT_FDCWD, path, f, id); +} + +int id128_get_machine(const char *root, sd_id128_t *ret); +int id128_get_machine_at(int rfd, sd_id128_t *ret); void id128_hash_func(const sd_id128_t *p, struct siphash *state); int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) _pure_; diff --git a/src/libnm-systemd-core/src/libsystemd/sd-id128/sd-id128.c b/src/libnm-systemd-core/src/libsystemd/sd-id128/sd-id128.c index b2283efcd0..c63e1a973f 100644 --- a/src/libnm-systemd-core/src/libsystemd/sd-id128/sd-id128.c +++ b/src/libnm-systemd-core/src/libsystemd/sd-id128/sd-id128.c @@ -9,6 +9,7 @@ #include "sd-id128.h" #include "alloc-util.h" +#include "chase.h" #include "fd-util.h" #include "hexdecoct.h" #include "hmac.h" @@ -16,6 +17,8 @@ #include "io-util.h" #include "macro.h" #include "missing_syscall.h" +#include "missing_threads.h" +#include "path-util.h" #include "random-util.h" #include "stat-util.h" #include "user-util.h" @@ -129,12 +132,9 @@ _public_ int sd_id128_get_machine(sd_id128_t *ret) { int r; if (sd_id128_is_null(saved_machine_id)) { - r = id128_read("/etc/machine-id", ID128_FORMAT_PLAIN, &saved_machine_id); + r = id128_read("/etc/machine-id", ID128_FORMAT_PLAIN | ID128_REFUSE_NULL, &saved_machine_id); if (r < 0) return r; - - if (sd_id128_is_null(saved_machine_id)) - return -ENOMEDIUM; } if (ret) @@ -142,19 +142,50 @@ _public_ int sd_id128_get_machine(sd_id128_t *ret) { return 0; } +#if 0 /* NM_IGNORED */ +int id128_get_machine_at(int rfd, 
sd_id128_t *ret) { + _cleanup_close_ int fd = -EBADF; + int r; + + assert(rfd >= 0 || rfd == AT_FDCWD); + + r = dir_fd_is_root_or_cwd(rfd); + if (r < 0) + return r; + if (r > 0) + return sd_id128_get_machine(ret); + + fd = chase_and_openat(rfd, "/etc/machine-id", CHASE_AT_RESOLVE_IN_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL); + if (fd < 0) + return fd; + + return id128_read_fd(fd, ID128_FORMAT_PLAIN | ID128_REFUSE_NULL, ret); +} + +int id128_get_machine(const char *root, sd_id128_t *ret) { + _cleanup_close_ int fd = -EBADF; + + if (empty_or_root(root)) + return sd_id128_get_machine(ret); + + fd = chase_and_open("/etc/machine-id", root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL); + if (fd < 0) + return fd; + + return id128_read_fd(fd, ID128_FORMAT_PLAIN | ID128_REFUSE_NULL, ret); +} +#endif /* NM_IGNORED */ + _public_ int sd_id128_get_boot(sd_id128_t *ret) { static thread_local sd_id128_t saved_boot_id = {}; int r; if (sd_id128_is_null(saved_boot_id)) { - r = id128_read("/proc/sys/kernel/random/boot_id", ID128_FORMAT_UUID, &saved_boot_id); + r = id128_read("/proc/sys/kernel/random/boot_id", ID128_FORMAT_UUID | ID128_REFUSE_NULL, &saved_boot_id); if (r == -ENOENT && proc_mounted() == 0) return -ENOSYS; if (r < 0) return r; - - if (sd_id128_is_null(saved_boot_id)) - return -ENOMEDIUM; } if (ret) diff --git a/src/libnm-systemd-core/src/systemd/sd-event.h b/src/libnm-systemd-core/src/systemd/sd-event.h index cae4c8672a..49d6975967 100644 --- a/src/libnm-systemd-core/src/systemd/sd-event.h +++ b/src/libnm-systemd-core/src/systemd/sd-event.h @@ -99,6 +99,7 @@ int sd_event_add_inotify_fd(sd_event *e, sd_event_source **s, int fd, uint32_t m int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_add_exit(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); +int 
sd_event_add_memory_pressure(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_prepare(sd_event *e); int sd_event_wait(sd_event *e, uint64_t usec); @@ -160,6 +161,8 @@ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo int sd_event_source_send_child_signal(sd_event_source *s, int sig, const void *si, unsigned flags); #endif int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret); +int sd_event_source_set_memory_pressure_type(sd_event_source *e, const char *ty); +int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec); int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback); int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret); int sd_event_source_get_floating(sd_event_source *s); @@ -170,6 +173,9 @@ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval_usec, un int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval_usec, unsigned *ret_burst); int sd_event_source_is_ratelimited(sd_event_source *s); int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback); +int sd_event_source_leave_ratelimit(sd_event_source *s); + +int sd_event_trim_memory(void); /* Define helpers so that __attribute__((cleanup(sd_event_unrefp))) and similar may be used. 
*/ _SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event, sd_event_unref); diff --git a/src/libnm-systemd-shared/meson.build b/src/libnm-systemd-shared/meson.build index 26c21b5755..b32bd7f6e0 100644 --- a/src/libnm-systemd-shared/meson.build +++ b/src/libnm-systemd-shared/meson.build @@ -22,6 +22,7 @@ libnm_systemd_shared = static_library( 'src/basic/in-addr-util.c', 'src/basic/inotify-util.c', 'src/basic/io-util.c', + 'src/basic/label.c', 'src/basic/locale-util.c', 'src/basic/memory-util.c', 'src/basic/mempool.c', diff --git a/src/libnm-systemd-shared/sd-adapt-shared/chase.h b/src/libnm-systemd-shared/sd-adapt-shared/chase.h new file mode 100644 index 0000000000..637892c2d6 --- /dev/null +++ b/src/libnm-systemd-shared/sd-adapt-shared/chase.h @@ -0,0 +1,3 @@ +#pragma once + +/* dummy header */ diff --git a/src/libnm-systemd-shared/sd-adapt-shared/mallinfo-util.h b/src/libnm-systemd-shared/sd-adapt-shared/mallinfo-util.h new file mode 100644 index 0000000000..637892c2d6 --- /dev/null +++ b/src/libnm-systemd-shared/sd-adapt-shared/mallinfo-util.h @@ -0,0 +1,3 @@ +#pragma once + +/* dummy header */ diff --git a/src/libnm-systemd-shared/sd-adapt-shared/memstream-util.h b/src/libnm-systemd-shared/sd-adapt-shared/memstream-util.h new file mode 100644 index 0000000000..637892c2d6 --- /dev/null +++ b/src/libnm-systemd-shared/sd-adapt-shared/memstream-util.h @@ -0,0 +1,3 @@ +#pragma once + +/* dummy header */ diff --git a/src/libnm-systemd-shared/sd-adapt-shared/nm-sd-adapt-shared.h b/src/libnm-systemd-shared/sd-adapt-shared/nm-sd-adapt-shared.h index a9471b98ea..de6edd0215 100644 --- a/src/libnm-systemd-shared/sd-adapt-shared/nm-sd-adapt-shared.h +++ b/src/libnm-systemd-shared/sd-adapt-shared/nm-sd-adapt-shared.h @@ -28,9 +28,7 @@ /*****************************************************************************/ -#ifndef VALGRIND -#define VALGRIND 0 -#endif +#define HAVE_VALGRIND_VALGRIND_H 0 #define ENABLE_DEBUG_HASHMAP 0 diff --git a/src/libnm-systemd-shared/sd-adapt-shared/psi-util.h 
b/src/libnm-systemd-shared/sd-adapt-shared/psi-util.h new file mode 100644 index 0000000000..637892c2d6 --- /dev/null +++ b/src/libnm-systemd-shared/sd-adapt-shared/psi-util.h @@ -0,0 +1,3 @@ +#pragma once + +/* dummy header */ diff --git a/src/libnm-systemd-shared/src/basic/alloc-util.h b/src/libnm-systemd-shared/src/basic/alloc-util.h index bf783b15a2..9a62381df1 100644 --- a/src/libnm-systemd-shared/src/basic/alloc-util.h +++ b/src/libnm-systemd-shared/src/basic/alloc-util.h @@ -15,6 +15,7 @@ typedef void (*free_func_t)(void *p); typedef void* (*mfree_func_t)(void *p); +typedef void (*free_array_func_t)(void *p, size_t n); /* If for some reason more than 4M are allocated on the stack, let's abort immediately. It's better than * proceeding and smashing the stack limits. Note that by default RLIMIT_STACK is 8M on Linux. */ diff --git a/src/libnm-systemd-shared/src/basic/cgroup-util.h b/src/libnm-systemd-shared/src/basic/cgroup-util.h index c9aae5abf6..9b30ae0396 100644 --- a/src/libnm-systemd-shared/src/basic/cgroup-util.h +++ b/src/libnm-systemd-shared/src/basic/cgroup-util.h @@ -239,7 +239,6 @@ int cg_get_attribute_as_uint64(const char *controller, const char *path, const c /* Does a parse_boolean() on the attribute contents and sets ret accordingly */ int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret); -int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid); int cg_get_owner(const char *controller, const char *path, uid_t *ret_uid); int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags); @@ -261,6 +260,7 @@ int cg_path_get_cgroupid(const char *path, uint64_t *ret); int cg_path_get_session(const char *path, char **session); int cg_path_get_owner_uid(const char *path, uid_t *uid); int cg_path_get_unit(const char *path, char **unit); +int cg_path_get_unit_path(const char *path, char **unit); int 
cg_path_get_user_unit(const char *path, char **unit); int cg_path_get_machine_name(const char *path, char **machine); int cg_path_get_slice(const char *path, char **slice); @@ -279,7 +279,8 @@ int cg_pid_get_user_slice(pid_t pid, char **slice); int cg_path_decode_unit(const char *cgroup, char **unit); -char *cg_escape(const char *p); +bool cg_needs_escape(const char *p); +int cg_escape(const char *p, char **ret); char *cg_unescape(const char *p) _pure_; bool cg_controller_is_valid(const char *p); diff --git a/src/libnm-systemd-shared/src/basic/constants.h b/src/libnm-systemd-shared/src/basic/constants.h index 5d68cc6332..3f96786da9 100644 --- a/src/libnm-systemd-shared/src/basic/constants.h +++ b/src/libnm-systemd-shared/src/basic/constants.h @@ -42,6 +42,9 @@ #define DEFAULT_START_LIMIT_INTERVAL (10*USEC_PER_SEC) #define DEFAULT_START_LIMIT_BURST 5 +/* Wait for 1.5 seconds at maximum for freeze operation */ +#define FREEZE_TIMEOUT (1500 * USEC_PER_MSEC) + /* The default time after which exit-on-idle services exit. This * should be kept lower than the watchdog timeout, because otherwise * the watchdog pings will keep the loop busy. */ @@ -64,10 +67,8 @@ # define _CONF_PATHS_SPLIT_USR(n) #endif -/* Return a nulstr for a standard cascade of configuration paths, - * suitable to pass to conf_files_list_nulstr() or config_parse_many_nulstr() - * to implement drop-in directories for extending configuration - * files. */ +/* Return a nulstr for a standard cascade of configuration paths, suitable to pass to + * conf_files_list_nulstr() to implement drop-in directories for extending configuration files. 
*/ #define CONF_PATHS_NULSTR(n) \ "/etc/" n "\0" \ "/run/" n "\0" \ diff --git a/src/libnm-systemd-shared/src/basic/env-file.c b/src/libnm-systemd-shared/src/basic/env-file.c index a639990726..db270bedce 100644 --- a/src/libnm-systemd-shared/src/basic/env-file.c +++ b/src/libnm-systemd-shared/src/basic/env-file.c @@ -332,8 +332,7 @@ static int parse_env_file_push( if (streq(key, k)) { va_end(aq); - free(*v); - *v = value; + free_and_replace(*v, value); return 1; } @@ -361,6 +360,24 @@ int parse_env_filev( return r; } +#if 0 /* NM_IGNORED */ +int parse_env_file_fdv(int fd, const char *fname, va_list ap) { + _cleanup_fclose_ FILE *f = NULL; + va_list aq; + int r; + + assert(fd >= 0); + + r = fdopen_independent(fd, "re", &f); + if (r < 0) + return r; + + va_copy(aq, ap); + r = parse_env_file_internal(f, fname, parse_env_file_push, &aq); + va_end(aq); + return r; +} + int parse_env_file_sentinel( FILE *f, const char *fname, @@ -378,31 +395,18 @@ int parse_env_file_sentinel( return r; } -#if 0 /* NM_IGNORED */ int parse_env_file_fd_sentinel( int fd, const char *fname, /* only used for logging */ ...) 
{ - _cleanup_close_ int fd_ro = -EBADF; - _cleanup_fclose_ FILE *f = NULL; va_list ap; int r; assert(fd >= 0); - fd_ro = fd_reopen(fd, O_CLOEXEC | O_RDONLY); - if (fd_ro < 0) - return fd_ro; - - f = fdopen(fd_ro, "re"); - if (!f) - return -errno; - - TAKE_FD(fd_ro); - va_start(ap, fname); - r = parse_env_filev(f, fname, ap); + r = parse_env_file_fdv(fd, fname, ap); va_end(ap); return r; @@ -488,6 +492,7 @@ int load_env_file_pairs(FILE *f, const char *fname, char ***ret) { int r; assert(f || fname); + assert(ret); r = parse_env_file_internal(f, fname, load_env_file_push_pairs, &m); if (r < 0) @@ -497,6 +502,19 @@ int load_env_file_pairs(FILE *f, const char *fname, char ***ret) { return 0; } +int load_env_file_pairs_fd(int fd, const char *fname, char ***ret) { + _cleanup_fclose_ FILE *f = NULL; + int r; + + assert(fd >= 0); + + r = fdopen_independent(fd, "re", &f); + if (r < 0) + return r; + + return load_env_file_pairs(f, fname, ret); +} + static int merge_env_file_push( const char *filename, unsigned line, const char *key, char *value, @@ -581,14 +599,15 @@ static void write_env_var(FILE *f, const char *v) { fputc_unlocked('\n', f); } -int write_env_file(const char *fname, char **l) { +int write_env_file_at(int dir_fd, const char *fname, char **l) { _cleanup_fclose_ FILE *f = NULL; _cleanup_free_ char *p = NULL; int r; + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); assert(fname); - r = fopen_temporary(fname, &f, &p); + r = fopen_temporary_at(dir_fd, fname, &f, &p); if (r < 0) return r; @@ -599,13 +618,13 @@ int write_env_file(const char *fname, char **l) { r = fflush_and_check(f); if (r >= 0) { - if (rename(p, fname) >= 0) + if (renameat(dir_fd, p, dir_fd, fname) >= 0) return 0; r = -errno; } - (void) unlink(p); + (void) unlinkat(dir_fd, p, 0); return r; } #endif /* NM_IGNORED */ diff --git a/src/libnm-systemd-shared/src/basic/env-file.h b/src/libnm-systemd-shared/src/basic/env-file.h index 2448d943cd..2465eeddf4 100644 --- 
a/src/libnm-systemd-shared/src/basic/env-file.h +++ b/src/libnm-systemd-shared/src/basic/env-file.h @@ -1,19 +1,25 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #pragma once +#include <fcntl.h> #include <stdarg.h> #include <stdio.h> #include "macro.h" int parse_env_filev(FILE *f, const char *fname, va_list ap); +int parse_env_file_fdv(int fd, const char *fname, va_list ap); int parse_env_file_sentinel(FILE *f, const char *fname, ...) _sentinel_; #define parse_env_file(f, fname, ...) parse_env_file_sentinel(f, fname, __VA_ARGS__, NULL) int parse_env_file_fd_sentinel(int fd, const char *fname, ...) _sentinel_; #define parse_env_file_fd(fd, fname, ...) parse_env_file_fd_sentinel(fd, fname, __VA_ARGS__, NULL) int load_env_file(FILE *f, const char *fname, char ***ret); int load_env_file_pairs(FILE *f, const char *fname, char ***ret); +int load_env_file_pairs_fd(int fd, const char *fname, char ***ret); int merge_env_file(char ***env, FILE *f, const char *fname); -int write_env_file(const char *fname, char **l); +int write_env_file_at(int dir_fd, const char *fname, char **l); +static inline int write_env_file(const char *fname, char **l) { + return write_env_file_at(AT_FDCWD, fname, l); +} diff --git a/src/libnm-systemd-shared/src/basic/env-util.c b/src/libnm-systemd-shared/src/basic/env-util.c index f58d86a7aa..fa2753bccb 100644 --- a/src/libnm-systemd-shared/src/basic/env-util.c +++ b/src/libnm-systemd-shared/src/basic/env-util.c @@ -139,20 +139,20 @@ bool strv_env_name_or_assignment_is_valid(char **l) { return true; } -static int env_append(char **r, char ***k, char **a) { - assert(r); +static int env_append(char **e, char ***k, char **a) { + assert(e); assert(k); - assert(*k >= r); + assert(*k >= e); if (!a) return 0; - /* Expects the following arguments: 'r' shall point to the beginning of an strv we are going to append to, 'k' + /* Expects the following arguments: 'e' shall point to the beginning of an strv we are going to append to, 'k' * to a pointer 
pointing to the NULL entry at the end of the same array. 'a' shall point to another strv. * - * This call adds every entry of 'a' to 'r', either overriding an existing matching entry, or appending to it. + * This call adds every entry of 'a' to 'e', either overriding an existing matching entry, or appending to it. * - * This call assumes 'r' has enough pre-allocated space to grow by all of 'a''s items. */ + * This call assumes 'e' has enough pre-allocated space to grow by all of 'a''s items. */ for (; *a; a++) { char **j, *c; @@ -162,7 +162,7 @@ static int env_append(char **r, char ***k, char **a) { if ((*a)[n] == '=') n++; - for (j = r; j < *k; j++) + for (j = e; j < *k; j++) if (strneq(*j, *a, n)) break; @@ -269,7 +269,7 @@ static bool env_entry_has_name(const char *entry, const char *name) { char **strv_env_delete(char **x, size_t n_lists, ...) { size_t n, i = 0; - char **r; + _cleanup_strv_free_ char **t = NULL; va_list ap; /* Deletes every entry from x that is mentioned in the other @@ -277,8 +277,8 @@ char **strv_env_delete(char **x, size_t n_lists, ...) { n = strv_length(x); - r = new(char*, n+1); - if (!r) + t = new(char*, n+1); + if (!t) return NULL; STRV_FOREACH(k, x) { @@ -293,11 +293,9 @@ char **strv_env_delete(char **x, size_t n_lists, ...) { } va_end(ap); - r[i] = strdup(*k); - if (!r[i]) { - strv_free(r); + t[i] = strdup(*k); + if (!t[i]) return NULL; - } i++; continue; @@ -306,11 +304,11 @@ char **strv_env_delete(char **x, size_t n_lists, ...) { va_end(ap); } - r[i] = NULL; + t[i] = NULL; assert(i <= n); - return r; + return TAKE_PTR(t); } char **strv_env_unset(char **l, const char *p) { @@ -462,6 +460,48 @@ int strv_env_assign(char ***l, const char *key, const char *value) { return strv_env_replace_consume(l, p); } +int _strv_env_assign_many(char ***l, ...) 
{ + va_list ap; + int r; + + assert(l); + + va_start(ap, l); + for (;;) { + const char *key, *value; + + key = va_arg(ap, const char *); + if (!key) + break; + + if (!env_name_is_valid(key)) { + va_end(ap); + return -EINVAL; + } + + value = va_arg(ap, const char *); + if (!value) { + strv_env_unset(*l, key); + continue; + } + + char *p = strjoin(key, "=", value); + if (!p) { + va_end(ap); + return -ENOMEM; + } + + r = strv_env_replace_consume(l, p); + if (r < 0) { + va_end(ap); + return r; + } + } + va_end(ap); + + return 0; +} + char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) { assert(name); @@ -488,6 +528,7 @@ char *strv_env_get(char **l, const char *name) { return strv_env_get_n(l, name, strlen(name), 0); } +#endif /* NM_IGNORED */ char *strv_env_pairs_get(char **l, const char *name) { char *result = NULL; @@ -501,6 +542,7 @@ char *strv_env_pairs_get(char **l, const char *name) { return result; } +#if 0 /* NM_IGNORED */ char **strv_env_clean_with_callback(char **e, void (*invalid_callback)(const char *p, void *userdata), void *userdata) { int k = 0; @@ -549,7 +591,7 @@ char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) { const char *e, *word = format, *test_value = NULL; /* test_value is initialized to appease gcc */ char *k; - _cleanup_free_ char *r = NULL; + _cleanup_free_ char *s = NULL; size_t i, len = 0; /* len is initialized to appease gcc */ int nest = 0; @@ -565,31 +607,31 @@ char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) { case CURLY: if (*e == '{') { - k = strnappend(r, word, e-word-1); + k = strnappend(s, word, e-word-1); if (!k) return NULL; - free_and_replace(r, k); + free_and_replace(s, k); word = e-1; state = VARIABLE; nest++; } else if (*e == '$') { - k = strnappend(r, word, e-word); + k = strnappend(s, word, e-word); if (!k) return NULL; - free_and_replace(r, k); + free_and_replace(s, k); word = e+1; state = WORD; } else if (flags & REPLACE_ENV_ALLOW_BRACELESS 
&& strchr(VALID_BASH_ENV_NAME_CHARS, *e)) { - k = strnappend(r, word, e-word-1); + k = strnappend(s, word, e-word-1); if (!k) return NULL; - free_and_replace(r, k); + free_and_replace(s, k); word = e-1; state = VARIABLE_RAW; @@ -604,7 +646,7 @@ char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) { t = strv_env_get_n(env, word+2, e-word-2, flags); - if (!strextend(&r, t)) + if (!strextend(&s, t)) return NULL; word = e+1; @@ -657,7 +699,7 @@ char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) { else if (!t && state == DEFAULT_VALUE) t = v = replace_env_n(test_value, e-test_value, env, flags); - if (!strextend(&r, t)) + if (!strextend(&s, t)) return NULL; word = e+1; @@ -673,7 +715,7 @@ char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) { t = strv_env_get_n(env, word+1, e-word-1, flags); - if (!strextend(&r, t)) + if (!strextend(&s, t)) return NULL; word = e--; @@ -689,13 +731,13 @@ char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) { assert(flags & REPLACE_ENV_ALLOW_BRACELESS); t = strv_env_get_n(env, word+1, e-word-1, flags); - return strjoin(r, t); + return strjoin(s, t); } else - return strnappend(r, word, e-word); + return strnappend(s, word, e-word); } char **replace_env_argv(char **argv, char **env) { - char **ret; + _cleanup_strv_free_ char **ret = NULL; size_t k = 0, l = 0; l = strv_length(argv); @@ -709,7 +751,8 @@ char **replace_env_argv(char **argv, char **env) { /* If $FOO appears as single word, replace it by the split up variable */ if ((*i)[0] == '$' && !IN_SET((*i)[1], '{', '$')) { char *e; - char **w, **m = NULL; + char **w; + _cleanup_strv_free_ char **m = NULL; size_t q; e = strv_env_get(env, *i+1); @@ -719,11 +762,9 @@ char **replace_env_argv(char **argv, char **env) { r = strv_split_full(&m, e, WHITESPACE, EXTRACT_RELAX|EXTRACT_UNQUOTE); if (r < 0) { ret[k] = NULL; - strv_free(ret); return NULL; } - } else - m = NULL; + } q = strv_length(m); 
l = l + q - 1; @@ -731,15 +772,13 @@ char **replace_env_argv(char **argv, char **env) { w = reallocarray(ret, l + 1, sizeof(char *)); if (!w) { ret[k] = NULL; - strv_free(ret); - strv_free(m); return NULL; } ret = w; if (m) { memcpy(ret + k, m, q * sizeof(char*)); - free(m); + m = mfree(m); } k += q; @@ -748,15 +787,13 @@ char **replace_env_argv(char **argv, char **env) { /* If ${FOO} appears as part of a word, replace it by the variable as-is */ ret[k] = replace_env(*i, env, 0); - if (!ret[k]) { - strv_free(ret); + if (!ret[k]) return NULL; - } k++; } ret[k] = NULL; - return ret; + return TAKE_PTR(ret); } #endif /* NM_IGNORED */ diff --git a/src/libnm-systemd-shared/src/basic/env-util.h b/src/libnm-systemd-shared/src/basic/env-util.h index b927ac7a48..b0ff5a11d1 100644 --- a/src/libnm-systemd-shared/src/basic/env-util.h +++ b/src/libnm-systemd-shared/src/basic/env-util.h @@ -49,6 +49,8 @@ int strv_env_replace_consume(char ***l, char *p); /* In place ... */ int strv_env_replace_strdup(char ***l, const char *assignment); int strv_env_replace_strdup_passthrough(char ***l, const char *assignment); int strv_env_assign(char ***l, const char *key, const char *value); +int _strv_env_assign_many(char ***l, ...) _sentinel_; +#define strv_env_assign_many(l, ...) 
_strv_env_assign_many(l, __VA_ARGS__, NULL) char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) _pure_; char *strv_env_get(char **x, const char *n) _pure_; diff --git a/src/libnm-systemd-shared/src/basic/escape.c b/src/libnm-systemd-shared/src/basic/escape.c index f69cda2673..6d2c1d4d66 100644 --- a/src/libnm-systemd-shared/src/basic/escape.c +++ b/src/libnm-systemd-shared/src/basic/escape.c @@ -476,15 +476,23 @@ char* octescape(const char *s, size_t len) { static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad) { assert(bad); + assert(t); + assert(s); + + while (*s) { + int l = utf8_encoded_valid_unichar(s, SIZE_MAX); - for (; *s; s++) - if (char_is_cc(*s)) - t += cescape_char(*s, t); - else { + if (char_is_cc(*s) || l < 0) + t += cescape_char(*(s++), t); + else if (l == 1) { if (*s == '\\' || strchr(bad, *s)) *(t++) = '\\'; - *(t++) = *s; + *(t++) = *(s++); + } else { + t = mempcpy(t, s, l); + s += l; } + } return t; } @@ -513,11 +521,16 @@ char* shell_maybe_quote(const char *s, ShellEscapeFlags flags) { if (FLAGS_SET(flags, SHELL_ESCAPE_EMPTY) && isempty(s)) return strdup("\"\""); /* We don't use $'' here in the POSIX mode. "" is fine too. 
*/ - for (p = s; *p; p++) - if (char_is_cc(*p) || + for (p = s; *p; ) { + int l = utf8_encoded_valid_unichar(p, SIZE_MAX); + + if (char_is_cc(*p) || l < 0 || strchr(WHITESPACE SHELL_NEED_QUOTES, *p)) break; + p += l; + } + if (!*p) return strdup(s); diff --git a/src/libnm-systemd-shared/src/basic/fd-util.c b/src/libnm-systemd-shared/src/basic/fd-util.c index d0ec7c3260..a0e2f4eb8c 100644 --- a/src/libnm-systemd-shared/src/basic/fd-util.c +++ b/src/libnm-systemd-shared/src/basic/fd-util.c @@ -23,6 +23,7 @@ #include "missing_fcntl.h" #include "missing_fs.h" #include "missing_syscall.h" +#include "mountpoint-util.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" @@ -418,7 +419,8 @@ int close_all_fds(const int except[], size_t n_except) { if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN)) continue; - if (safe_atoi(de->d_name, &fd) < 0) + fd = parse_fd(de->d_name); + if (fd < 0) /* Let's better ignore this, just in case */ continue; @@ -538,6 +540,11 @@ bool fdname_is_valid(const char *s) { int fd_get_path(int fd, char **ret) { int r; + assert(fd >= 0 || fd == AT_FDCWD); + + if (fd == AT_FDCWD) + return safe_getcwd(ret); + r = readlink_malloc(FORMAT_PROC_FD_PATH(fd), ret); if (r == -ENOENT) { /* ENOENT can mean two things: that the fd does not exist or that /proc is not mounted. Let's make @@ -746,27 +753,47 @@ finish: return r; } +#endif /* NM_IGNORED */ int fd_reopen(int fd, int flags) { int new_fd, r; + assert(fd >= 0 || fd == AT_FDCWD); + /* Reopens the specified fd with new flags. This is useful for convert an O_PATH fd into a regular one, or to * turn O_RDWR fds into O_RDONLY fds. * * This doesn't work on sockets (since they cannot be open()ed, ever). * - * This implicitly resets the file read index to 0. */ - - if (FLAGS_SET(flags, O_DIRECTORY)) { + * This implicitly resets the file read index to 0. + * + * If AT_FDCWD is specified as file descriptor gets an fd to the current cwd. 
+ * + * If the specified file descriptor refers to a symlink via O_PATH, then this function cannot be used + * to follow that symlink. Because we cannot have non-O_PATH fds to symlinks reopening it without + * O_PATH will always result in -ELOOP. Or in other words: if you have an O_PATH fd to a symlink you + * can reopen it only if you pass O_PATH again. */ + + if (FLAGS_SET(flags, O_NOFOLLOW)) + /* O_NOFOLLOW is not allowed in fd_reopen(), because after all this is primarily implemented + * via a symlink-based interface in /proc/self/fd. Let's refuse this here early. Note that + * the kernel would generate ELOOP here too, hence this manual check is mostly redundant – + * the only reason we add it here is so that the O_DIRECTORY special case (see below) behaves + * the same way as the non-O_DIRECTORY case. */ + return -ELOOP; + + if (FLAGS_SET(flags, O_DIRECTORY) || fd == AT_FDCWD) { /* If we shall reopen the fd as directory we can just go via "." and thus bypass the whole * magic /proc/ directory, and make ourselves independent of that being mounted. 
*/ - new_fd = openat(fd, ".", flags); + new_fd = openat(fd, ".", flags | O_DIRECTORY); if (new_fd < 0) return -errno; return new_fd; } + assert(fd >= 0); + new_fd = open(FORMAT_PROC_FD_PATH(fd), flags); if (new_fd < 0) { if (errno != ENOENT) @@ -784,6 +811,7 @@ int fd_reopen(int fd, int flags) { return new_fd; } +#if 0 /* NM_IGNORED */ int fd_reopen_condition( int fd, int flags, @@ -815,6 +843,18 @@ int fd_reopen_condition( return new_fd; } +int fd_is_opath(int fd) { + int r; + + assert(fd >= 0); + + r = fcntl(fd, F_GETFL); + if (r < 0) + return -errno; + + return FLAGS_SET(r, O_PATH); +} + int read_nr_open(void) { _cleanup_free_ char *nr_open = NULL; int r; @@ -859,4 +899,89 @@ int fd_get_diskseq(int fd, uint64_t *ret) { return 0; } + +int path_is_root_at(int dir_fd, const char *path) { + STRUCT_NEW_STATX_DEFINE(st); + STRUCT_NEW_STATX_DEFINE(pst); + _cleanup_close_ int fd = -EBADF; + int r; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + + if (!isempty(path)) { + fd = openat(dir_fd, path, O_PATH|O_CLOEXEC); + if (fd < 0) + return -errno; + + dir_fd = fd; + } + + r = statx_fallback(dir_fd, ".", 0, STATX_TYPE|STATX_INO|STATX_MNT_ID, &st.sx); + if (r == -ENOTDIR) + return false; + if (r < 0) + return r; + + r = statx_fallback(dir_fd, "..", 0, STATX_TYPE|STATX_INO|STATX_MNT_ID, &pst.sx); + if (r < 0) + return r; + + /* First, compare inode. If these are different, the fd does not point to the root directory "/". */ + if (!statx_inode_same(&st.sx, &pst.sx)) + return false; + + /* Even if the parent directory has the same inode, the fd may not point to the root directory "/", + * and we also need to check that the mount ids are the same. Otherwise, a construct like the + * following could be used to trick us: + * + * $ mkdir /tmp/x /tmp/x/y + * $ mount --bind /tmp/x /tmp/x/y + * + * Note, statx() does not provide the mount ID and path_get_mnt_id_at() does not work when an old + * kernel is used without /proc mounted. 
In that case, let's assume that we do not have such spurious + * mount points in an early boot stage, and silently skip the following check. */ + + if (!FLAGS_SET(st.nsx.stx_mask, STATX_MNT_ID)) { + int mntid; + + r = path_get_mnt_id_at(dir_fd, "", &mntid); + if (r == -ENOSYS) + return true; /* skip the mount ID check */ + if (r < 0) + return r; + assert(mntid >= 0); + + st.nsx.stx_mnt_id = mntid; + st.nsx.stx_mask |= STATX_MNT_ID; + } + + if (!FLAGS_SET(pst.nsx.stx_mask, STATX_MNT_ID)) { + int mntid; + + r = path_get_mnt_id_at(dir_fd, "..", &mntid); + if (r == -ENOSYS) + return true; /* skip the mount ID check */ + if (r < 0) + return r; + assert(mntid >= 0); + + pst.nsx.stx_mnt_id = mntid; + pst.nsx.stx_mask |= STATX_MNT_ID; + } + + return statx_mount_same(&st.nsx, &pst.nsx); +} + +const char *accmode_to_string(int flags) { + switch (flags & O_ACCMODE) { + case O_RDONLY: + return "ro"; + case O_WRONLY: + return "wo"; + case O_RDWR: + return "rw"; + default: + return NULL; + } +} #endif /* NM_IGNORED */ diff --git a/src/libnm-systemd-shared/src/basic/fd-util.h b/src/libnm-systemd-shared/src/basic/fd-util.h index 952afdd64f..c870a1b899 100644 --- a/src/libnm-systemd-shared/src/basic/fd-util.h +++ b/src/libnm-systemd-shared/src/basic/fd-util.h @@ -2,6 +2,7 @@ #pragma once #include <dirent.h> +#include <fcntl.h> #include <stdbool.h> #include <stdio.h> #include <sys/socket.h> @@ -74,14 +75,6 @@ int fd_get_path(int fd, char **ret); int move_fd(int from, int to, int cloexec); -enum { - ACQUIRE_NO_DEV_NULL = 1 << 0, - ACQUIRE_NO_MEMFD = 1 << 1, - ACQUIRE_NO_PIPE = 1 << 2, - ACQUIRE_NO_TMPFILE = 1 << 3, - ACQUIRE_NO_REGULAR = 1 << 4, -}; - int fd_move_above_stdio(int fd); int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd); @@ -104,9 +97,18 @@ static inline int make_null_stdio(void) { int fd_reopen(int fd, int flags); int fd_reopen_condition(int fd, int flags, int mask, int *ret_new_fd); +int fd_is_opath(int fd); int 
read_nr_open(void); int fd_get_diskseq(int fd, uint64_t *ret); +int path_is_root_at(int dir_fd, const char *path); +static inline int dir_fd_is_root(int dir_fd) { + return path_is_root_at(dir_fd, NULL); +} +static inline int dir_fd_is_root_or_cwd(int dir_fd) { + return dir_fd == AT_FDCWD ? true : path_is_root_at(dir_fd, NULL); +} + /* The maximum length a buffer for a /proc/self/fd/<fd> path needs */ #define PROC_FD_PATH_MAX \ (STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)) @@ -120,3 +122,13 @@ static inline char *format_proc_fd_path(char buf[static PROC_FD_PATH_MAX], int f #define FORMAT_PROC_FD_PATH(fd) \ format_proc_fd_path((char[PROC_FD_PATH_MAX]) {}, (fd)) + +const char *accmode_to_string(int flags); + +/* Like ASSERT_PTR, but for fds */ +#define ASSERT_FD(fd) \ + ({ \ + int _fd_ = (fd); \ + assert(_fd_ >= 0); \ + _fd_; \ + }) diff --git a/src/libnm-systemd-shared/src/basic/fileio.c b/src/libnm-systemd-shared/src/basic/fileio.c index 0f9dfe0d80..908a030911 100644 --- a/src/libnm-systemd-shared/src/basic/fileio.c +++ b/src/libnm-systemd-shared/src/basic/fileio.c @@ -15,7 +15,7 @@ #include <unistd.h> #include "alloc-util.h" -#include "chase-symlinks.h" +#include "chase.h" #include "fd-util.h" #include "fileio.h" #include "fs-util.h" @@ -46,22 +46,6 @@ * can detect EOFs. 
*/ #define READ_VIRTUAL_BYTES_MAX (4U*1024U*1024U - 2U) -#if 0 /* NM_IGNORED */ -int fopen_unlocked_at(int dir_fd, const char *path, const char *options, int flags, FILE **ret) { - int r; - - assert(ret); - - r = xfopenat(dir_fd, path, options, flags, ret); - if (r < 0) - return r; - - (void) __fsetlocking(*ret, FSETLOCKING_BYCALLER); - - return 0; -} -#endif /* NM_IGNORED */ - int fdopen_unlocked(int fd, const char *options, FILE **ret) { assert(ret); @@ -272,7 +256,8 @@ int write_string_file_ts_at( const struct timespec *ts) { _cleanup_fclose_ FILE *f = NULL; - int q, r, fd; + _cleanup_close_ int fd = -EBADF; + int q, r; assert(fn); assert(line); @@ -309,11 +294,9 @@ int write_string_file_ts_at( goto fail; } - r = fdopen_unlocked(fd, "w", &f); - if (r < 0) { - safe_close(fd); + r = take_fdopen_unlocked(&fd, "w", &f); + if (r < 0) goto fail; - } if (flags & WRITE_STRING_FILE_DISABLE_BUFFER) setvbuf(f, NULL, _IONBF, 0); @@ -359,18 +342,19 @@ int write_string_filef( return write_string_file(fn, p, flags); } -int read_one_line_file(const char *fn, char **line) { +int read_one_line_file_at(int dir_fd, const char *filename, char **ret) { _cleanup_fclose_ FILE *f = NULL; int r; - assert(fn); - assert(line); + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(filename); + assert(ret); - r = fopen_unlocked(fn, "re", &f); + r = fopen_unlocked_at(dir_fd, filename, "re", 0, &f); if (r < 0) return r; - return read_line(f, LONG_LINE_MAX, line); + return read_line(f, LONG_LINE_MAX, ret); } int verify_file_at(int dir_fd, const char *fn, const char *blob, bool accept_extra_nl) { @@ -510,7 +494,7 @@ int read_virtual_file_fd(int fd, size_t max_size, char **ret_contents, size_t *r * at least one more byte to be able to distinguish EOF from truncation. 
*/ if (max_size != SIZE_MAX && n > max_size) { n = size; /* Make sure we never use more than what we sized the buffer for (so that - * we have one free byte in it for the trailing NUL we add below).*/ + * we have one free byte in it for the trailing NUL we add below). */ truncated = true; break; } @@ -766,62 +750,19 @@ int read_full_file_full( size_t *ret_size) { _cleanup_fclose_ FILE *f = NULL; + XfopenFlags xflags = XFOPEN_UNLOCKED; int r; assert(filename); assert(ret_contents); - r = xfopenat(dir_fd, filename, "re", 0, &f); - if (r < 0) { - _cleanup_close_ int sk = -EBADF; - - /* ENXIO is what Linux returns if we open a node that is an AF_UNIX socket */ - if (r != -ENXIO) - return r; - - /* If this is enabled, let's try to connect to it */ - if (!FLAGS_SET(flags, READ_FULL_FILE_CONNECT_SOCKET)) - return -ENXIO; - - /* Seeking is not supported on AF_UNIX sockets */ - if (offset != UINT64_MAX) - return -ENXIO; - - sk = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); - if (sk < 0) - return -errno; - - if (bind_name) { - /* If the caller specified a socket name to bind to, do so before connecting. This is - * useful to communicate some minor, short meta-information token from the client to - * the server. 
*/ - union sockaddr_union bsa; - - r = sockaddr_un_set_path(&bsa.un, bind_name); - if (r < 0) - return r; + if (FLAGS_SET(flags, READ_FULL_FILE_CONNECT_SOCKET) && /* If this is enabled, let's try to connect to it */ + offset == UINT64_MAX) /* Seeking is not supported on AF_UNIX sockets */ + xflags |= XFOPEN_SOCKET; - if (bind(sk, &bsa.sa, r) < 0) - return -errno; - } - - r = connect_unix_path(sk, dir_fd, filename); - if (IN_SET(r, -ENOTSOCK, -EINVAL)) /* propagate original error if this is not a socket after all */ - return -ENXIO; - if (r < 0) - return r; - - if (shutdown(sk, SHUT_WR) < 0) - return -errno; - - f = fdopen(sk, "r"); - if (!f) - return -errno; - - TAKE_FD(sk); - } - - (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + r = xfopenat_full(dir_fd, filename, "re", 0, xflags, bind_name, &f); + if (r < 0) + return r; return read_full_stream_full(f, filename, offset, size, flags, ret_contents, ret_size); } @@ -929,8 +870,7 @@ int get_proc_field(const char *filename, const char *pattern, const char *termin } DIR *xopendirat(int fd, const char *name, int flags) { - int nfd; - DIR *d; + _cleanup_close_ int nfd = -EBADF; assert(!(flags & O_CREAT)); @@ -941,13 +881,7 @@ DIR *xopendirat(int fd, const char *name, int flags) { if (nfd < 0) return NULL; - d = fdopendir(nfd); - if (!d) { - safe_close(nfd); - return NULL; - } - - return d; + return take_fdopendir(&nfd); } #endif /* NM_IGNORED */ @@ -997,38 +931,144 @@ int fopen_mode_to_flags(const char *mode) { return flags; } -int xfopenat(int dir_fd, const char *path, const char *mode, int flags, FILE **ret) { +static int xfopenat_regular(int dir_fd, const char *path, const char *mode, int open_flags, FILE **ret) { FILE *f; /* A combination of fopen() with openat() */ - if (dir_fd == AT_FDCWD && flags == 0) { + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + assert(mode); + assert(ret); + + if (dir_fd == AT_FDCWD && open_flags == 0) f = fopen(path, mode); - if (!f) - return -errno; - } else { - int fd, 
mode_flags; + else { + _cleanup_close_ int fd = -EBADF; + int mode_flags; mode_flags = fopen_mode_to_flags(mode); if (mode_flags < 0) return mode_flags; - fd = openat(dir_fd, path, mode_flags | flags); + fd = openat(dir_fd, path, mode_flags | open_flags); if (fd < 0) return -errno; - f = fdopen(fd, mode); - if (!f) { - safe_close(fd); + f = take_fdopen(&fd, mode); + } + if (!f) + return -errno; + + *ret = f; + return 0; +} + +static int xfopenat_unix_socket(int dir_fd, const char *path, const char *bind_name, FILE **ret) { + _cleanup_close_ int sk = -EBADF; + FILE *f; + int r; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + assert(ret); + + sk = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if (sk < 0) + return -errno; + + if (bind_name) { + /* If the caller specified a socket name to bind to, do so before connecting. This is + * useful to communicate some minor, short meta-information token from the client to + * the server. */ + union sockaddr_union bsa; + + r = sockaddr_un_set_path(&bsa.un, bind_name); + if (r < 0) + return r; + + if (bind(sk, &bsa.sa, r) < 0) return -errno; - } } + r = connect_unix_path(sk, dir_fd, path); + if (r < 0) + return r; + + if (shutdown(sk, SHUT_WR) < 0) + return -errno; + + f = take_fdopen(&sk, "r"); + if (!f) + return -errno; + + *ret = f; + return 0; +} + +int xfopenat_full( + int dir_fd, + const char *path, + const char *mode, + int open_flags, + XfopenFlags flags, + const char *bind_name, + FILE **ret) { + + FILE *f = NULL; /* avoid false maybe-uninitialized warning */ + int r; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + assert(mode); + assert(ret); + + r = xfopenat_regular(dir_fd, path, mode, open_flags, &f); + if (r == -ENXIO && FLAGS_SET(flags, XFOPEN_SOCKET)) { + /* ENXIO is what Linux returns if we open a node that is an AF_UNIX socket */ + r = xfopenat_unix_socket(dir_fd, path, bind_name, &f); + if (IN_SET(r, -ENOTSOCK, -EINVAL)) + return -ENXIO; /* propagate original error if this is 
not a socket after all */ + } + if (r < 0) + return r; + + if (FLAGS_SET(flags, XFOPEN_UNLOCKED)) + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + *ret = f; return 0; } #if 0 /* NM_IGNORED */ +int fdopen_independent(int fd, const char *mode, FILE **ret) { + _cleanup_close_ int copy_fd = -EBADF; + _cleanup_fclose_ FILE *f = NULL; + int mode_flags; + + assert(fd >= 0); + assert(mode); + assert(ret); + + /* A combination of fdopen() + fd_reopen(). i.e. reopens the inode the specified fd points to and + * returns a FILE* for it */ + + mode_flags = fopen_mode_to_flags(mode); + if (mode_flags < 0) + return mode_flags; + + copy_fd = fd_reopen(fd, mode_flags); + if (copy_fd < 0) + return copy_fd; + + f = take_fdopen(&copy_fd, mode); + if (!f) + return -errno; + + *ret = TAKE_PTR(f); + return 0; +} + static int search_and_fopen_internal( const char *path, const char *mode, diff --git a/src/libnm-systemd-shared/src/basic/fileio.h b/src/libnm-systemd-shared/src/basic/fileio.h index 7da3ee33e0..769bf394fd 100644 --- a/src/libnm-systemd-shared/src/basic/fileio.h +++ b/src/libnm-systemd-shared/src/basic/fileio.h @@ -43,10 +43,6 @@ typedef enum { READ_FULL_FILE_FAIL_WHEN_LARGER = 1 << 5, /* fail loading if file is larger than specified size */ } ReadFullFileFlags; -int fopen_unlocked_at(int dir_fd, const char *path, const char *options, int flags, FILE **ret); -static inline int fopen_unlocked(const char *path, const char *options, FILE **ret) { - return fopen_unlocked_at(AT_FDCWD, path, options, 0, ret); -} int fdopen_unlocked(int fd, const char *options, FILE **ret); int take_fdopen_unlocked(int *fd, const char *options, FILE **ret); FILE* take_fdopen(int *fd, const char *options); @@ -71,7 +67,10 @@ static inline int write_string_file(const char *fn, const char *line, WriteStrin int write_string_filef(const char *fn, WriteStringFileFlags flags, const char *format, ...) 
_printf_(3, 4); -int read_one_line_file(const char *filename, char **line); +int read_one_line_file_at(int dir_fd, const char *filename, char **ret); +static inline int read_one_line_file(const char *filename, char **ret) { + return read_one_line_file_at(AT_FDCWD, filename, ret); +} int read_full_file_full(int dir_fd, const char *filename, uint64_t offset, size_t size, ReadFullFileFlags flags, const char *bind_name, char **ret_contents, size_t *ret_size); static inline int read_full_file_at(int dir_fd, const char *filename, char **ret_contents, size_t *ret_size) { return read_full_file_full(dir_fd, filename, UINT64_MAX, SIZE_MAX, 0, NULL, ret_contents, ret_size); @@ -104,7 +103,31 @@ int executable_is_script(const char *path, char **interpreter); int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field); DIR *xopendirat(int dirfd, const char *name, int flags); -int xfopenat(int dir_fd, const char *path, const char *mode, int flags, FILE **ret); + +typedef enum XfopenFlags { + XFOPEN_UNLOCKED = 1 << 0, /* call __fsetlocking(FSETLOCKING_BYCALLER) after opened */ + XFOPEN_SOCKET = 1 << 1, /* also try to open unix socket */ +} XfopenFlags; + +int xfopenat_full( + int dir_fd, + const char *path, + const char *mode, + int open_flags, + XfopenFlags flags, + const char *bind_name, + FILE **ret); +static inline int xfopenat(int dir_fd, const char *path, const char *mode, int open_flags, FILE **ret) { + return xfopenat_full(dir_fd, path, mode, open_flags, 0, NULL, ret); +} +static inline int fopen_unlocked_at(int dir_fd, const char *path, const char *mode, int open_flags, FILE **ret) { + return xfopenat_full(dir_fd, path, mode, open_flags, XFOPEN_UNLOCKED, NULL, ret); +} +static inline int fopen_unlocked(const char *path, const char *mode, FILE **ret) { + return fopen_unlocked_at(AT_FDCWD, path, mode, 0, ret); +} + +int fdopen_independent(int fd, const char *mode, FILE **ret); int search_and_fopen(const char *path, const char *mode, 
const char *root, const char **search, FILE **ret, char **ret_path); int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **ret, char **ret_path); diff --git a/src/libnm-systemd-shared/src/basic/fs-util.c b/src/libnm-systemd-shared/src/basic/fs-util.c index 27b4c2b661..32fd849d34 100644 --- a/src/libnm-systemd-shared/src/basic/fs-util.c +++ b/src/libnm-systemd-shared/src/basic/fs-util.c @@ -5,6 +5,7 @@ #include <errno.h> #include <stddef.h> #include <stdlib.h> +#include <sys/file.h> #include <linux/falloc.h> #include <linux/magic.h> #include <unistd.h> @@ -15,6 +16,8 @@ #include "fileio.h" #include "fs-util.h" #include "hostname-util.h" +#include "label.h" +#include "lock-util.h" #include "log.h" #include "macro.h" #include "missing_fcntl.h" @@ -35,11 +38,6 @@ #include "umask-util.h" #include "user-util.h" -int unlink_noerrno(const char *path) { - PROTECT_ERRNO; - return RET_NERRNO(unlink(path)); -} - #if 0 /* NM_IGNORED */ int rmdir_parents(const char *path, const char *stop) { char *p; @@ -311,7 +309,7 @@ int fchmod_opath(int fd, mode_t m) { } int futimens_opath(int fd, const struct timespec ts[2]) { - /* Similar to fchmod_path() but for futimens() */ + /* Similar to fchmod_opath() but for futimens() */ if (utimensat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), ts, 0) < 0) { if (errno != ENOENT) @@ -683,7 +681,7 @@ void unlink_tempfilep(char (*p)[]) { * successfully created. We ignore both the rare case where the * original suffix is used and unlink failures. */ if (!endswith(*p, ".XXXXXX")) - (void) unlink_noerrno(*p); + (void) unlink(*p); } int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags) { @@ -781,7 +779,7 @@ int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags) { * punch-hole/truncate this to release the disk space. 
*/ bs = MAX(st.st_blksize, 512); - l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */ + l = ROUND_UP(st.st_size, bs); /* Round up to next block size */ if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0) return 0; /* Successfully punched a hole! 😊 */ @@ -795,12 +793,23 @@ int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags) { return 0; } -int open_parent(const char *path, int flags, mode_t mode) { +int open_parent_at(int dir_fd, const char *path, int flags, mode_t mode) { _cleanup_free_ char *parent = NULL; int r; + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + r = path_extract_directory(path, &parent); - if (r < 0) + if (r == -EDESTADDRREQ) { + parent = strdup("."); + if (!parent) + return -ENOMEM; + } else if (r == -EADDRNOTAVAIL) { + parent = strdup(path); + if (!parent) + return -ENOMEM; + } else if (r < 0) return r; /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an @@ -811,7 +820,7 @@ int open_parent(const char *path, int flags, mode_t mode) { else if (!FLAGS_SET(flags, O_TMPFILE)) flags |= O_DIRECTORY|O_RDONLY; - return RET_NERRNO(open(parent, flags, mode)); + return RET_NERRNO(openat(dir_fd, parent, flags, mode)); } #endif /* NM_IGNORED */ @@ -822,11 +831,11 @@ int conservative_renameat( _cleanup_close_ int old_fd = -EBADF, new_fd = -EBADF; struct stat old_stat, new_stat; - /* Renames the old path to thew new path, much like renameat() — except if both are regular files and + /* Renames the old path to the new path, much like renameat() — except if both are regular files and * have the exact same contents and basic file attributes already. In that case remove the new file * instead. This call is useful for reducing inotify wakeups on files that are updated but don't * actually change. This function is written in a style that we rather rename too often than suppress - * too much. i.e. 
whenever we are in doubt we rather rename than fail. After all reducing inotify + * too much. I.e. whenever we are in doubt, we rather rename than fail. After all reducing inotify * events is an optimization only, not more. */ old_fd = openat(olddirfd, oldpath, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW); @@ -1002,8 +1011,7 @@ int parse_cifs_service( int open_mkdir_at(int dirfd, const char *path, int flags, mode_t mode) { _cleanup_close_ int fd = -EBADF, parent_fd = -EBADF; - _cleanup_free_ char *fname = NULL; - bool made; + _cleanup_free_ char *fname = NULL, *parent = NULL; int r; /* Creates a directory with mkdirat() and then opens it, in the "most atomic" fashion we can @@ -1018,19 +1026,13 @@ int open_mkdir_at(int dirfd, const char *path, int flags, mode_t mode) { /* Note that O_DIRECTORY|O_NOFOLLOW is implied, but we allow specifying it anyway. The following * flags actually make sense to specify: O_CLOEXEC, O_EXCL, O_NOATIME, O_PATH */ - if (isempty(path)) - return -EINVAL; - - if (!filename_is_valid(path)) { - _cleanup_free_ char *parent = NULL; - - /* If this is not a valid filename, it's a path. Let's open the parent directory then, so - * that we can pin it, and operate below it. */ - - r = path_extract_directory(path, &parent); - if (r < 0) + /* If this is not a valid filename, it's a path. Let's open the parent directory then, so + * that we can pin it, and operate below it. 
*/ + r = path_extract_directory(path, &parent); + if (r < 0) { + if (!IN_SET(r, -EDESTADDRREQ, -EADDRNOTAVAIL)) return r; - + } else { r = path_extract_filename(path, &fname); if (r < 0) return r; @@ -1043,33 +1045,11 @@ int open_mkdir_at(int dirfd, const char *path, int flags, mode_t mode) { path = fname; } - r = RET_NERRNO(mkdirat(dirfd, path, mode)); - if (r == -EEXIST) { - if (FLAGS_SET(flags, O_EXCL)) - return -EEXIST; - - made = false; - } else if (r < 0) - return r; - else - made = true; - - fd = RET_NERRNO(openat(dirfd, path, (flags & ~O_EXCL)|O_DIRECTORY|O_NOFOLLOW)); - if (fd < 0) { - if (fd == -ENOENT) /* We got ENOENT? then someone else immediately removed it after we - * created it. In that case let's return immediately without unlinking - * anything, because there simply isn't anything to unlink anymore. */ - return -ENOENT; - if (fd == -ELOOP) /* is a symlink? exists already → created by someone else, don't unlink */ - return -EEXIST; - if (fd == -ENOTDIR) /* not a directory? exists already → created by someone else, don't unlink */ - return -EEXIST; - - if (made) - (void) unlinkat(dirfd, path, AT_REMOVEDIR); - + fd = xopenat(dirfd, path, flags|O_CREAT|O_DIRECTORY|O_NOFOLLOW, /* xopen_flags = */ 0, mode); + if (IN_SET(fd, -ELOOP, -ENOTDIR)) + return -EEXIST; + if (fd < 0) return fd; - } return TAKE_FD(fd); } @@ -1120,3 +1100,120 @@ int openat_report_new(int dirfd, const char *pathname, int flags, mode_t mode, b return -EEXIST; } } + +int xopenat(int dir_fd, const char *path, int open_flags, XOpenFlags xopen_flags, mode_t mode) { + _cleanup_close_ int fd = -EBADF; + bool made = false; + int r; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + + if (isempty(path)) { + assert(!FLAGS_SET(open_flags, O_CREAT|O_EXCL)); + return fd_reopen(dir_fd, open_flags & ~O_NOFOLLOW); + } + + if (FLAGS_SET(open_flags, O_CREAT) && FLAGS_SET(xopen_flags, XO_LABEL)) { + r = label_ops_pre(dir_fd, path, FLAGS_SET(open_flags, O_DIRECTORY) ? 
S_IFDIR : S_IFREG); + if (r < 0) + return r; + } + + if (FLAGS_SET(open_flags, O_DIRECTORY|O_CREAT)) { + r = RET_NERRNO(mkdirat(dir_fd, path, mode)); + if (r == -EEXIST) { + if (FLAGS_SET(open_flags, O_EXCL)) + return -EEXIST; + + made = false; + } else if (r < 0) + return r; + else + made = true; + + if (FLAGS_SET(xopen_flags, XO_LABEL)) { + r = label_ops_post(dir_fd, path); + if (r < 0) + return r; + } + + open_flags &= ~(O_EXCL|O_CREAT); + xopen_flags &= ~XO_LABEL; + } + + fd = RET_NERRNO(openat(dir_fd, path, open_flags, mode)); + if (fd < 0) { + if (IN_SET(fd, + /* We got ENOENT? then someone else immediately removed it after we + * created it. In that case let's return immediately without unlinking + * anything, because there simply isn't anything to unlink anymore. */ + -ENOENT, + /* is a symlink? exists already → created by someone else, don't unlink */ + -ELOOP, + /* not a directory? exists already → created by someone else, don't unlink */ + -ENOTDIR)) + return fd; + + if (made) + (void) unlinkat(dir_fd, path, AT_REMOVEDIR); + + return fd; + } + + if (FLAGS_SET(open_flags, O_CREAT) && FLAGS_SET(xopen_flags, XO_LABEL)) { + r = label_ops_post(dir_fd, path); + if (r < 0) + return r; + } + + return TAKE_FD(fd); +} + +#if 0 /* NM_IGNORED */ +int xopenat_lock( + int dir_fd, + const char *path, + int open_flags, + XOpenFlags xopen_flags, + mode_t mode, + LockType locktype, + int operation) { + + _cleanup_close_ int fd = -EBADF; + int r; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + assert(IN_SET(operation & ~LOCK_NB, LOCK_EX, LOCK_SH)); + + /* POSIX/UNPOSIX locks don't work on directories (errno is set to -EBADF so let's return early with + * the same error here). 
*/ + if (FLAGS_SET(open_flags, O_DIRECTORY) && locktype != LOCK_BSD) + return -EBADF; + + for (;;) { + struct stat st; + + fd = xopenat(dir_fd, path, open_flags, xopen_flags, mode); + if (fd < 0) + return fd; + + r = lock_generic(fd, locktype, operation); + if (r < 0) + return r; + + /* If we acquired the lock, let's check if the file/directory still exists in the file + * system. If not, then the previous exclusive owner removed it and then closed it. In such a + * case our acquired lock is worthless, hence try again. */ + + if (fstat(fd, &st) < 0) + return -errno; + if (st.st_nlink > 0) + break; + + fd = safe_close(fd); + } + + return TAKE_FD(fd); +} +#endif /* NM_IGNORED */ diff --git a/src/libnm-systemd-shared/src/basic/fs-util.h b/src/libnm-systemd-shared/src/basic/fs-util.h index 932d003f19..a19836d138 100644 --- a/src/libnm-systemd-shared/src/basic/fs-util.h +++ b/src/libnm-systemd-shared/src/basic/fs-util.h @@ -12,6 +12,7 @@ #include "alloc-util.h" #include "errno-util.h" +#include "lock-util.h" #include "time-util.h" #include "user-util.h" @@ -22,8 +23,6 @@ #define PTR_TO_MODE(p) ((mode_t) ((uintptr_t) (p)-1)) #define MODE_TO_PTR(u) ((void *) ((uintptr_t) (u)+1)) -int unlink_noerrno(const char *path); - int rmdir_parents(const char *path, const char *stop); int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath); @@ -99,7 +98,7 @@ static inline char* unlink_and_free(char *p) { if (!p) return NULL; - (void) unlink_noerrno(p); + (void) unlink(p); return mfree(p); } DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free); @@ -115,7 +114,10 @@ typedef enum UnlinkDeallocateFlags { int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags); -int open_parent(const char *path, int flags, mode_t mode); +int open_parent_at(int dir_fd, const char *path, int flags, mode_t mode); +static inline int open_parent(const char *path, int flags, mode_t mode) { + return open_parent_at(AT_FDCWD, path, flags, mode); +} int 
conservative_renameat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath); static inline int conservative_rename(const char *oldpath, const char *newpath) { @@ -129,3 +131,11 @@ int parse_cifs_service(const char *s, char **ret_host, char **ret_service, char int open_mkdir_at(int dirfd, const char *path, int flags, mode_t mode); int openat_report_new(int dirfd, const char *pathname, int flags, mode_t mode, bool *ret_newly_created); + +typedef enum XOpenFlags { + XO_LABEL = 1 << 0, +} XOpenFlags; + +int xopenat(int dir_fd, const char *path, int open_flags, XOpenFlags xopen_flags, mode_t mode); + +int xopenat_lock(int dir_fd, const char *path, int open_flags, XOpenFlags xopen_flags, mode_t mode, LockType locktype, int operation); diff --git a/src/libnm-systemd-shared/src/basic/hash-funcs.h b/src/libnm-systemd-shared/src/basic/hash-funcs.h index c14302ec72..be64289252 100644 --- a/src/libnm-systemd-shared/src/basic/hash-funcs.h +++ b/src/libnm-systemd-shared/src/basic/hash-funcs.h @@ -102,7 +102,7 @@ extern const struct hash_ops uint64_hash_ops; /* On some archs dev_t is 32bit, and on others 64bit. And sometimes it's 64bit on 32bit archs, and sometimes 32bit on * 64bit archs. Yuck! 
*/ #if SIZEOF_DEV_T != 8 -void devt_hash_func(const dev_t *p, struct siphash *state) _pure_; +void devt_hash_func(const dev_t *p, struct siphash *state); #else #define devt_hash_func uint64_hash_func #endif diff --git a/src/libnm-systemd-shared/src/basic/hashmap.c b/src/libnm-systemd-shared/src/basic/hashmap.c index 4ea3aba9e3..356200cfcc 100644 --- a/src/libnm-systemd-shared/src/basic/hashmap.c +++ b/src/libnm-systemd-shared/src/basic/hashmap.c @@ -7,6 +7,9 @@ #include <pthread.h> #include <stdint.h> #include <stdlib.h> +#if HAVE_VALGRIND_VALGRIND_H +# include <valgrind/valgrind.h> +#endif #include "alloc-util.h" #include "fileio.h" @@ -276,29 +279,34 @@ static _used_ const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX }, }; -#if VALGRIND -_destructor_ static void cleanup_pools(void) { - _cleanup_free_ char *t = NULL; +#if 0 /* NM_IGNORED */ +void hashmap_trim_pools(void) { int r; - /* Be nice to valgrind */ + /* The pool is only allocated by the main thread, but the memory can be passed to other + * threads. Let's clean up if we are the main thread and no other threads are live. */ - /* The pool is only allocated by the main thread, but the memory can - * be passed to other threads. Let's clean up if we are the main thread - * and no other threads are live. */ - /* We build our own is_main_thread() here, which doesn't use C11 - * TLS based caching of the result. That's because valgrind apparently - * doesn't like malloc() (which C11 TLS internally uses) to be called - * from a GCC destructors. */ + /* We build our own is_main_thread() here, which doesn't use C11 TLS based caching of the + * result. That's because valgrind apparently doesn't like TLS to be used from a GCC destructor. 
*/ if (getpid() != gettid()) - return; + return (void) log_debug("Not cleaning up memory pools, not in main thread."); - r = get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t); - if (r < 0 || !streq(t, "1")) - return; + r = get_process_threads(0); + if (r < 0) + return (void) log_debug_errno(r, "Failed to determine number of threads, not cleaning up memory pools: %m"); + if (r != 1) + return (void) log_debug("Not cleaning up memory pools, running in multi-threaded process."); + + mempool_trim(&hashmap_pool); + mempool_trim(&ordered_hashmap_pool); +} +#endif /* NM_IGNORED */ - mempool_drop(&hashmap_pool); - mempool_drop(&ordered_hashmap_pool); +#if HAVE_VALGRIND_VALGRIND_H +_destructor_ static void cleanup_pools(void) { + /* Be nice to valgrind */ + if (RUNNING_ON_VALGRIND) + hashmap_trim_pools(); } #endif diff --git a/src/libnm-systemd-shared/src/basic/hashmap.h b/src/libnm-systemd-shared/src/basic/hashmap.h index ebb5a63eb4..68d9b81cf2 100644 --- a/src/libnm-systemd-shared/src/basic/hashmap.h +++ b/src/libnm-systemd-shared/src/basic/hashmap.h @@ -443,3 +443,5 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free_free); DEFINE_TRIVIAL_CLEANUP_FUNC(IteratedCache*, iterated_cache_free); #define _cleanup_iterated_cache_free_ _cleanup_(iterated_cache_freep) + +void hashmap_trim_pools(void); diff --git a/src/libnm-systemd-shared/src/basic/hostname-util.c b/src/libnm-systemd-shared/src/basic/hostname-util.c index e7bb5ff35e..26e196f35e 100644 --- a/src/libnm-systemd-shared/src/basic/hostname-util.c +++ b/src/libnm-systemd-shared/src/basic/hostname-util.c @@ -197,6 +197,7 @@ bool is_localhost(const char *hostname) { endswith_no_case(hostname, ".localhost.localdomain."); } +#if 0 /* NM_IGNORED */ int get_pretty_hostname(char **ret) { _cleanup_free_ char *n = NULL; int r; @@ -213,3 +214,4 @@ int get_pretty_hostname(char **ret) { *ret = TAKE_PTR(n); return 0; } +#endif /* NM_IGNORED */ diff --git a/src/libnm-systemd-shared/src/basic/io-util.c 
b/src/libnm-systemd-shared/src/basic/io-util.c index 5da04923ee..0c480091b2 100644 --- a/src/libnm-systemd-shared/src/basic/io-util.c +++ b/src/libnm-systemd-shared/src/basic/io-util.c @@ -369,4 +369,14 @@ size_t iovw_size(struct iovec_wrapper *iovw) { return n; } + +void iovec_array_free(struct iovec *iov, size_t n) { + if (!iov) + return; + + for (size_t i = 0; i < n; i++) + free(iov[i].iov_base); + + free(iov); +} #endif /* NM_IGNORED */ diff --git a/src/libnm-systemd-shared/src/basic/io-util.h b/src/libnm-systemd-shared/src/basic/io-util.h index 3afb134266..3ad8267962 100644 --- a/src/libnm-systemd-shared/src/basic/io-util.h +++ b/src/libnm-systemd-shared/src/basic/io-util.h @@ -74,10 +74,13 @@ static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) { } -#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) } -#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len) -#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) string, strlen(string)) -#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string) +#define IOVEC_NULL (struct iovec) {} +#define IOVEC_MAKE(base, len) (struct iovec) { .iov_base = (base), .iov_len = (len) } +#define IOVEC_MAKE_STRING(string) \ + ({ \ + char *_s = (char*) (string); \ + IOVEC_MAKE(_s, strlen(_s)); \ + }) char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value); char* set_iovec_string_field_free(struct iovec *iovec, size_t *n_iovec, const char *field, char *value); @@ -105,3 +108,5 @@ int iovw_put_string_field(struct iovec_wrapper *iovw, const char *field, const c int iovw_put_string_field_free(struct iovec_wrapper *iovw, const char *field, char *value); void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new); size_t iovw_size(struct iovec_wrapper *iovw); + +void iovec_array_free(struct iovec *iov, size_t n); diff --git a/src/libnm-systemd-shared/src/basic/label.c b/src/libnm-systemd-shared/src/basic/label.c new file mode 100644 
index 0000000000..a08a238ff7 --- /dev/null +++ b/src/libnm-systemd-shared/src/basic/label.c @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "nm-sd-adapt-shared.h" + +#include <errno.h> +#include <stddef.h> + +#include "label.h" + +static const LabelOps *label_ops = NULL; + +int label_ops_set(const LabelOps *ops) { + if (label_ops) + return -EBUSY; + + label_ops = ops; + return 0; +} + +int label_ops_pre(int dir_fd, const char *path, mode_t mode) { + if (!label_ops || !label_ops->pre) + return 0; + + return label_ops->pre(dir_fd, path, mode); +} + +int label_ops_post(int dir_fd, const char *path) { + if (!label_ops || !label_ops->post) + return 0; + + return label_ops->post(dir_fd, path); +} diff --git a/src/libnm-systemd-shared/src/basic/label.h b/src/libnm-systemd-shared/src/basic/label.h new file mode 100644 index 0000000000..9644e435a3 --- /dev/null +++ b/src/libnm-systemd-shared/src/basic/label.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <sys/types.h> + +typedef struct LabelOps { + int (*pre)(int dir_fd, const char *path, mode_t mode); + int (*post)(int dir_fd, const char *path); +} LabelOps; + +int label_ops_set(const LabelOps *label_ops); + +int label_ops_pre(int dir_fd, const char *path, mode_t mode); +int label_ops_post(int dir_fd, const char *path); diff --git a/src/libnm-systemd-shared/src/basic/list.h b/src/libnm-systemd-shared/src/basic/list.h index ffc8bd8304..e4e5dff3ea 100644 --- a/src/libnm-systemd-shared/src/basic/list.h +++ b/src/libnm-systemd-shared/src/basic/list.h @@ -46,7 +46,7 @@ /* Remove an item from the list */ #define LIST_REMOVE(name,head,item) \ - ({ \ + ({ \ typeof(*(head)) **_head = &(head), *_item = (item); \ assert(_item); \ if (_item->name##_next) \ @@ -127,8 +127,11 @@ _b; \ }) -#define LIST_JUST_US(name,item) \ - (!(item)->name##_prev && !(item)->name##_next) +#define LIST_JUST_US(name, item) \ + ({ \ + typeof(*(item)) *_item = (item); \ + 
!(_item)->name##_prev && !(_item)->name##_next; \ + }) /* The type of the iterator 'i' is automatically determined by the type of 'head', and declared in the * loop. Hence, do not declare the same variable in the outer scope. Sometimes, we set 'head' through diff --git a/src/libnm-systemd-shared/src/basic/lock-util.h b/src/libnm-systemd-shared/src/basic/lock-util.h new file mode 100644 index 0000000000..e7744476bb --- /dev/null +++ b/src/libnm-systemd-shared/src/basic/lock-util.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <fcntl.h> + +typedef struct LockFile { + int dir_fd; + char *path; + int fd; + int operation; +} LockFile; + +int make_lock_file_at(int dir_fd, const char *p, int operation, LockFile *ret); +static inline int make_lock_file(const char *p, int operation, LockFile *ret) { + return make_lock_file_at(AT_FDCWD, p, operation, ret); +} +int make_lock_file_for(const char *p, int operation, LockFile *ret); +void release_lock_file(LockFile *f); + +#define LOCK_FILE_INIT { .dir_fd = -EBADF, .fd = -EBADF } + +/* POSIX locks with the same interface as flock(). */ +int posix_lock(int fd, int operation); +void posix_unlockpp(int **fd); + +#define CLEANUP_POSIX_UNLOCK(fd) \ + _cleanup_(posix_unlockpp) _unused_ int *CONCATENATE(_cleanup_posix_unlock_, UNIQ) = &(fd) + +/* Open File Description locks with the same interface as flock(). 
*/ +int unposix_lock(int fd, int operation); +void unposix_unlockpp(int **fd); + +#define CLEANUP_UNPOSIX_UNLOCK(fd) \ + _cleanup_(unposix_unlockpp) _unused_ int *CONCATENATE(_cleanup_unposix_unlock_, UNIQ) = &(fd) + +typedef enum LockType { + LOCK_BSD, + LOCK_POSIX, + LOCK_UNPOSIX, +} LockType; + +int lock_generic(int fd, LockType type, int operation); diff --git a/src/libnm-systemd-shared/src/basic/log.h b/src/libnm-systemd-shared/src/basic/log.h index 10f34c8559..eb7b51cb82 100644 --- a/src/libnm-systemd-shared/src/basic/log.h +++ b/src/libnm-systemd-shared/src/basic/log.h @@ -51,8 +51,10 @@ static inline void clear_log_syntax_callback(dummy_t *dummy) { const char *log_target_to_string(LogTarget target) _const_; LogTarget log_target_from_string(const char *s) _pure_; void log_set_target(LogTarget target); +void log_set_target_and_open(LogTarget target); int log_set_target_from_string(const char *e); LogTarget log_get_target(void) _pure_; +void log_settle_target(void); void log_set_max_level(int level); int log_set_max_level_from_string(const char *e); @@ -565,6 +567,16 @@ typedef struct LogRateLimit { #define log_ratelimit_error_errno(error, ...) log_ratelimit_full_errno(LOG_ERR, error, __VA_ARGS__) #define log_ratelimit_emergency_errno(error, ...) log_ratelimit_full_errno(log_emergency_level(), error, __VA_ARGS__) +const char *_log_set_prefix(const char *prefix, bool force); +static inline const char *_log_unset_prefixp(const char **p) { + assert(p); + _log_set_prefix(*p, true); + return NULL; +} + +#define LOG_SET_PREFIX(prefix) \ + _cleanup_(_log_unset_prefixp) _unused_ const char *CONCATENATE(_cleanup_log_unset_prefix_, UNIQ) = _log_set_prefix(prefix, false); + /* * The log context allows attaching extra metadata to log messages written to the journal via log.h. We keep * track of a thread local log context onto which we can push extra metadata fields that should be logged. 
@@ -597,50 +609,70 @@ typedef struct LogContext LogContext; bool log_context_enabled(void); -LogContext* log_context_attach(LogContext *c); -LogContext* log_context_detach(LogContext *c); +LogContext* log_context_new(const char *key, const char *value); +LogContext* log_context_new_strv(char **fields, bool owned); +LogContext* log_context_new_iov(struct iovec *input_iovec, size_t n_input_iovec, bool owned); -LogContext* log_context_new(char **fields, bool owned); -LogContext* log_context_free(LogContext *c); +/* Same as log_context_new(), but frees the given fields strv/iovec on failure. */ +LogContext* log_context_new_strv_consume(char **fields); +LogContext* log_context_new_iov_consume(struct iovec *input_iovec, size_t n_input_iovec); -/* Same as log_context_new(), but frees the given fields strv on failure. */ -LogContext* log_context_new_consume(char **fields); +LogContext *log_context_ref(LogContext *c); +LogContext *log_context_unref(LogContext *c); + +DEFINE_TRIVIAL_CLEANUP_FUNC(LogContext*, log_context_unref); /* Returns the number of attached log context objects. */ size_t log_context_num_contexts(void); /* Returns the number of fields in all attached log contexts. */ size_t log_context_num_fields(void); -DEFINE_TRIVIAL_CLEANUP_FUNC(LogContext*, log_context_detach); -DEFINE_TRIVIAL_CLEANUP_FUNC(LogContext*, log_context_free); - #define LOG_CONTEXT_PUSH(...) \ LOG_CONTEXT_PUSH_STRV(STRV_MAKE(__VA_ARGS__)) #define LOG_CONTEXT_PUSHF(...) 
\ LOG_CONTEXT_PUSH(snprintf_ok((char[LINE_MAX]) {}, LINE_MAX, __VA_ARGS__)) +#define _LOG_CONTEXT_PUSH_KEY_VALUE(key, value, c) \ + _unused_ _cleanup_(log_context_unrefp) LogContext *c = log_context_new(key, value); + +#define LOG_CONTEXT_PUSH_KEY_VALUE(key, value) \ + _LOG_CONTEXT_PUSH_KEY_VALUE(key, value, UNIQ_T(c, UNIQ)) + #define _LOG_CONTEXT_PUSH_STRV(strv, c) \ - _unused_ _cleanup_(log_context_freep) LogContext *c = log_context_new(strv, /*owned=*/ false); + _unused_ _cleanup_(log_context_unrefp) LogContext *c = log_context_new_strv(strv, /*owned=*/ false); #define LOG_CONTEXT_PUSH_STRV(strv) \ _LOG_CONTEXT_PUSH_STRV(strv, UNIQ_T(c, UNIQ)) -/* LOG_CONTEXT_CONSUME_STR()/LOG_CONTEXT_CONSUME_STRV() are identical to - * LOG_CONTEXT_PUSH_STR()/LOG_CONTEXT_PUSH_STRV() except they take ownership of the given str/strv argument. +#define _LOG_CONTEXT_PUSH_IOV(input_iovec, n_input_iovec, c) \ + _unused_ _cleanup_(log_context_unrefp) LogContext *c = log_context_new_iov(input_iovec, n_input_iovec, /*owned=*/ false); + +#define LOG_CONTEXT_PUSH_IOV(input_iovec, n_input_iovec) \ + _LOG_CONTEXT_PUSH_IOV(input_iovec, n_input_iovec, UNIQ_T(c, UNIQ)) + +/* LOG_CONTEXT_CONSUME_STR()/LOG_CONTEXT_CONSUME_STRV()/LOG_CONTEXT_CONSUME_IOV() are identical to + * LOG_CONTEXT_PUSH_STR()/LOG_CONTEXT_PUSH_STRV()/LOG_CONTEXT_PUSH_IOV() except they take ownership of the + * given str/strv argument. 
*/ #define _LOG_CONTEXT_CONSUME_STR(s, c, strv) \ _unused_ _cleanup_strv_free_ strv = strv_new(s); \ if (!strv) \ free(s); \ - _unused_ _cleanup_(log_context_freep) LogContext *c = log_context_new_consume(TAKE_PTR(strv)) + _unused_ _cleanup_(log_context_unrefp) LogContext *c = log_context_new_strv_consume(TAKE_PTR(strv)) #define LOG_CONTEXT_CONSUME_STR(s) \ _LOG_CONTEXT_CONSUME_STR(s, UNIQ_T(c, UNIQ), UNIQ_T(sv, UNIQ)) #define _LOG_CONTEXT_CONSUME_STRV(strv, c) \ - _unused_ _cleanup_(log_context_freep) LogContext *c = log_context_new_consume(strv); + _unused_ _cleanup_(log_context_unrefp) LogContext *c = log_context_new_strv_consume(strv); #define LOG_CONTEXT_CONSUME_STRV(strv) \ _LOG_CONTEXT_CONSUME_STRV(strv, UNIQ_T(c, UNIQ)) + +#define _LOG_CONTEXT_CONSUME_IOV(input_iovec, n_input_iovec, c) \ + _unused_ _cleanup_(log_context_unrefp) LogContext *c = log_context_new_iov_consume(input_iovec, n_input_iovec); + +#define LOG_CONTEXT_CONSUME_IOV(input_iovec, n_input_iovec) \ + _LOG_CONTEXT_CONSUME_IOV(input_iovec, n_input_iovec, UNIQ_T(c, UNIQ)) diff --git a/src/libnm-systemd-shared/src/basic/macro.h b/src/libnm-systemd-shared/src/basic/macro.h index 6ceaaea496..ce7350cb0e 100644 --- a/src/libnm-systemd-shared/src/basic/macro.h +++ b/src/libnm-systemd-shared/src/basic/macro.h @@ -257,6 +257,11 @@ static inline int __coverity_check_and_return__(int condition) { #define char_array_0(x) x[sizeof(x)-1] = 0; #define sizeof_field(struct_type, member) sizeof(((struct_type *) 0)->member) +#define endoffsetof_field(struct_type, member) (offsetof(struct_type, member) + sizeof_field(struct_type, member)) + +/* Maximum buffer size needed for formatting an unsigned integer type as hex, including space for '0x' + * prefix and trailing NUL suffix. */ +#define HEXADECIMAL_STR_MAX(type) (2 + sizeof(type) * 2 + 1) /* Returns the number of chars needed to format variables of the specified type as a decimal string. Adds in * extra space for a negative '-' prefix for signed types. 
Includes space for the trailing NUL. */ @@ -305,19 +310,14 @@ static inline int __coverity_check_and_return__(int condition) { p != (typeof(p)) POINTER_MAX; \ p = *(++_l)) -/* Define C11 thread_local attribute even on older gcc compiler - * version */ -#ifndef thread_local -/* - * Don't break on glibc < 2.16 that doesn't define __STDC_NO_THREADS__ - * see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53769 - */ -#if __STDC_VERSION__ >= 201112L && !(defined(__STDC_NO_THREADS__) || (defined(__GNU_LIBRARY__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16)) -#define thread_local _Thread_local -#else -#define thread_local __thread -#endif -#endif +#define _FOREACH_ARRAY(i, array, num, m, end) \ + for (typeof(array[0]) *i = (array), *end = ({ \ + typeof(num) m = (num); \ + (i && m > 0) ? i + m : NULL; \ + }); end && i < end; i++) + +#define FOREACH_ARRAY(i, array, num) \ + _FOREACH_ARRAY(i, array, num, UNIQ_T(m, UNIQ), UNIQ_T(end, UNIQ)) #define DEFINE_TRIVIAL_DESTRUCTOR(name, type, func) \ static inline void name(type *p) { \ @@ -427,7 +427,7 @@ typedef struct { assert_cc(sizeof(dummy_t) == 0); -/* A little helper for subtracting 1 off a pointer in a safe UB-free way. This is intended to be used for for +/* A little helper for subtracting 1 off a pointer in a safe UB-free way. This is intended to be used for * loops that count down from a high pointer until some base. A naive loop would implement this like this: * * for (p = end-1; p >= base; p--) … @@ -443,4 +443,13 @@ assert_cc(sizeof(dummy_t) == 0); _q && _q > (base) ? &_q[-1] : NULL; \ }) +/* Iterate through each variadic arg. All must be the same type as 'entry' or must be implicitly + * convertible. The iteration variable 'entry' must already be defined. */ +#define VA_ARGS_FOREACH(entry, ...) \ + _VA_ARGS_FOREACH(entry, UNIQ_T(_entries_, UNIQ), UNIQ_T(_current_, UNIQ), ##__VA_ARGS__) +#define _VA_ARGS_FOREACH(entry, _entries_, _current_, ...) 
\ + for (typeof(entry) _entries_[] = { __VA_ARGS__ }, *_current_ = _entries_; \ + ((long)(_current_ - _entries_) < (long)ELEMENTSOF(_entries_)) && ({ entry = *_current_; true; }); \ + _current_++) + #include "log.h" diff --git a/src/libnm-systemd-shared/src/basic/memory-util.c b/src/libnm-systemd-shared/src/basic/memory-util.c index e7b98b5c5c..c1e0a742b1 100644 --- a/src/libnm-systemd-shared/src/basic/memory-util.c +++ b/src/libnm-systemd-shared/src/basic/memory-util.c @@ -5,6 +5,7 @@ #include <unistd.h> #include "memory-util.h" +#include "missing_threads.h" size_t page_size(void) { static thread_local size_t pgsz = 0; diff --git a/src/libnm-systemd-shared/src/basic/memory-util.h b/src/libnm-systemd-shared/src/basic/memory-util.h index 428ccc210c..d26a0918e1 100644 --- a/src/libnm-systemd-shared/src/basic/memory-util.h +++ b/src/libnm-systemd-shared/src/basic/memory-util.h @@ -111,3 +111,37 @@ static inline void erase_and_freep(void *p) { static inline void erase_char(char *p) { explicit_bzero_safe(p, sizeof(char)); } + +/* An automatic _cleanup_-like logic for destroy arrays (i.e. 
pointers + size) when leaving scope */ +typedef struct ArrayCleanup { + void **parray; + size_t *pn; + free_array_func_t pfunc; +} ArrayCleanup; + +static inline void array_cleanup(const ArrayCleanup *c) { + assert(c); + + assert(!c->parray == !c->pn); + + if (!c->parray) + return; + + if (*c->parray) { + assert(c->pfunc); + c->pfunc(*c->parray, *c->pn); + *c->parray = NULL; + } + + *c->pn = 0; +} + +#define CLEANUP_ARRAY(array, n, func) \ + _cleanup_(array_cleanup) _unused_ const ArrayCleanup CONCATENATE(_cleanup_array_, UNIQ) = { \ + .parray = (void**) &(array), \ + .pn = &(n), \ + .pfunc = (free_array_func_t) ({ \ + void (*_f)(typeof(array[0]) *a, size_t b) = func; \ + _f; \ + }), \ + } diff --git a/src/libnm-systemd-shared/src/basic/mempool.c b/src/libnm-systemd-shared/src/basic/mempool.c index 53a719e00e..e467d7065e 100644 --- a/src/libnm-systemd-shared/src/basic/mempool.c +++ b/src/libnm-systemd-shared/src/basic/mempool.c @@ -5,6 +5,7 @@ #include <stdint.h> #include <stdlib.h> +#include "format-util.h" #include "macro.h" #include "memory-util.h" #include "mempool.h" @@ -15,21 +16,26 @@ struct pool { size_t n_used; }; +static void* pool_ptr(struct pool *p) { + return ((uint8_t*) ASSERT_PTR(p)) + ALIGN(sizeof(struct pool)); +} + void* mempool_alloc_tile(struct mempool *mp) { size_t i; /* When a tile is released we add it to the list and simply * place the next pointer at its offset 0. 
*/ + assert(mp); assert(mp->tile_size >= sizeof(void*)); assert(mp->at_least > 0); if (mp->freelist) { - void *r; + void *t; - r = mp->freelist; - mp->freelist = * (void**) mp->freelist; - return r; + t = mp->freelist; + mp->freelist = *(void**) mp->freelist; + return t; } if (_unlikely_(!mp->first_pool) || @@ -55,7 +61,7 @@ void* mempool_alloc_tile(struct mempool *mp) { i = mp->first_pool->n_used++; - return ((uint8_t*) mp->first_pool) + ALIGN(sizeof(struct pool)) + i*mp->tile_size; + return (uint8_t*) pool_ptr(mp->first_pool) + i*mp->tile_size; } void* mempool_alloc0_tile(struct mempool *mp) { @@ -67,19 +73,105 @@ void* mempool_alloc0_tile(struct mempool *mp) { return p; } -void mempool_free_tile(struct mempool *mp, void *p) { - * (void**) p = mp->freelist; +void* mempool_free_tile(struct mempool *mp, void *p) { + assert(mp); + + if (!p) + return NULL; + + *(void**) p = mp->freelist; mp->freelist = p; + + return NULL; +} + +static bool pool_contains(struct mempool *mp, struct pool *p, void *ptr) { + size_t off; + void *a; + + assert(mp); + assert(p); + + if (!ptr) + return false; + + a = pool_ptr(p); + if ((uint8_t*) ptr < (uint8_t*) a) + return false; + + off = (uint8_t*) ptr - (uint8_t*) a; + if (off >= mp->tile_size * p->n_tiles) + return false; + + assert(off % mp->tile_size == 0); + return true; +} + +static bool pool_is_unused(struct mempool *mp, struct pool *p) { + assert(mp); + assert(p); + + if (p->n_used == 0) + return true; + + /* Check if all tiles in this specific pool are in the freelist. 
*/ + size_t n = 0; + void *i = mp->freelist; + while (i) { + if (pool_contains(mp, p, i)) + n++; + + i = *(void**) i; + } + + assert(n <= p->n_used); + + return n == p->n_used; +} + +static void pool_unlink(struct mempool *mp, struct pool *p) { + size_t m = 0; + + assert(mp); + assert(p); + + if (p->n_used == 0) + return; + + void **i = &mp->freelist; + while (*i) { + void *d = *i; + + if (pool_contains(mp, p, d)) { + *i = *(void**) d; + m++; + + if (m == p->n_used) + break; + } else + i = (void**) d; + } } -#if VALGRIND -void mempool_drop(struct mempool *mp) { - struct pool *p = mp->first_pool; - while (p) { - struct pool *n; - n = p->next; - free(p); - p = n; +void mempool_trim(struct mempool *mp) { + size_t trimmed = 0, left = 0; + + assert(mp); + + struct pool **p = &mp->first_pool; + while (*p) { + struct pool *d = *p; + + if (pool_is_unused(mp, d)) { + trimmed += d->n_tiles * mp->tile_size; + pool_unlink(mp, d); + *p = d->next; + free(d); + } else { + left += d->n_tiles * mp->tile_size; + p = &d->next; + } } + + log_debug("Trimmed %s from memory pool %p. 
(%s left)", FORMAT_BYTES(trimmed), mp, FORMAT_BYTES(left)); } -#endif diff --git a/src/libnm-systemd-shared/src/basic/mempool.h b/src/libnm-systemd-shared/src/basic/mempool.h index 539ccbdf06..ba588af451 100644 --- a/src/libnm-systemd-shared/src/basic/mempool.h +++ b/src/libnm-systemd-shared/src/basic/mempool.h @@ -10,12 +10,12 @@ struct mempool { struct pool *first_pool; void *freelist; size_t tile_size; - unsigned at_least; + size_t at_least; }; void* mempool_alloc_tile(struct mempool *mp); void* mempool_alloc0_tile(struct mempool *mp); -void mempool_free_tile(struct mempool *mp, void *p); +void* mempool_free_tile(struct mempool *mp, void *p); #define DEFINE_MEMPOOL(pool_name, tile_type, alloc_at_least) \ static struct mempool pool_name = { \ @@ -25,6 +25,4 @@ static struct mempool pool_name = { \ __attribute__((weak)) bool mempool_enabled(void); -#if VALGRIND -void mempool_drop(struct mempool *mp); -#endif +void mempool_trim(struct mempool *mp); diff --git a/src/libnm-systemd-shared/src/basic/missing_fcntl.h b/src/libnm-systemd-shared/src/basic/missing_fcntl.h index 00937d2af0..24b2dc3119 100644 --- a/src/libnm-systemd-shared/src/basic/missing_fcntl.h +++ b/src/libnm-systemd-shared/src/basic/missing_fcntl.h @@ -25,6 +25,14 @@ #define F_SEAL_WRITE 0x0008 /* prevent writes */ #endif +#ifndef F_SEAL_FUTURE_WRITE +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ +#endif + +#ifndef F_SEAL_EXEC +#define F_SEAL_EXEC 0x0020 /* prevent chmod modifying exec bits */ +#endif + #ifndef F_OFD_GETLK #define F_OFD_GETLK 36 #define F_OFD_SETLK 37 @@ -58,3 +66,12 @@ #ifndef O_TMPFILE #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) #endif + +/* So O_LARGEFILE is generally implied by glibc, and defined to zero hence, because we only build in LFS + * mode. However, when invoking fcntl(F_GETFL) the flag is ORed into the result anyway — glibc does not mask + * it away. Which sucks. Let's define the actual value here, so that we can mask it ourselves. 
*/ +#if O_LARGEFILE != 0 +#define RAW_O_LARGEFILE O_LARGEFILE +#else +#define RAW_O_LARGEFILE 0100000 +#endif diff --git a/src/libnm-systemd-shared/src/basic/missing_syscall.h b/src/libnm-systemd-shared/src/basic/missing_syscall.h index d7296e74a1..610a7cef2e 100644 --- a/src/libnm-systemd-shared/src/basic/missing_syscall.h +++ b/src/libnm-systemd-shared/src/basic/missing_syscall.h @@ -598,10 +598,22 @@ static inline int missing_fsopen(const char *fsname, unsigned flags) { #if !HAVE_FSCONFIG +#ifndef FSCONFIG_SET_FLAG +#define FSCONFIG_SET_FLAG 0 /* Set parameter, supplying no value */ +#endif + #ifndef FSCONFIG_SET_STRING #define FSCONFIG_SET_STRING 1 /* Set parameter, supplying a string value */ #endif +#ifndef FSCONFIG_SET_FD +#define FSCONFIG_SET_FD 5 /* Set parameter, supplying an object by fd */ +#endif + +#ifndef FSCONFIG_CMD_CREATE +#define FSCONFIG_CMD_CREATE 6 /* Invoke superblock creation */ +#endif + static inline int missing_fsconfig(int fd, unsigned cmd, const char *key, const void *value, int aux) { # if defined __NR_fsconfig && __NR_fsconfig >= 0 return syscall(__NR_fsconfig, fd, cmd, key, value, aux); @@ -616,6 +628,26 @@ static inline int missing_fsconfig(int fd, unsigned cmd, const char *key, const /* ======================================================================= */ +#if !HAVE_FSMOUNT + +#ifndef FSMOUNT_CLOEXEC +#define FSMOUNT_CLOEXEC 0x00000001 +#endif + +static inline int missing_fsmount(int fd, unsigned flags, unsigned ms_flags) { +# if defined __NR_fsmount && __NR_fsmount >= 0 + return syscall(__NR_fsmount, fd, flags, ms_flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define fsmount missing_fsmount +#endif + +/* ======================================================================= */ + #if !HAVE_GETDENTS64 static inline ssize_t missing_getdents64(int fd, void *buffer, size_t length) { diff --git a/src/libnm-systemd-shared/src/basic/missing_threads.h b/src/libnm-systemd-shared/src/basic/missing_threads.h new file 
mode 100644 index 0000000000..fb3b72249b --- /dev/null +++ b/src/libnm-systemd-shared/src/basic/missing_threads.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* If threads.h doesn't exist, then define our own thread_local to match C11's thread_local. */ +#if HAVE_THREADS_H +# include <threads.h> +#elif !(defined(thread_local)) +/* Don't break on glibc < 2.16 that doesn't define __STDC_NO_THREADS__ + * see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53769 */ +# if __STDC_VERSION__ >= 201112L && !(defined(__STDC_NO_THREADS__) || (defined(__GNU_LIBRARY__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16)) +# define thread_local _Thread_local +# else +# define thread_local __thread +# endif +#endif diff --git a/src/libnm-systemd-shared/src/basic/origin-id.h b/src/libnm-systemd-shared/src/basic/origin-id.h new file mode 100644 index 0000000000..c55b0a368a --- /dev/null +++ b/src/libnm-systemd-shared/src/basic/origin-id.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <pthread.h> + +#include "random-util.h" + +/* This pattern needs to be repeated exactly in multiple modules, so macro it. + * To ensure an object is not passed into a different module (e.g.: when two shared objects statically + * linked to libsystemd get loaded in the same process, and the object created by one is passed to the + * other, see https://github.com/systemd/systemd/issues/27216), create a random static global random + * (mixed with PID, so that we can also check for reuse after fork) that is stored in the object and + * checked by public API on use. 
*/ +#define _DEFINE_ORIGIN_ID_HELPERS(type, name, scope) \ +static uint64_t origin_id; \ + \ +static void origin_id_initialize(void) { \ + origin_id = random_u64(); \ +} \ + \ +static uint64_t origin_id_query(void) { \ + static pthread_once_t once = PTHREAD_ONCE_INIT; \ + assert_se(pthread_once(&once, origin_id_initialize) == 0); \ + return origin_id ^ getpid_cached(); \ +} \ + \ +scope bool name##_origin_changed(type *p) { \ + assert(p); \ + return p->origin_id != origin_id_query(); \ +} + +#define DEFINE_ORIGIN_ID_HELPERS(type, name) \ + _DEFINE_ORIGIN_ID_HELPERS(type, name,); + +#define DEFINE_PRIVATE_ORIGIN_ID_HELPERS(type, name) \ + _DEFINE_ORIGIN_ID_HELPERS(type, name, static); diff --git a/src/libnm-systemd-shared/src/basic/parse-util.c b/src/libnm-systemd-shared/src/basic/parse-util.c index 71a16743ae..2b22039c1c 100644 --- a/src/libnm-systemd-shared/src/basic/parse-util.c +++ b/src/libnm-systemd-shared/src/basic/parse-util.c @@ -53,7 +53,6 @@ int parse_pid(const char *s, pid_t* ret_pid) { int r; assert(s); - assert(ret_pid); r = safe_atolu(s, &ul); if (r < 0) @@ -67,7 +66,8 @@ int parse_pid(const char *s, pid_t* ret_pid) { if (!pid_is_valid(pid)) return -ERANGE; - *ret_pid = pid; + if (ret_pid) + *ret_pid = pid; return 0; } @@ -339,6 +339,21 @@ int parse_errno(const char *t) { } #endif /* NM_IGNORED */ +int parse_fd(const char *t) { + int r, fd; + + assert(t); + + r = safe_atoi(t, &fd); + if (r < 0) + return r; + + if (fd < 0) + return -EBADF; + + return fd; +} + static const char *mangle_base(const char *s, unsigned *base) { const char *k; diff --git a/src/libnm-systemd-shared/src/basic/parse-util.h b/src/libnm-systemd-shared/src/basic/parse-util.h index 877199529d..c480407c2a 100644 --- a/src/libnm-systemd-shared/src/basic/parse-util.h +++ b/src/libnm-systemd-shared/src/basic/parse-util.h @@ -21,6 +21,7 @@ int parse_size(const char *t, uint64_t base, uint64_t *size); int parse_sector_size(const char *t, uint64_t *ret); int parse_range(const char *t, 
unsigned *lower, unsigned *upper); int parse_errno(const char *t); +int parse_fd(const char *t); #define SAFE_ATO_REFUSE_PLUS_MINUS (1U << 30) #define SAFE_ATO_REFUSE_LEADING_ZERO (1U << 29) diff --git a/src/libnm-systemd-shared/src/basic/path-util.c b/src/libnm-systemd-shared/src/basic/path-util.c index c214b8b007..a2af9e0ce2 100644 --- a/src/libnm-systemd-shared/src/basic/path-util.c +++ b/src/libnm-systemd-shared/src/basic/path-util.c @@ -10,7 +10,7 @@ #include <unistd.h> #include "alloc-util.h" -#include "chase-symlinks.h" +#include "chase.h" #include "extract-word.h" #include "fd-util.h" #include "fs-util.h" @@ -25,7 +25,7 @@ #if 0 /* NM_IGNORED */ int path_split_and_make_absolute(const char *p, char ***ret) { - char **l; + _cleanup_strv_free_ char **l = NULL; int r; assert(p); @@ -36,12 +36,10 @@ int path_split_and_make_absolute(const char *p, char ***ret) { return -ENOMEM; r = path_strv_make_absolute_cwd(l); - if (r < 0) { - strv_free(l); + if (r < 0) return r; - } - *ret = l; + *ret = TAKE_PTR(l); return r; } @@ -288,7 +286,7 @@ char **path_strv_resolve(char **l, const char *root) { } else t = *s; - r = chase_symlinks(t, root, 0, &u, NULL); + r = chase(t, root, 0, &u, NULL); if (r == -ENOENT) { if (root) { u = TAKE_PTR(orig); @@ -491,29 +489,37 @@ int path_compare(const char *a, const char *b) { } } -bool path_equal_or_files_same(const char *a, const char *b, int flags) { - return path_equal(a, b) || files_same(a, b, flags) > 0; +bool path_equal_or_inode_same(const char *a, const char *b, int flags) { + return path_equal(a, b) || inode_same(a, b, flags) > 0; } -bool path_equal_filename(const char *a, const char *b) { - _cleanup_free_ char *a_basename = NULL, *b_basename = NULL; - int r; +int path_compare_filename(const char *a, const char *b) { + _cleanup_free_ char *fa = NULL, *fb = NULL; + int r, j, k; - assert(a); - assert(b); + /* Order NULL before non-NULL */ + r = CMP(!!a, !!b); + if (r != 0) + return r; - r = path_extract_filename(a, &a_basename); - 
if (r < 0) { - log_debug_errno(r, "Failed to parse basename of %s: %m", a); - return false; - } - r = path_extract_filename(b, &b_basename); - if (r < 0) { - log_debug_errno(r, "Failed to parse basename of %s: %m", b); - return false; - } + j = path_extract_filename(a, &fa); + k = path_extract_filename(b, &fb); - return path_equal(a_basename, b_basename); + /* When one of paths is "." or root, then order it earlier. */ + r = CMP(j != -EADDRNOTAVAIL, k != -EADDRNOTAVAIL); + if (r != 0) + return r; + + /* When one of paths is invalid (or we get OOM), order invalid path after valid one. */ + r = CMP(j < 0, k < 0); + if (r != 0) + return r; + + /* fallback to use strcmp() if both paths are invalid. */ + if (j < 0) + return strcmp(a, b); + + return strcmp(fa, fb); } char* path_extend_internal(char **x, ...) { @@ -626,16 +632,13 @@ static int find_executable_impl(const char *name, const char *root, char **ret_f assert(name); - /* Function chase_symlinks() is invoked only when root is not NULL, as using it regardless of + /* Function chase() is invoked only when root is not NULL, as using it regardless of * root value would alter the behavior of existing callers for example: /bin/sleep would become * /usr/bin/sleep when find_executables is called. Hence, this function should be invoked when * needed to avoid unforeseen regression or other complicated changes. 
*/ if (root) { - r = chase_symlinks(name, - root, - CHASE_PREFIX_ROOT, - &path_name, - /* ret_fd= */ NULL); /* prefix root to name in case full paths are not specified */ + /* prefix root to name in case full paths are not specified */ + r = chase(name, root, CHASE_PREFIX_ROOT, &path_name, /* ret_fd= */ NULL); if (r < 0) return r; @@ -902,6 +905,8 @@ static const char *skip_slash_or_dot_backward(const char *path, const char *q) { continue; if (q > path && strneq(q - 1, "/.", 2)) continue; + if (q == path && *q == '.') + continue; break; } return q; @@ -926,6 +931,12 @@ int path_find_last_component(const char *path, bool accept_dot_dot, const char * * ret: "bbbbb/cc//././" * return value: 5 (== strlen("bbbbb")) * + * Input: path: "//.//aaa///bbbbb/cc//././" + * next: "///bbbbb/cc//././" + * Output: next: "//.//aaa///bbbbb/cc//././" (next == path) + * ret: "aaa///bbbbb/cc//././" + * return value: 3 (== strlen("aaa")) + * * Input: path: "/", ".", "", or NULL * Output: next: equivalent to path * ret: NULL diff --git a/src/libnm-systemd-shared/src/basic/path-util.h b/src/libnm-systemd-shared/src/basic/path-util.h index 1bdc0d406f..fee6e8ee49 100644 --- a/src/libnm-systemd-shared/src/basic/path-util.h +++ b/src/libnm-systemd-shared/src/basic/path-util.h @@ -68,15 +68,18 @@ char *path_startswith_full(const char *path, const char *prefix, bool accept_dot static inline char* path_startswith(const char *path, const char *prefix) { return path_startswith_full(path, prefix, true); } -int path_compare(const char *a, const char *b) _pure_; +int path_compare(const char *a, const char *b) _pure_; static inline bool path_equal(const char *a, const char *b) { return path_compare(a, b) == 0; } -bool path_equal_or_files_same(const char *a, const char *b, int flags); -/* Compares only the last portion of the input paths, ie: the filenames */ -bool path_equal_filename(const char *a, const char *b); +int path_compare_filename(const char *a, const char *b); +static inline bool 
path_equal_filename(const char *a, const char *b) { + return path_compare_filename(a, b) == 0; +} + +bool path_equal_or_inode_same(const char *a, const char *b, int flags); char* path_extend_internal(char **x, ...); #define path_extend(x, ...) path_extend_internal(x, __VA_ARGS__, POINTER_MAX) diff --git a/src/libnm-systemd-shared/src/basic/process-util.c b/src/libnm-systemd-shared/src/basic/process-util.c index a45e32bcc1..8601e0da54 100644 --- a/src/libnm-systemd-shared/src/basic/process-util.c +++ b/src/libnm-systemd-shared/src/basic/process-util.c @@ -40,6 +40,7 @@ #include "memory-util.h" #include "missing_sched.h" #include "missing_syscall.h" +#include "missing_threads.h" #include "mountpoint-util.h" #include "namespace-util.h" #include "nulstr-util.h" @@ -227,18 +228,12 @@ int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags _cleanup_strv_free_ char **args = NULL; - args = strv_parse_nulstr(t, k); + /* Drop trailing NULs, otherwise strv_parse_nulstr() adds additional empty strings at the end. + * See also issue #21186. */ + args = strv_parse_nulstr_full(t, k, /* drop_trailing_nuls = */ true); if (!args) return -ENOMEM; - /* Drop trailing empty strings. See issue #21186. 
*/ - STRV_FOREACH_BACKWARDS(p, args) { - if (!isempty(*p)) - break; - - *p = mfree(*p); - } - ans = quote_command_line(args, shflags); if (!ans) return -ENOMEM; @@ -264,6 +259,28 @@ int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags return 0; } +int get_process_cmdline_strv(pid_t pid, ProcessCmdlineFlags flags, char ***ret) { + _cleanup_free_ char *t = NULL; + char **args; + size_t k; + int r; + + assert(pid >= 0); + assert((flags & ~PROCESS_CMDLINE_COMM_FALLBACK) == 0); + assert(ret); + + r = get_process_cmdline_nulstr(pid, SIZE_MAX, flags, &t, &k); + if (r < 0) + return r; + + args = strv_parse_nulstr_full(t, k, /* drop_trailing_nuls = */ true); + if (!args) + return -ENOMEM; + + *ret = args; + return 0; +} + int container_get_leader(const char *machine, pid_t *pid) { _cleanup_free_ char *s = NULL, *class = NULL; const char *p; @@ -602,6 +619,8 @@ int get_process_umask(pid_t pid, mode_t *ret) { r = get_proc_field(p, "Umask", WHITESPACE, &m); if (r == -ENOENT) return -ESRCH; + if (r < 0) + return r; return parse_mode(m, ret); } @@ -933,7 +952,7 @@ int pid_from_same_root_fs(pid_t pid) { root = procfs_file_alloca(pid, "root"); - return files_same(root, "/proc/1/root", 0); + return inode_same(root, "/proc/1/root", 0); } #endif /* NM_IGNORED */ @@ -1142,6 +1161,7 @@ static void restore_sigsetp(sigset_t **ssp) { int safe_fork_full( const char *name, + const int stdio_fds[3], const int except_fds[], size_t n_except_fds, ForkFlags flags, @@ -1193,7 +1213,7 @@ int safe_fork_full( else pid = fork(); if (pid < 0) - return log_full_errno(prio, errno, "Failed to fork: %m"); + return log_full_errno(prio, errno, "Failed to fork off '%s': %m", strna(name)); if (pid > 0) { /* We are in the parent process */ @@ -1229,6 +1249,7 @@ int safe_fork_full( /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. 
*/ log_close(); log_set_open_when_needed(true); + log_settle_target(); } if (name) { @@ -1300,6 +1321,27 @@ int safe_fork_full( } } + if (flags & FORK_REARRANGE_STDIO) { + if (stdio_fds) { + r = rearrange_stdio(stdio_fds[0], stdio_fds[1], stdio_fds[2]); + if (r < 0) { + log_full_errno(prio, r, "Failed to rearrange stdio fds: %m"); + _exit(EXIT_FAILURE); + } + } else { + r = make_null_stdio(); + if (r < 0) { + log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m"); + _exit(EXIT_FAILURE); + } + } + } else if (flags & FORK_STDOUT_TO_STDERR) { + if (dup2(STDERR_FILENO, STDOUT_FILENO) < 0) { + log_full_errno(prio, errno, "Failed to connect stdout to stderr: %m"); + _exit(EXIT_FAILURE); + } + } + if (flags & FORK_CLOSE_ALL_FDS) { /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */ log_close(); @@ -1325,24 +1367,18 @@ int safe_fork_full( log_set_open_when_needed(false); } - if (flags & FORK_NULL_STDIO) { - r = make_null_stdio(); + if (flags & FORK_RLIMIT_NOFILE_SAFE) { + r = rlimit_nofile_safe(); if (r < 0) { - log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m"); - _exit(EXIT_FAILURE); - } - - } else if (flags & FORK_STDOUT_TO_STDERR) { - if (dup2(STDERR_FILENO, STDOUT_FILENO) < 0) { - log_full_errno(prio, errno, "Failed to connect stdout to stderr: %m"); + log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m"); _exit(EXIT_FAILURE); } } - if (flags & FORK_RLIMIT_NOFILE_SAFE) { - r = rlimit_nofile_safe(); + if (!FLAGS_SET(flags, FORK_KEEP_NOTIFY_SOCKET)) { + r = RET_NERRNO(unsetenv("NOTIFY_SOCKET")); if (r < 0) { - log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m"); + log_full_errno(prio, r, "Failed to unset $NOTIFY_SOCKET: %m"); _exit(EXIT_FAILURE); } } @@ -1372,7 +1408,10 @@ int namespace_fork( * process. 
This ensures that we are fully a member of the destination namespace, with pidns an all, so that * /proc/self/fd works correctly. */ - r = safe_fork_full(outer_name, except_fds, n_except_fds, (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid); + r = safe_fork_full(outer_name, + NULL, + except_fds, n_except_fds, + (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid); if (r < 0) return r; if (r == 0) { @@ -1387,7 +1426,10 @@ int namespace_fork( } /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */ - r = safe_fork_full(inner_name, except_fds, n_except_fds, flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_NULL_STDIO), &pid); + r = safe_fork_full(inner_name, + NULL, + except_fds, n_except_fds, + flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REARRANGE_STDIO), &pid); if (r < 0) _exit(EXIT_FAILURE); if (r == 0) { @@ -1440,6 +1482,15 @@ int pidfd_get_pid(int fd, pid_t *ret) { char *p; int r; + /* Converts a pidfd into a pid. Well known errors: + * + * -EBADF → fd invalid + * -ENOSYS → /proc/ not mounted + * -ENOTTY → fd valid, but not a pidfd + * -EREMOTE → fd valid, but pid is in another namespace we cannot translate to the local one + * -ESRCH → fd valid, but process is already reaped + */ + if (fd < 0) return -EBADF; @@ -1447,22 +1498,22 @@ int pidfd_get_pid(int fd, pid_t *ret) { r = read_full_virtual_file(path, &fdinfo, NULL); if (r == -ENOENT) /* if fdinfo doesn't exist we assume the process does not exist */ - return -ESRCH; + return proc_mounted() > 0 ? -EBADF : -ENOSYS; if (r < 0) return r; - p = startswith(fdinfo, "Pid:"); - if (!p) { - p = strstr(fdinfo, "\nPid:"); - if (!p) - return -ENOTTY; /* not a pidfd? */ - - p += 5; - } + p = find_line_startswith(fdinfo, "Pid:"); + if (!p) + return -ENOTTY; /* not a pidfd? 
*/ p += strspn(p, WHITESPACE); p[strcspn(p, WHITESPACE)] = 0; + if (streq(p, "0")) + return -EREMOTE; /* PID is in foreign PID namespace? */ + if (streq(p, "-1")) + return -ESRCH; /* refers to reaped process? */ + return parse_pid(p, ret); } @@ -1557,6 +1608,31 @@ _noreturn_ void freeze(void) { pause(); } +int get_process_threads(pid_t pid) { + _cleanup_free_ char *t = NULL; + const char *p; + int n, r; + + if (pid < 0) + return -EINVAL; + + p = procfs_file_alloca(pid, "status"); + + r = get_proc_field(p, "Threads", WHITESPACE, &t); + if (r == -ENOENT) + return proc_mounted() == 0 ? -ENOSYS : -ESRCH; + if (r < 0) + return r; + + r = safe_atoi(t, &n); + if (r < 0) + return r; + if (n < 0) + return -EINVAL; + + return n; +} + static const char *const sigchld_code_table[] = { [CLD_EXITED] = "exited", [CLD_KILLED] = "killed", diff --git a/src/libnm-systemd-shared/src/basic/process-util.h b/src/libnm-systemd-shared/src/basic/process-util.h index f96f7bf06b..5cf5c7c6ec 100644 --- a/src/libnm-systemd-shared/src/basic/process-util.h +++ b/src/libnm-systemd-shared/src/basic/process-util.h @@ -40,6 +40,7 @@ typedef enum ProcessCmdlineFlags { int get_process_comm(pid_t pid, char **ret); int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **ret); +int get_process_cmdline_strv(pid_t pid, ProcessCmdlineFlags flags, char ***ret); int get_process_exe(pid_t pid, char **ret); int get_process_uid(pid_t pid, uid_t *ret); int get_process_gid(pid_t pid, gid_t *ret); @@ -145,7 +146,7 @@ typedef enum ForkFlags { FORK_CLOSE_ALL_FDS = 1 << 1, /* Close all open file descriptors in the child, except for 0,1,2 */ FORK_DEATHSIG = 1 << 2, /* Set PR_DEATHSIG in the child to SIGTERM */ FORK_DEATHSIG_SIGINT = 1 << 3, /* Set PR_DEATHSIG in the child to SIGINT */ - FORK_NULL_STDIO = 1 << 4, /* Connect 0,1,2 to /dev/null */ + FORK_REARRANGE_STDIO = 1 << 4, /* Connect 0,1,2 to specified fds or /dev/null */ FORK_REOPEN_LOG = 1 << 5, /* Reopen log connection */ FORK_LOG 
= 1 << 6, /* Log above LOG_DEBUG log level about failures */ FORK_WAIT = 1 << 7, /* Wait until child exited */ @@ -157,12 +158,19 @@ typedef enum ForkFlags { FORK_FLUSH_STDIO = 1 << 13, /* fflush() stdout (and stderr) before forking */ FORK_NEW_USERNS = 1 << 14, /* Run child in its own user namespace */ FORK_CLOEXEC_OFF = 1 << 15, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */ + FORK_KEEP_NOTIFY_SOCKET = 1 << 16, /* Unless this is specified, $NOTIFY_SOCKET will be unset. */ } ForkFlags; -int safe_fork_full(const char *name, const int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid); +int safe_fork_full( + const char *name, + const int stdio_fds[3], + const int except_fds[], + size_t n_except_fds, + ForkFlags flags, + pid_t *ret_pid); static inline int safe_fork(const char *name, ForkFlags flags, pid_t *ret_pid) { - return safe_fork_full(name, NULL, 0, flags, ret_pid); + return safe_fork_full(name, NULL, NULL, 0, flags, ret_pid); } int namespace_fork(const char *outer_name, const char *inner_name, const int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid); @@ -191,3 +199,5 @@ int pidfd_verify_pid(int pidfd, pid_t pid); int setpriority_closest(int priority); _noreturn_ void freeze(void); + +int get_process_threads(pid_t pid); diff --git a/src/libnm-systemd-shared/src/basic/random-util.c b/src/libnm-systemd-shared/src/basic/random-util.c index 0b359da0d9..934d5e2531 100644 --- a/src/libnm-systemd-shared/src/basic/random-util.c +++ b/src/libnm-systemd-shared/src/basic/random-util.c @@ -26,6 +26,7 @@ #include "io-util.h" #include "missing_random.h" #include "missing_syscall.h" +#include "missing_threads.h" #include "parse-util.h" #include "random-util.h" #include "sha256.h" diff --git a/src/libnm-systemd-shared/src/basic/ratelimit.c b/src/libnm-systemd-shared/src/basic/ratelimit.c index b427db8467..a28c812210 100644 --- 
a/src/libnm-systemd-shared/src/basic/ratelimit.c +++ b/src/libnm-systemd-shared/src/basic/ratelimit.c @@ -12,7 +12,6 @@ bool ratelimit_below(RateLimit *r) { usec_t ts; - bool good = false; assert(r); @@ -23,20 +22,41 @@ bool ratelimit_below(RateLimit *r) { if (r->begin <= 0 || usec_sub_unsigned(ts, r->begin) > r->interval) { - r->begin = ts; + r->begin = ts; /* Start a new time window */ + r->num = 1; /* Reset counter */ + return true; + } - /* Reset counter */ - r->num = 0; - good = true; - } else if (r->num < r->burst) - good = true; + if (_unlikely_(r->num == UINT_MAX)) + return false; r->num++; - return good; + return r->num <= r->burst; } unsigned ratelimit_num_dropped(RateLimit *r) { assert(r); - return r->num > r->burst ? r->num - r->burst : 0; + if (r->num == UINT_MAX) /* overflow, return as special case */ + return UINT_MAX; + + return LESS_BY(r->num, r->burst); +} + +usec_t ratelimit_end(const RateLimit *rl) { + assert(rl); + + if (rl->begin == 0) + return 0; + + return usec_add(rl->begin, rl->interval); +} + +usec_t ratelimit_left(const RateLimit *rl) { + assert(rl); + + if (rl->begin == 0) + return 0; + + return usec_sub_unsigned(ratelimit_end(rl), now(CLOCK_MONOTONIC)); } diff --git a/src/libnm-systemd-shared/src/basic/ratelimit.h b/src/libnm-systemd-shared/src/basic/ratelimit.h index 2236189851..bb7160a895 100644 --- a/src/libnm-systemd-shared/src/basic/ratelimit.h +++ b/src/libnm-systemd-shared/src/basic/ratelimit.h @@ -23,3 +23,6 @@ static inline bool ratelimit_configured(RateLimit *rl) { bool ratelimit_below(RateLimit *r); unsigned ratelimit_num_dropped(RateLimit *r); + +usec_t ratelimit_end(const RateLimit *rl); +usec_t ratelimit_left(const RateLimit *rl); diff --git a/src/libnm-systemd-shared/src/basic/signal-util.c b/src/libnm-systemd-shared/src/basic/signal-util.c index f446605bfd..270d397d50 100644 --- a/src/libnm-systemd-shared/src/basic/signal-util.c +++ b/src/libnm-systemd-shared/src/basic/signal-util.c @@ -8,6 +8,7 @@ #include 
"errno-util.h" #include "macro.h" #include "missing_syscall.h" +#include "missing_threads.h" #include "parse-util.h" #include "signal-util.h" #include "stdio-util.h" diff --git a/src/libnm-systemd-shared/src/basic/socket-util.c b/src/libnm-systemd-shared/src/basic/socket-util.c index e7ea030008..9b411e07a2 100644 --- a/src/libnm-systemd-shared/src/basic/socket-util.c +++ b/src/libnm-systemd-shared/src/basic/socket-util.c @@ -228,7 +228,7 @@ bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) { return false; if (a->sockaddr.un.sun_path[0]) { - if (!path_equal_or_files_same(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, 0)) + if (!path_equal_or_inode_same(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, 0)) return false; } else { if (a->size != b->size) @@ -1054,7 +1054,7 @@ ssize_t receive_one_fd_iov( } if (found) - *ret_fd = *(int*) CMSG_DATA(found); + *ret_fd = *CMSG_TYPED_DATA(found, int); else *ret_fd = -EBADF; @@ -1181,6 +1181,24 @@ struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t leng return NULL; } +void* cmsg_find_and_copy_data(struct msghdr *mh, int level, int type, void *buf, size_t buf_len) { + struct cmsghdr *cmsg; + + assert(mh); + assert(buf); + assert(buf_len > 0); + + /* This is similar to cmsg_find_data(), but copy the found data to buf. This should be typically used + * when reading possibly unaligned data such as timestamp, as time_t is 64bit and size_t is 32bit on + * RISCV32. See issue #27241. 
*/ + + cmsg = cmsg_find(mh, level, type, CMSG_LEN(buf_len)); + if (!cmsg) + return NULL; + + return memcpy_safe(buf, CMSG_DATA(cmsg), buf_len); +} + #if 0 /* NM_IGNORED */ int socket_ioctl_fd(void) { int fd; @@ -1320,7 +1338,7 @@ ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags) { } #if 0 /* NM_IGNORED */ -int socket_get_family(int fd, int *ret) { +int socket_get_family(int fd) { int af; socklen_t sl = sizeof(af); @@ -1334,12 +1352,11 @@ int socket_get_family(int fd, int *ret) { } int socket_set_recvpktinfo(int fd, int af, bool b) { - int r; if (af == AF_UNSPEC) { - r = socket_get_family(fd, &af); - if (r < 0) - return r; + af = socket_get_family(fd); + if (af < 0) + return af; } switch (af) { @@ -1363,12 +1380,11 @@ int socket_set_recvpktinfo(int fd, int af, bool b) { int socket_set_unicast_if(int fd, int af, int ifi) { be32_t ifindex_be = htobe32(ifi); - int r; if (af == AF_UNSPEC) { - r = socket_get_family(fd, &af); - if (r < 0) - return r; + af = socket_get_family(fd); + if (af < 0) + return af; } switch (af) { @@ -1385,12 +1401,10 @@ int socket_set_unicast_if(int fd, int af, int ifi) { } int socket_set_option(int fd, int af, int opt_ipv4, int opt_ipv6, int val) { - int r; - if (af == AF_UNSPEC) { - r = socket_get_family(fd, &af); - if (r < 0) - return r; + af = socket_get_family(fd); + if (af < 0) + return af; } switch (af) { @@ -1410,9 +1424,9 @@ int socket_get_mtu(int fd, int af, size_t *ret) { int mtu, r; if (af == AF_UNSPEC) { - r = socket_get_family(fd, &af); - if (r < 0) - return r; + af = socket_get_family(fd); + if (af < 0) + return af; } switch (af) { @@ -1439,52 +1453,60 @@ int socket_get_mtu(int fd, int af, size_t *ret) { } #endif /* NM_IGNORED */ -int connect_unix_path(int fd, int dir_fd, const char *path) { - _cleanup_close_ int inode_fd = -EBADF; +static int connect_unix_path_simple(int fd, const char *path) { union sockaddr_union sa = { .un.sun_family = AF_UNIX, }; - size_t path_len; - socklen_t salen; + size_t l; assert(fd >= 0); 
- assert(dir_fd == AT_FDCWD || dir_fd >= 0); assert(path); + l = strlen(path); + assert(l > 0); + assert(l < sizeof(sa.un.sun_path)); + + memcpy(sa.un.sun_path, path, l + 1); + return RET_NERRNO(connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + l + 1)); +} + +static int connect_unix_inode(int fd, int inode_fd) { + assert(fd >= 0); + assert(inode_fd >= 0); + + return connect_unix_path_simple(fd, FORMAT_PROC_FD_PATH(inode_fd)); +} + +int connect_unix_path(int fd, int dir_fd, const char *path) { + _cleanup_close_ int inode_fd = -EBADF; + + assert(fd >= 0); + assert(dir_fd == AT_FDCWD || dir_fd >= 0); + /* Connects to the specified AF_UNIX socket in the file system. Works around the 108 byte size limit * in sockaddr_un, by going via O_PATH if needed. This hence works for any kind of path. */ - path_len = strlen(path); + if (!path) + return connect_unix_inode(fd, dir_fd); /* If no path is specified, then dir_fd refers to the socket inode to connect to. */ /* Refuse zero length path early, to make sure AF_UNIX stack won't mistake this for an abstract * namespace path, since first char is NUL */ - if (path_len <= 0) + if (isempty(path)) return -EINVAL; - if (dir_fd == AT_FDCWD && path_len < sizeof(sa.un.sun_path)) { - memcpy(sa.un.sun_path, path, path_len + 1); - salen = offsetof(struct sockaddr_un, sun_path) + path_len + 1; - } else { - const char *proc; - size_t proc_len; - - /* If dir_fd is specified, then we need to go the indirect O_PATH route, because connectat() - * does not exist. If the path is too long, we also need to take the indirect route, since we - * can't fit this into a sockaddr_un directly. 
*/ + /* Shortcut for the simple case */ + if (dir_fd == AT_FDCWD && strlen(path) < sizeof_field(struct sockaddr_un, sun_path)) + return connect_unix_path_simple(fd, path); - inode_fd = openat(dir_fd, path, O_PATH|O_CLOEXEC); - if (inode_fd < 0) - return -errno; + /* If dir_fd is specified, then we need to go the indirect O_PATH route, because connectat() does not + * exist. If the path is too long, we also need to take the indirect route, since we can't fit this + * into a sockaddr_un directly. */ - proc = FORMAT_PROC_FD_PATH(inode_fd); - proc_len = strlen(proc); - - assert(proc_len < sizeof(sa.un.sun_path)); - memcpy(sa.un.sun_path, proc, proc_len + 1); - salen = offsetof(struct sockaddr_un, sun_path) + proc_len + 1; - } + inode_fd = openat(dir_fd, path, O_PATH|O_CLOEXEC); + if (inode_fd < 0) + return -errno; - return RET_NERRNO(connect(fd, &sa.sa, salen)); + return connect_unix_inode(fd, inode_fd); } int socket_address_parse_unix(SocketAddress *ret_address, const char *s) { @@ -1515,13 +1537,20 @@ int socket_address_parse_vsock(SocketAddress *ret_address, const char *s) { _cleanup_free_ char *n = NULL; char *e, *cid_start; unsigned port, cid; - int r; + int type, r; assert(ret_address); assert(s); - cid_start = startswith(s, "vsock:"); - if (!cid_start) + if ((cid_start = startswith(s, "vsock:"))) + type = 0; + else if ((cid_start = startswith(s, "vsock-dgram:"))) + type = SOCK_DGRAM; + else if ((cid_start = startswith(s, "vsock-seqpacket:"))) + type = SOCK_SEQPACKET; + else if ((cid_start = startswith(s, "vsock-stream:"))) + type = SOCK_STREAM; + else return -EPROTO; e = strchr(cid_start, ':'); @@ -1550,6 +1579,7 @@ int socket_address_parse_vsock(SocketAddress *ret_address, const char *s) { .svm_family = AF_VSOCK, .svm_port = port, }, + .type = type, .size = sizeof(struct sockaddr_vm), }; diff --git a/src/libnm-systemd-shared/src/basic/socket-util.h b/src/libnm-systemd-shared/src/basic/socket-util.h index b21bd7e42e..26f9636fa6 100644 --- 
a/src/libnm-systemd-shared/src/basic/socket-util.h +++ b/src/libnm-systemd-shared/src/basic/socket-util.h @@ -177,18 +177,30 @@ int flush_accept(int fd); #define CMSG_FOREACH(cmsg, mh) \ for ((cmsg) = CMSG_FIRSTHDR(mh); (cmsg); (cmsg) = CMSG_NXTHDR((mh), (cmsg))) +/* Returns the cmsghdr's data pointer, but safely cast to the specified type. Does two alignment checks: one + * at compile time, that the requested type has a smaller or same alignment as 'struct cmsghdr', and one + * during runtime, that the actual pointer matches the alignment too. This is supposed to catch cases such as + * 'struct timeval' is embedded into 'struct cmsghdr' on architectures where the alignment of the former is 8 + * bytes (because of a 64bit time_t), but of the latter is 4 bytes (because size_t is 32bit), such as + * riscv32. */ #define CMSG_TYPED_DATA(cmsg, type) \ ({ \ - struct cmsghdr *_cmsg = cmsg; \ + struct cmsghdr *_cmsg = (cmsg); \ + assert_cc(alignof(type) <= alignof(struct cmsghdr)); \ _cmsg ? CAST_ALIGN_PTR(type, CMSG_DATA(_cmsg)) : (type*) NULL; \ }) struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length); +void* cmsg_find_and_copy_data(struct msghdr *mh, int level, int type, void *buf, size_t buf_len); /* Type-safe, dereferencing version of cmsg_find() */ #define CMSG_FIND_DATA(mh, level, type, ctype) \ CMSG_TYPED_DATA(cmsg_find(mh, level, type, CMSG_LEN(sizeof(ctype))), ctype) +/* Type-safe version of cmsg_find_and_copy_data() */ +#define CMSG_FIND_AND_COPY_DATA(mh, level, type, ctype) \ + (ctype*) cmsg_find_and_copy_data(mh, level, type, &(ctype){}, sizeof(ctype)) + /* Resolves to a type that can carry cmsghdr structures. Make sure things are properly aligned, i.e. the type * itself is placed properly in memory and the size is also aligned to what's appropriate for "cmsghdr" * structures. 
*/ @@ -308,7 +320,7 @@ struct timespec_large { ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags); -int socket_get_family(int fd, int *ret); +int socket_get_family(int fd); int socket_set_recvpktinfo(int fd, int af, bool b); int socket_set_unicast_if(int fd, int af, int ifi); @@ -346,3 +358,10 @@ int connect_unix_path(int fd, int dir_fd, const char *path); * protocol mismatch. */ int socket_address_parse_unix(SocketAddress *ret_address, const char *s); int socket_address_parse_vsock(SocketAddress *ret_address, const char *s); + +/* libc's SOMAXCONN is defined to 128 or 4096 (at least on glibc). But actually, the value can be much + * larger. In our codebase we want to set it to the max usually, since nowadays socket memory is properly + * tracked by memcg, and hence we don't need to enforce extra limits here. Moreover, the kernel caps it to + * /proc/sys/net/core/somaxconn anyway, thus by setting this to unbounded we just make that sysctl file + * authoritative. */ +#define SOMAXCONN_DELUXE INT_MAX diff --git a/src/libnm-systemd-shared/src/basic/stat-util.c b/src/libnm-systemd-shared/src/basic/stat-util.c index 8688ca91e6..a81ee468ff 100644 --- a/src/libnm-systemd-shared/src/basic/stat-util.c +++ b/src/libnm-systemd-shared/src/basic/stat-util.c @@ -10,7 +10,7 @@ #include <unistd.h> #include "alloc-util.h" -#include "chase-symlinks.h" +#include "chase.h" #include "dirent-util.h" #include "errno-util.h" #include "fd-util.h" @@ -150,24 +150,13 @@ int null_or_empty_path_with_root(const char *fn, const char *root) { if (path_equal_ptr(path_startswith(fn, root ?: "/"), "dev/null")) return true; - r = chase_symlinks_and_stat(fn, root, CHASE_PREFIX_ROOT, NULL, &st, NULL); + r = chase_and_stat(fn, root, CHASE_PREFIX_ROOT, NULL, &st); if (r < 0) return r; return null_or_empty(&st); } -int null_or_empty_fd(int fd) { - struct stat st; - - assert(fd >= 0); - - if (fstat(fd, &st) < 0) - return -errno; - - return null_or_empty(&st); -} - static int 
fd_is_read_only_fs(int fd) { struct statvfs st; @@ -200,17 +189,19 @@ int path_is_read_only_fs(const char *path) { } #endif /* NM_IGNORED */ -int files_same(const char *filea, const char *fileb, int flags) { +int inode_same_at(int fda, const char *filea, int fdb, const char *fileb, int flags) { struct stat a, b; + assert(fda >= 0 || fda == AT_FDCWD); assert(filea); + assert(fdb >= 0 || fdb == AT_FDCWD); assert(fileb); - if (fstatat(AT_FDCWD, filea, &a, flags) < 0) - return -errno; + if (fstatat(fda, filea, &a, flags) < 0) + return log_debug_errno(errno, "Cannot stat %s: %m", filea); - if (fstatat(AT_FDCWD, fileb, &b, flags) < 0) - return -errno; + if (fstatat(fdb, fileb, &b, flags) < 0) + return log_debug_errno(errno, "Cannot stat %s: %m", fileb); return stat_inode_same(&a, &b); } @@ -222,22 +213,13 @@ bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) { return F_TYPE_EQUAL(s->f_type, magic_value); } -#if 0 /* NM_IGNORED */ -int fd_is_fs_type(int fd, statfs_f_type_t magic_value) { - struct statfs s; - - if (fstatfs(fd, &s) < 0) - return -errno; - - return is_fs_type(&s, magic_value); -} -#endif /* NM_IGNORED */ - -int path_is_fs_type(const char *path, statfs_f_type_t magic_value) { +int is_fs_type_at(int dir_fd, const char *path, statfs_f_type_t magic_value) { struct statfs s; + int r; - if (statfs(path, &s) < 0) - return -errno; + r = xstatfsat(dir_fd, path, &s); + if (r < 0) + return r; return is_fs_type(&s, magic_value); } @@ -318,6 +300,18 @@ int fd_verify_regular(int fd) { } #if 0 /* NM_IGNORED */ +int verify_regular_at(int dir_fd, const char *path, bool follow) { + struct stat st; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + + if (fstatat(dir_fd, path, &st, (isempty(path) ? AT_EMPTY_PATH : 0) | (follow ? 
0 : AT_SYMLINK_NOFOLLOW)) < 0) + return -errno; + + return stat_verify_regular(&st); +} + int stat_verify_directory(const struct stat *st) { assert(st); @@ -466,7 +460,22 @@ int statx_fallback(int dfd, const char *path, int flags, unsigned mask, struct s return 0; } +#endif /* NM_IGNORED */ + +int xstatfsat(int dir_fd, const char *path, struct statfs *ret) { + _cleanup_close_ int fd = -EBADF; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(ret); + + fd = xopenat(dir_fd, path, O_PATH|O_CLOEXEC|O_NOCTTY, /* xopen_flags = */ 0, /* mode = */ 0); + if (fd < 0) + return fd; + + return RET_NERRNO(fstatfs(fd, ret)); +} +#if 0 /* NM_IGNORED */ void inode_hash_func(const struct stat *q, struct siphash *state) { siphash24_compress(&q->st_dev, sizeof(q->st_dev), state); siphash24_compress(&q->st_ino, sizeof(q->st_ino), state); @@ -483,4 +492,27 @@ int inode_compare_func(const struct stat *a, const struct stat *b) { } DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(inode_hash_ops, struct stat, inode_hash_func, inode_compare_func, free); + +const char* inode_type_to_string(mode_t m) { + + /* Returns a short string for the inode type. We use the same name as the underlying macros for each + * inode type. 
*/ + + switch (m & S_IFMT) { + case S_IFREG: + return "reg"; + case S_IFDIR: + return "dir"; + case S_IFCHR: + return "chr"; + case S_IFBLK: + return "blk"; + case S_IFIFO: + return "fifo"; + case S_IFSOCK: + return "sock"; + } + + return NULL; +} #endif /* NM_IGNORED */ diff --git a/src/libnm-systemd-shared/src/basic/stat-util.h b/src/libnm-systemd-shared/src/basic/stat-util.h index de11c0cf7c..ae0aaf8f51 100644 --- a/src/libnm-systemd-shared/src/basic/stat-util.h +++ b/src/libnm-systemd-shared/src/basic/stat-util.h @@ -30,7 +30,6 @@ static inline int dir_is_empty(const char *path, bool ignore_hidden_or_backup) { bool null_or_empty(struct stat *st) _pure_; int null_or_empty_path_with_root(const char *fn, const char *root); -int null_or_empty_fd(int fd); static inline int null_or_empty_path(const char *fn) { return null_or_empty_path_with_root(fn, NULL); @@ -38,15 +37,24 @@ static inline int null_or_empty_path(const char *fn) { int path_is_read_only_fs(const char *path); -int files_same(const char *filea, const char *fileb, int flags); +int inode_same_at(int fda, const char *filea, int fdb, const char *fileb, int flags); + +static inline int inode_same(const char *filea, const char *fileb, int flags) { + return inode_same_at(AT_FDCWD, filea, AT_FDCWD, fileb, flags); +} /* The .f_type field of struct statfs is really weird defined on * different archs. Let's give its type a name. 
*/ typedef typeof(((struct statfs*)NULL)->f_type) statfs_f_type_t; bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) _pure_; -int fd_is_fs_type(int fd, statfs_f_type_t magic_value); -int path_is_fs_type(const char *path, statfs_f_type_t magic_value); +int is_fs_type_at(int dir_fd, const char *path, statfs_f_type_t magic_value); +static inline int fd_is_fs_type(int fd, statfs_f_type_t magic_value) { + return is_fs_type_at(fd, NULL, magic_value); +} +static inline int path_is_fs_type(const char *path, statfs_f_type_t magic_value) { + return is_fs_type_at(AT_FDCWD, path, magic_value); +} bool is_temporary_fs(const struct statfs *s) _pure_; bool is_network_fs(const struct statfs *s) _pure_; @@ -65,6 +73,7 @@ int path_is_network_fs(const char *path); int stat_verify_regular(const struct stat *st); int fd_verify_regular(int fd); +int verify_regular_at(int dir_fd, const char *path, bool follow); int stat_verify_directory(const struct stat *st); int fd_verify_directory(int fd); @@ -79,6 +88,8 @@ bool statx_mount_same(const struct new_statx *a, const struct new_statx *b); int statx_fallback(int dfd, const char *path, int flags, unsigned mask, struct statx *sx); +int xstatfsat(int dir_fd, const char *path, struct statfs *ret); + #if HAS_FEATURE_MEMORY_SANITIZER # warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this." 
# define STRUCT_STATX_DEFINE(var) \ @@ -101,3 +112,5 @@ int statx_fallback(int dfd, const char *path, int flags, unsigned mask, struct s void inode_hash_func(const struct stat *q, struct siphash *state); int inode_compare_func(const struct stat *a, const struct stat *b); extern const struct hash_ops inode_hash_ops; + +const char* inode_type_to_string(mode_t m); diff --git a/src/libnm-systemd-shared/src/basic/string-table.h b/src/libnm-systemd-shared/src/basic/string-table.h index e3a26a623c..3be70dfade 100644 --- a/src/libnm-systemd-shared/src/basic/string-table.h +++ b/src/libnm-systemd-shared/src/basic/string-table.h @@ -95,6 +95,7 @@ ssize_t string_table_lookup(const char * const *table, size_t len, const char *k #define DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(name,type,max) \ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,) \ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,) +#define DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_FALLBACK(name,type,max) _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,) #define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max) \ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,static) diff --git a/src/libnm-systemd-shared/src/basic/string-util.c b/src/libnm-systemd-shared/src/basic/string-util.c index 8bf548d81b..1afa49bba0 100644 --- a/src/libnm-systemd-shared/src/basic/string-util.c +++ b/src/libnm-systemd-shared/src/basic/string-util.c @@ -11,11 +11,13 @@ #include "alloc-util.h" #include "escape.h" #include "extract-word.h" +#include "fd-util.h" #include "fileio.h" #include "gunicode.h" #include "locale-util.h" #include "macro.h" #include "memory-util.h" +#include "memstream-util.h" #include "string-util.h" #include "strv.h" #include "terminal-util.h" @@ -610,8 +612,8 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { STATE_CSI, STATE_CSO, } state = STATE_OTHER; - char *obuf = NULL; - size_t osz = 0, isz, shift[2] = {}, 
n_carriage_returns = 0; + _cleanup_(memstream_done) MemStream m = {}; + size_t isz, shift[2] = {}, n_carriage_returns = 0; FILE *f; assert(ibuf); @@ -635,7 +637,7 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we * created f here and it doesn't leave our scope. */ - f = open_memstream_unlocked(&obuf, &osz); + f = memstream_init(&m); if (!f) return NULL; @@ -720,17 +722,12 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { } } - if (fflush_and_check(f) < 0) { - fclose(f); - return mfree(obuf); - } - fclose(f); + char *obuf; + if (memstream_finalize(&m, &obuf, _isz) < 0) + return NULL; free_and_replace(*ibuf, obuf); - if (_isz) - *_isz = osz; - if (highlight) { highlight[0] += shift[0]; highlight[1] += shift[1]; @@ -963,8 +960,7 @@ int free_and_strdup(char **p, const char *s) { } else t = NULL; - free(*p); - *p = t; + free_and_replace(*p, t); return 1; } @@ -1269,4 +1265,39 @@ char *strdupcspn(const char *a, const char *reject) { return strndup(a, strcspn(a, reject)); } + +char *find_line_startswith(const char *haystack, const char *needle) { + char *p; + + assert(haystack); + assert(needle); + + /* Finds the first line in 'haystack' that starts with the specified string. 
Returns a pointer to the + * first character after it */ + + p = strstr(haystack, needle); + if (!p) + return NULL; + + if (p > haystack) + while (p[-1] != '\n') { + p = strstr(p + 1, needle); + if (!p) + return NULL; + } + + return p + strlen(needle); +} #endif /* NM_IGNORED */ + +char *startswith_strv(const char *string, char **strv) { + char *found = NULL; + + STRV_FOREACH(i, strv) { + found = startswith(string, *i); + if (found) + break; + } + + return found; +} diff --git a/src/libnm-systemd-shared/src/basic/string-util.h b/src/libnm-systemd-shared/src/basic/string-util.h index e0a47a21a9..4430910e22 100644 --- a/src/libnm-systemd-shared/src/basic/string-util.h +++ b/src/libnm-systemd-shared/src/basic/string-util.h @@ -29,6 +29,18 @@ static inline char* strstr_ptr(const char *haystack, const char *needle) { return strstr(haystack, needle); } +static inline char *strstrafter(const char *haystack, const char *needle) { + char *p; + + /* Returns NULL if not found, or pointer to first character after needle if found */ + + p = strstr_ptr(haystack, needle); + if (!p) + return NULL; + + return p + strlen(needle); +} + static inline const char* strnull(const char *s) { return s ?: "(null)"; } @@ -253,3 +265,10 @@ size_t strspn_from_end(const char *str, const char *accept); char *strdupspn(const char *a, const char *accept); char *strdupcspn(const char *a, const char *reject); + +char *find_line_startswith(const char *haystack, const char *needle); + +char *startswith_strv(const char *string, char **strv); + +#define STARTSWITH_SET(p, ...) 
\ + startswith_strv(p, STRV_MAKE(__VA_ARGS__)) diff --git a/src/libnm-systemd-shared/src/basic/strv.c b/src/libnm-systemd-shared/src/basic/strv.c index e2c4c205d3..9ad5330739 100644 --- a/src/libnm-systemd-shared/src/basic/strv.c +++ b/src/libnm-systemd-shared/src/basic/strv.c @@ -9,6 +9,7 @@ #include <stdlib.h> #include "alloc-util.h" +#include "env-util.h" #include "escape.h" #include "extract-word.h" #include "fileio.h" @@ -65,6 +66,16 @@ char* strv_find_startswith(char * const *l, const char *name) { return NULL; } +char* strv_find_first_field(char * const *needles, char * const *haystack) { + STRV_FOREACH(k, needles) { + char *value = strv_env_pairs_get((char **)haystack, *k); + if (value) + return value; + } + + return NULL; +} + char** strv_free(char **l) { STRV_FOREACH(k, l) free(*k); @@ -79,20 +90,26 @@ char** strv_free_erase(char **l) { return mfree(l); } -char** strv_copy(char * const *l) { +char** strv_copy_n(char * const *l, size_t m) { _cleanup_strv_free_ char **result = NULL; char **k; - result = new(char*, strv_length(l) + 1); + result = new(char*, MIN(strv_length(l), m) + 1); if (!result) return NULL; k = result; STRV_FOREACH(i, l) { + if (m == 0) + break; + *k = strdup(*i); if (!*k) return NULL; k++; + + if (m != SIZE_MAX) + m--; } *k = NULL; @@ -670,9 +687,9 @@ int strv_compare(char * const *a, char * const *b) { return 0; } -void strv_print(char * const *l) { +void strv_print_full(char * const *l, const char *prefix) { STRV_FOREACH(s, l) - puts(*s); + printf("%s%s\n", strempty(prefix), *s); } int strv_extendf(char ***l, const char *format, ...) 
{ @@ -715,8 +732,7 @@ char** strv_shell_escape(char **l, const char *bad) { if (!v) return NULL; - free(*s); - *s = v; + free_and_replace(*s, v); } return l; diff --git a/src/libnm-systemd-shared/src/basic/strv.h b/src/libnm-systemd-shared/src/basic/strv.h index 1f8da85fcc..544d46a3f8 100644 --- a/src/libnm-systemd-shared/src/basic/strv.h +++ b/src/libnm-systemd-shared/src/basic/strv.h @@ -17,6 +17,9 @@ char* strv_find(char * const *l, const char *name) _pure_; char* strv_find_case(char * const *l, const char *name) _pure_; char* strv_find_prefix(char * const *l, const char *name) _pure_; char* strv_find_startswith(char * const *l, const char *name) _pure_; +/* Given two vectors, the first a list of keys and the second a list of key-value pairs, returns the value + * of the first key from the first vector that is found in the second vector. */ +char* strv_find_first_field(char * const *needles, char * const *haystack) _pure_; #define strv_contains(l, s) (!!strv_find((l), (s))) #define strv_contains_case(l, s) (!!strv_find_case((l), (s))) @@ -29,7 +32,10 @@ char** strv_free_erase(char **l); DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free_erase); #define _cleanup_strv_free_erase_ _cleanup_(strv_free_erasep) -char** strv_copy(char * const *l); +char** strv_copy_n(char * const *l, size_t n); +static inline char** strv_copy(char * const *l) { + return strv_copy_n(l, SIZE_MAX); +} size_t strv_length(char * const *l) _pure_; int strv_extend_strv(char ***a, char * const *b, bool filter_duplicates); @@ -84,7 +90,7 @@ char** strv_new_ap(const char *x, va_list ap); #define STRV_IGNORE ((const char *) POINTER_MAX) static inline const char* STRV_IFNOTNULL(const char *x) { - return x ? 
x : STRV_IGNORE; + return x ?: STRV_IGNORE; } static inline bool strv_isempty(char * const *l) { @@ -146,7 +152,10 @@ bool strv_overlap(char * const *a, char * const *b) _pure_; _STRV_FOREACH_PAIR(x, y, l, UNIQ_T(i, UNIQ)) char** strv_sort(char **l); -void strv_print(char * const *l); +void strv_print_full(char * const *l, const char *prefix); +static inline void strv_print(char * const *l) { + strv_print_full(l, NULL); +} #define strv_from_stdarg_alloca(first) \ ({ \ @@ -191,18 +200,6 @@ void strv_print(char * const *l); _x && strv_contains_case(STRV_MAKE(__VA_ARGS__), _x); \ }) -#define STARTSWITH_SET(p, ...) \ - ({ \ - const char *_p = (p); \ - char *_found = NULL; \ - STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \ - _found = startswith(_p, *_i); \ - if (_found) \ - break; \ - } \ - _found; \ - }) - #define ENDSWITH_SET(p, ...) \ ({ \ const char *_p = (p); \ diff --git a/src/libnm-systemd-shared/src/basic/time-util.c b/src/libnm-systemd-shared/src/basic/time-util.c index f3f32be05a..092912b2b0 100644 --- a/src/libnm-systemd-shared/src/basic/time-util.c +++ b/src/libnm-systemd-shared/src/basic/time-util.c @@ -19,6 +19,7 @@ #include "io-util.h" #include "log.h" #include "macro.h" +#include "missing_threads.h" #include "missing_timerfd.h" #include "parse-util.h" #include "path-util.h" @@ -173,6 +174,8 @@ dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u) { dual_timestamp* dual_timestamp_from_boottime(dual_timestamp *ts, usec_t u) { usec_t nowm; + assert(ts); + if (u == USEC_INFINITY) { ts->realtime = ts->monotonic = USEC_INFINITY; return ts; @@ -185,6 +188,7 @@ dual_timestamp* dual_timestamp_from_boottime(dual_timestamp *ts, usec_t u) { } usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock) { + assert(ts); switch (clock) { @@ -230,7 +234,7 @@ nsec_t timespec_load_nsec(const struct timespec *ts) { return (nsec_t) ts->tv_sec * NSEC_PER_SEC + (nsec_t) ts->tv_nsec; } -struct timespec *timespec_store(struct timespec *ts, usec_t 
u) { +struct timespec *timespec_store(struct timespec *ts, usec_t u) { assert(ts); if (u == USEC_INFINITY || @@ -246,7 +250,7 @@ struct timespec *timespec_store(struct timespec *ts, usec_t u) { return ts; } -struct timespec *timespec_store_nsec(struct timespec *ts, nsec_t n) { +struct timespec *timespec_store_nsec(struct timespec *ts, nsec_t n) { assert(ts); if (n == NSEC_INFINITY || @@ -421,27 +425,29 @@ char *format_timestamp_style( return buf; } -char *format_timestamp_relative(char *buf, size_t l, usec_t t) { +char* format_timestamp_relative_full(char *buf, size_t l, usec_t t, clockid_t clock, bool implicit_left) { const char *s; usec_t n, d; + assert(buf); + if (!timestamp_is_set(t)) return NULL; - n = now(CLOCK_REALTIME); + n = now(clock); if (n > t) { d = n - t; - s = "ago"; + s = " ago"; } else { d = t - n; - s = "left"; + s = implicit_left ? "" : " left"; } if (d >= USEC_PER_YEAR) { usec_t years = d / USEC_PER_YEAR; usec_t months = (d % USEC_PER_YEAR) / USEC_PER_MONTH; - (void) snprintf(buf, l, USEC_FMT " %s " USEC_FMT " %s %s", + (void) snprintf(buf, l, USEC_FMT " %s " USEC_FMT " %s%s", years, years == 1 ? "year" : "years", months, @@ -451,7 +457,7 @@ char *format_timestamp_relative(char *buf, size_t l, usec_t t) { usec_t months = d / USEC_PER_MONTH; usec_t days = (d % USEC_PER_MONTH) / USEC_PER_DAY; - (void) snprintf(buf, l, USEC_FMT " %s " USEC_FMT " %s %s", + (void) snprintf(buf, l, USEC_FMT " %s " USEC_FMT " %s%s", months, months == 1 ? "month" : "months", days, @@ -461,39 +467,39 @@ char *format_timestamp_relative(char *buf, size_t l, usec_t t) { usec_t weeks = d / USEC_PER_WEEK; usec_t days = (d % USEC_PER_WEEK) / USEC_PER_DAY; - (void) snprintf(buf, l, USEC_FMT " %s " USEC_FMT " %s %s", + (void) snprintf(buf, l, USEC_FMT " %s " USEC_FMT " %s%s", weeks, weeks == 1 ? "week" : "weeks", days, days == 1 ? 
"day" : "days", s); } else if (d >= 2*USEC_PER_DAY) - (void) snprintf(buf, l, USEC_FMT " days %s", d / USEC_PER_DAY, s); + (void) snprintf(buf, l, USEC_FMT " days%s", d / USEC_PER_DAY,s); else if (d >= 25*USEC_PER_HOUR) - (void) snprintf(buf, l, "1 day " USEC_FMT "h %s", + (void) snprintf(buf, l, "1 day " USEC_FMT "h%s", (d - USEC_PER_DAY) / USEC_PER_HOUR, s); else if (d >= 6*USEC_PER_HOUR) - (void) snprintf(buf, l, USEC_FMT "h %s", + (void) snprintf(buf, l, USEC_FMT "h%s", d / USEC_PER_HOUR, s); else if (d >= USEC_PER_HOUR) - (void) snprintf(buf, l, USEC_FMT "h " USEC_FMT "min %s", + (void) snprintf(buf, l, USEC_FMT "h " USEC_FMT "min%s", d / USEC_PER_HOUR, (d % USEC_PER_HOUR) / USEC_PER_MINUTE, s); else if (d >= 5*USEC_PER_MINUTE) - (void) snprintf(buf, l, USEC_FMT "min %s", + (void) snprintf(buf, l, USEC_FMT "min%s", d / USEC_PER_MINUTE, s); else if (d >= USEC_PER_MINUTE) - (void) snprintf(buf, l, USEC_FMT "min " USEC_FMT "s %s", + (void) snprintf(buf, l, USEC_FMT "min " USEC_FMT "s%s", d / USEC_PER_MINUTE, (d % USEC_PER_MINUTE) / USEC_PER_SEC, s); else if (d >= USEC_PER_SEC) - (void) snprintf(buf, l, USEC_FMT "s %s", + (void) snprintf(buf, l, USEC_FMT "s%s", d / USEC_PER_SEC, s); else if (d >= USEC_PER_MSEC) - (void) snprintf(buf, l, USEC_FMT "ms %s", + (void) snprintf(buf, l, USEC_FMT "ms%s", d / USEC_PER_MSEC, s); else if (d > 0) - (void) snprintf(buf, l, USEC_FMT"us %s", + (void) snprintf(buf, l, USEC_FMT"us%s", d, s); else (void) snprintf(buf, l, "now"); @@ -503,7 +509,7 @@ char *format_timestamp_relative(char *buf, size_t l, usec_t t) { } #endif /* NM_IGNORED */ -char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) { +char* format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) { static const struct { const char *suffix; usec_t usec; @@ -610,7 +616,14 @@ char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) { } #if 0 /* NM_IGNORED */ -static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) { 
+static int parse_timestamp_impl( + const char *t, + size_t tz_offset, + bool utc, + int isdst, + long gmtoff, + usec_t *ret) { + static const struct { const char *name; const int nr; @@ -631,12 +644,14 @@ static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) { { "Sat", 6 }, }; - const char *k, *utc = NULL, *tzn = NULL; + _cleanup_free_ char *t_alloc = NULL; + usec_t usec, plus = 0, minus = 0; + bool with_tz = false; + int r, weekday = -1; + unsigned fractional = 0; + const char *k; struct tm tm, copy; - time_t x; - usec_t x_usec, plus = 0, minus = 0, ret; - int r, weekday = -1, dst = -1; - size_t i; + time_t sec; /* Allowed syntaxes: * @@ -652,103 +667,96 @@ static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) { * +5min * -5days * @2147483647 (seconds since epoch) + * + * Note, on DST change, 00:00:00 may not exist and in that case the time part may be shifted. + * E.g. "Sun 2023-03-13 America/Havana" is parsed as "Sun 2023-03-13 01:00:00 CDT". */ assert(t); + if (tz_offset != SIZE_MAX) { + /* If the input string contains timezone, then cut it here. */ + + if (tz_offset <= 1) /* timezone must be after a space. */ + return -EINVAL; + + t_alloc = strndup(t, tz_offset - 1); + if (!t_alloc) + return -ENOMEM; + + t = t_alloc; + with_tz = true; + } + + if (utc) { + /* glibc accepts gmtoff more than 24 hours, but we refuse it. 
*/ + if ((usec_t) labs(gmtoff) * USEC_PER_SEC > USEC_PER_DAY) + return -EINVAL; + } else { + if (gmtoff != 0) + return -EINVAL; + } + if (t[0] == '@' && !with_tz) - return parse_sec(t + 1, usec); + return parse_sec(t + 1, ret); - ret = now(CLOCK_REALTIME); + usec = now(CLOCK_REALTIME); if (!with_tz) { if (streq(t, "now")) goto finish; - else if (t[0] == '+') { + if (t[0] == '+') { r = parse_sec(t+1, &plus); if (r < 0) return r; goto finish; + } - } else if (t[0] == '-') { + if (t[0] == '-') { r = parse_sec(t+1, &minus); if (r < 0) return r; goto finish; + } + + if ((k = endswith(t, " ago"))) { + _cleanup_free_ char *buf = NULL; - } else if ((k = endswith(t, " ago"))) { - t = strndupa_safe(t, k - t); + buf = strndup(t, k - t); + if (!buf) + return -ENOMEM; - r = parse_sec(t, &minus); + r = parse_sec(buf, &minus); if (r < 0) return r; goto finish; + } - } else if ((k = endswith(t, " left"))) { - t = strndupa_safe(t, k - t); + if ((k = endswith(t, " left"))) { + _cleanup_free_ char *buf = NULL; - r = parse_sec(t, &plus); + buf = strndup(t, k - t); + if (!buf) + return -ENOMEM; + + r = parse_sec(buf, &plus); if (r < 0) return r; goto finish; } - - /* See if the timestamp is suffixed with UTC */ - utc = endswith_no_case(t, " UTC"); - if (utc) - t = strndupa_safe(t, utc - t); - else { - const char *e = NULL; - int j; - - tzset(); - - /* See if the timestamp is suffixed by either the DST or non-DST local timezone. Note - * that we only support the local timezones here, nothing else. Not because we - * wouldn't want to, but simply because there are no nice APIs available to cover - * this. By accepting the local time zone strings, we make sure that all timestamps - * written by format_timestamp() can be parsed correctly, even though we don't - * support arbitrary timezone specifications. 
*/ - - for (j = 0; j <= 1; j++) { - - if (isempty(tzname[j])) - continue; - - e = endswith_no_case(t, tzname[j]); - if (!e) - continue; - if (e == t) - continue; - if (e[-1] != ' ') - continue; - - break; - } - - if (IN_SET(j, 0, 1)) { - /* Found one of the two timezones specified. */ - t = strndupa_safe(t, e - t - 1); - dst = j; - tzn = tzname[j]; - } - } } - x = (time_t) (ret / USEC_PER_SEC); - x_usec = 0; + sec = (time_t) (usec / USEC_PER_SEC); - if (!localtime_or_gmtime_r(&x, &tm, utc)) + if (!localtime_or_gmtime_r(&sec, &tm, utc)) return -EINVAL; - tm.tm_isdst = dst; - if (!with_tz && tzn) - tm.tm_zone = tzn; + tm.tm_isdst = isdst; if (streq(t, "today")) { tm.tm_sec = tm.tm_min = tm.tm_hour = 0; @@ -765,18 +773,13 @@ static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) { goto from_tm; } - for (i = 0; i < ELEMENTSOF(day_nr); i++) { - size_t skip; - - if (!startswith_no_case(t, day_nr[i].name)) - continue; - - skip = strlen(day_nr[i].name); - if (t[skip] != ' ') + for (size_t i = 0; i < ELEMENTSOF(day_nr); i++) { + k = startswith_no_case(t, day_nr[i].name); + if (!k || *k != ' ') continue; weekday = day_nr[i].nr; - t += skip + 1; + t = k + 1; break; } @@ -855,65 +858,130 @@ static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) { return -EINVAL; parse_usec: - { - unsigned add; - - k++; - r = parse_fractional_part_u(&k, 6, &add); - if (r < 0) - return -EINVAL; - - if (*k) - return -EINVAL; - - x_usec = add; - } + k++; + r = parse_fractional_part_u(&k, 6, &fractional); + if (r < 0) + return -EINVAL; + if (*k != '\0') + return -EINVAL; from_tm: + assert(plus == 0); + assert(minus == 0); + if (weekday >= 0 && tm.tm_wday != weekday) return -EINVAL; - x = mktime_or_timegm(&tm, utc); - if (x < 0) - return -EINVAL; + if (gmtoff < 0) { + plus = -gmtoff * USEC_PER_SEC; + + /* If gmtoff is negative, the string may be too old to be parsed as UTC. + * E.g. 
1969-12-31 23:00:00 -06 == 1970-01-01 05:00:00 UTC + * We assumed that gmtoff is in the range of -24:00…+24:00, hence the only date we need to + * handle here is 1969-12-31. So, let's shift the date with one day, then subtract the shift + * later. */ + if (tm.tm_year == 69 && tm.tm_mon == 11 && tm.tm_mday == 31) { + /* Thu 1970-01-01-00:00:00 */ + tm.tm_year = 70; + tm.tm_mon = 0; + tm.tm_mday = 1; + tm.tm_wday = 4; + tm.tm_yday = 0; + minus = USEC_PER_DAY; + } + } else + minus = gmtoff * USEC_PER_SEC; - ret = (usec_t) x * USEC_PER_SEC + x_usec; - if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX) + sec = mktime_or_timegm(&tm, utc); + if (sec < 0) return -EINVAL; + usec = usec_add(sec * USEC_PER_SEC, fractional); + finish: - if (ret + plus < ret) /* overflow? */ - return -EINVAL; - ret += plus; - if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX) + usec = usec_add(usec, plus); + + if (usec < minus) return -EINVAL; - if (ret >= minus) - ret -= minus; - else + usec = usec_sub_unsigned(usec, minus); + + if (usec > USEC_TIMESTAMP_FORMATTABLE_MAX) return -EINVAL; - if (usec) - *usec = ret; + if (ret) + *ret = usec; return 0; } +static int parse_timestamp_maybe_with_tz(const char *t, size_t tz_offset, bool valid_tz, usec_t *ret) { + assert(t); + + tzset(); + + for (int j = 0; j <= 1; j++) { + if (isempty(tzname[j])) + continue; + + if (!streq(t + tz_offset, tzname[j])) + continue; + + /* The specified timezone matches tzname[] of the local timezone. */ + return parse_timestamp_impl(t, tz_offset, /* utc = */ false, /* isdst = */ j, /* gmtoff = */ 0, ret); + } + + /* If we know that the last word is a valid timezone (e.g. Asia/Tokyo), then simply drop the timezone + * and parse the remaining string as a local time. If we know that the last word is not a timezone, + * then assume that it is a part of the time and try to parse the whole string as a local time. */ + return parse_timestamp_impl(t, valid_tz ? 
tz_offset : SIZE_MAX, + /* utc = */ false, /* isdst = */ -1, /* gmtoff = */ 0, ret); +} + typedef struct ParseTimestampResult { usec_t usec; int return_value; } ParseTimestampResult; -int parse_timestamp(const char *t, usec_t *usec) { - char *last_space, *tz = NULL; +int parse_timestamp(const char *t, usec_t *ret) { ParseTimestampResult *shared, tmp; + const char *k, *tz, *current_tz; + size_t tz_offset; + struct tm tm; int r; - last_space = strrchr(t, ' '); - if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG)) - tz = last_space + 1; + assert(t); + + tz = strrchr(t, ' '); + if (!tz) + return parse_timestamp_impl(t, /* tz_offset = */ SIZE_MAX, /* utc = */ false, /* isdst = */ -1, /* gmtoff = */ 0, ret); - if (!tz || endswith_no_case(t, " UTC")) - return parse_timestamp_impl(t, usec, false); + tz++; + tz_offset = tz - t; + + /* Shortcut, parse the string as UTC. */ + if (streq(tz, "UTC")) + return parse_timestamp_impl(t, tz_offset, /* utc = */ true, /* isdst = */ -1, /* gmtoff = */ 0, ret); + + /* If the timezone is compatible with RFC-822/ISO 8601 (e.g. +06, or -03:00) then parse the string as + * UTC and shift the result. Note, this must be earlier than the timezone check with tzname[], as + * tzname[] may be in the same format. */ + k = strptime(tz, "%z", &tm); + if (k && *k == '\0') + return parse_timestamp_impl(t, tz_offset, /* utc = */ true, /* isdst = */ -1, /* gmtoff = */ tm.tm_gmtoff, ret); + + /* If the last word is not a timezone file (e.g. Asia/Tokyo), then let's check if it matches + * tzname[] of the local timezone, e.g. JST or CEST. */ + if (!timezone_is_valid(tz, LOG_DEBUG)) + return parse_timestamp_maybe_with_tz(t, tz_offset, /* valid_tz = */ false, ret); + + /* Shortcut. If the current $TZ is equivalent to the specified timezone, it is not necessary to fork + * the process. 
*/ + current_tz = getenv("TZ"); + if (current_tz && *current_tz == ':' && streq(current_tz + 1, tz)) + return parse_timestamp_maybe_with_tz(t, tz_offset, /* valid_tz = */ true, ret); + + /* Otherwise, to avoid polluting the current environment variables, let's fork the process and set + * the specified timezone in the child process. */ shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); if (shared == MAP_FAILED) @@ -925,8 +993,7 @@ int parse_timestamp(const char *t, usec_t *usec) { return r; } if (r == 0) { - bool with_tz = true; - char *colon_tz; + const char *colon_tz; /* tzset(3) says $TZ should be prefixed with ":" if we reference timezone files */ colon_tz = strjoina(":", tz); @@ -936,17 +1003,7 @@ int parse_timestamp(const char *t, usec_t *usec) { _exit(EXIT_FAILURE); } - tzset(); - - /* If there is a timezone that matches the tzname fields, leave the parsing to the implementation. - * Otherwise just cut it off. */ - with_tz = !STR_IN_SET(tz, tzname[0], tzname[1]); - - /* Cut off the timezone if we don't need it. 
*/ - if (with_tz) - t = strndupa_safe(t, last_space - t); - - shared->return_value = parse_timestamp_impl(t, &shared->usec, with_tz); + shared->return_value = parse_timestamp_maybe_with_tz(t, tz_offset, /* valid_tz = */ true, &shared->usec); _exit(EXIT_SUCCESS); } @@ -955,13 +1012,13 @@ int parse_timestamp(const char *t, usec_t *usec) { if (munmap(shared, sizeof *shared) != 0) return negative_errno(); - if (tmp.return_value == 0 && usec) - *usec = tmp.usec; + if (tmp.return_value == 0 && ret) + *ret = tmp.usec; return tmp.return_value; } -static const char* extract_multiplier(const char *p, usec_t *multiplier) { +static const char* extract_multiplier(const char *p, usec_t *ret) { static const struct { const char *suffix; usec_t usec; @@ -997,12 +1054,15 @@ static const char* extract_multiplier(const char *p, usec_t *multiplier) { { "µs", 1ULL }, }; + assert(p); + assert(ret); + for (size_t i = 0; i < ELEMENTSOF(table); i++) { char *e; e = startswith(p, table[i].suffix); if (e) { - *multiplier = table[i].usec; + *ret = table[i].usec; return e; } } @@ -1010,9 +1070,9 @@ static const char* extract_multiplier(const char *p, usec_t *multiplier) { return p; } -int parse_time(const char *t, usec_t *usec, usec_t default_unit) { +int parse_time(const char *t, usec_t *ret, usec_t default_unit) { const char *p, *s; - usec_t r = 0; + usec_t usec = 0; bool something = false; assert(t); @@ -1027,8 +1087,8 @@ int parse_time(const char *t, usec_t *usec, usec_t default_unit) { if (*s != 0) return -EINVAL; - if (usec) - *usec = USEC_INFINITY; + if (ret) + *ret = USEC_INFINITY; return 0; } @@ -1075,10 +1135,10 @@ int parse_time(const char *t, usec_t *usec, usec_t default_unit) { return -ERANGE; k = (usec_t) l * multiplier; - if (k >= USEC_INFINITY - r) + if (k >= USEC_INFINITY - usec) return -ERANGE; - r += k; + usec += k; something = true; @@ -1088,10 +1148,10 @@ int parse_time(const char *t, usec_t *usec, usec_t default_unit) { for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) 
{ k = (usec_t) (*b - '0') * m; - if (k >= USEC_INFINITY - r) + if (k >= USEC_INFINITY - usec) return -ERANGE; - r += k; + usec += k; } /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge" */ @@ -1100,13 +1160,13 @@ int parse_time(const char *t, usec_t *usec, usec_t default_unit) { } } - if (usec) - *usec = r; + if (ret) + *ret = usec; return 0; } -int parse_sec(const char *t, usec_t *usec) { - return parse_time(t, usec, USEC_PER_SEC); +int parse_sec(const char *t, usec_t *ret) { + return parse_time(t, ret, USEC_PER_SEC); } int parse_sec_fix_0(const char *t, usec_t *ret) { @@ -1125,6 +1185,9 @@ int parse_sec_fix_0(const char *t, usec_t *ret) { } int parse_sec_def_infinity(const char *t, usec_t *ret) { + assert(t); + assert(ret); + t += strspn(t, WHITESPACE); if (isempty(t)) { *ret = USEC_INFINITY; @@ -1133,7 +1196,7 @@ int parse_sec_def_infinity(const char *t, usec_t *ret) { return parse_sec(t, ret); } -static const char* extract_nsec_multiplier(const char *p, nsec_t *multiplier) { +static const char* extract_nsec_multiplier(const char *p, nsec_t *ret) { static const struct { const char *suffix; nsec_t nsec; @@ -1173,12 +1236,15 @@ static const char* extract_nsec_multiplier(const char *p, nsec_t *multiplier) { }; size_t i; + assert(p); + assert(ret); + for (i = 0; i < ELEMENTSOF(table); i++) { char *e; e = startswith(p, table[i].suffix); if (e) { - *multiplier = table[i].nsec; + *ret = table[i].nsec; return e; } } @@ -1186,13 +1252,13 @@ static const char* extract_nsec_multiplier(const char *p, nsec_t *multiplier) { return p; } -int parse_nsec(const char *t, nsec_t *nsec) { +int parse_nsec(const char *t, nsec_t *ret) { const char *p, *s; - nsec_t r = 0; + nsec_t nsec = 0; bool something = false; assert(t); - assert(nsec); + assert(ret); p = t; @@ -1203,7 +1269,7 @@ int parse_nsec(const char *t, nsec_t *nsec) { if (*s != 0) return -EINVAL; - *nsec = NSEC_INFINITY; + *ret = NSEC_INFINITY; return 0; } @@ -1250,10 +1316,10 @@ int parse_nsec(const char *t, nsec_t 
*nsec) { return -ERANGE; k = (nsec_t) l * multiplier; - if (k >= NSEC_INFINITY - r) + if (k >= NSEC_INFINITY - nsec) return -ERANGE; - r += k; + nsec += k; something = true; @@ -1263,10 +1329,10 @@ int parse_nsec(const char *t, nsec_t *nsec) { for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) { k = (nsec_t) (*b - '0') * m; - if (k >= NSEC_INFINITY - r) + if (k >= NSEC_INFINITY - nsec) return -ERANGE; - r += k; + nsec += k; } /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge" */ @@ -1275,7 +1341,7 @@ int parse_nsec(const char *t, nsec_t *nsec) { } } - *nsec = r; + *ret = nsec; return 0; } @@ -1326,6 +1392,8 @@ static int get_timezones_from_tzdata_zi(char ***ret) { _cleanup_strv_free_ char **zones = NULL; int r; + assert(ret); + f = fopen("/usr/share/zoneinfo/tzdata.zi", "re"); if (!f) return -errno; @@ -1485,6 +1553,8 @@ int get_timezone(char **ret) { char *z; int r; + assert(ret); + r = readlink_malloc("/etc/localtime", &t); if (r == -ENOENT) { /* If the symlink does not exist, assume "UTC", like glibc does */ @@ -1514,10 +1584,15 @@ int get_timezone(char **ret) { } time_t mktime_or_timegm(struct tm *tm, bool utc) { + assert(tm); + return utc ? timegm(tm) : mktime(tm); } struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc) { + assert(t); + assert(tm); + return utc ? 
gmtime_r(t, tm) : localtime_r(t, tm); } @@ -1617,14 +1692,14 @@ int time_change_fd(void) { static const char* const timestamp_style_table[_TIMESTAMP_STYLE_MAX] = { [TIMESTAMP_PRETTY] = "pretty", - [TIMESTAMP_US] = "us", - [TIMESTAMP_UTC] = "utc", + [TIMESTAMP_US] = "us", + [TIMESTAMP_UTC] = "utc", [TIMESTAMP_US_UTC] = "us+utc", - [TIMESTAMP_UNIX] = "unix", + [TIMESTAMP_UNIX] = "unix", }; /* Use the macro for enum → string to allow for aliases */ -_DEFINE_STRING_TABLE_LOOKUP_TO_STRING(timestamp_style, TimestampStyle,); +DEFINE_STRING_TABLE_LOOKUP_TO_STRING(timestamp_style, TimestampStyle); /* For the string → enum mapping we use the generic implementation, but also support two aliases */ TimestampStyle timestamp_style_from_string(const char *s) { diff --git a/src/libnm-systemd-shared/src/basic/time-util.h b/src/libnm-systemd-shared/src/basic/time-util.h index c5ae0c98d4..b49137d5c3 100644 --- a/src/libnm-systemd-shared/src/basic/time-util.h +++ b/src/libnm-systemd-shared/src/basic/time-util.h @@ -66,7 +66,6 @@ typedef enum TimestampStyle { /* We assume a maximum timezone length of 6. TZNAME_MAX is not defined on Linux, but glibc internally initializes this * to 6. Let's rely on that. 
*/ #define FORMAT_TIMESTAMP_MAX (3U+1U+10U+1U+8U+1U+6U+1U+6U+1U) -#define FORMAT_TIMESTAMP_WIDTH 28U /* when outputting, assume this width */ #define FORMAT_TIMESTAMP_RELATIVE_MAX 256U #define FORMAT_TIMESPAN_MAX 64U @@ -124,10 +123,19 @@ struct timeval* timeval_store(struct timeval *tv, usec_t u); #define TIMEVAL_STORE(u) timeval_store(&(struct timeval) {}, (u)) char* format_timestamp_style(char *buf, size_t l, usec_t t, TimestampStyle style) _warn_unused_result_; -char* format_timestamp_relative(char *buf, size_t l, usec_t t) _warn_unused_result_; +char* format_timestamp_relative_full(char *buf, size_t l, usec_t t, clockid_t clock, bool implicit_left) _warn_unused_result_; char* format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) _warn_unused_result_; _warn_unused_result_ +static inline char* format_timestamp_relative(char *buf, size_t l, usec_t t) { + return format_timestamp_relative_full(buf, l, t, CLOCK_REALTIME, /* implicit_left = */ false); +} +_warn_unused_result_ +static inline char* format_timestamp_relative_monotonic(char *buf, size_t l, usec_t t) { + return format_timestamp_relative_full(buf, l, t, CLOCK_MONOTONIC, /* implicit_left = */ false); +} + +_warn_unused_result_ static inline char* format_timestamp(char *buf, size_t l, usec_t t) { return format_timestamp_style(buf, l, t, TIMESTAMP_PRETTY); } @@ -138,19 +146,21 @@ static inline char* format_timestamp(char *buf, size_t l, usec_t t) { #define FORMAT_TIMESTAMP(t) format_timestamp((char[FORMAT_TIMESTAMP_MAX]){}, FORMAT_TIMESTAMP_MAX, t) #define FORMAT_TIMESTAMP_RELATIVE(t) \ format_timestamp_relative((char[FORMAT_TIMESTAMP_RELATIVE_MAX]){}, FORMAT_TIMESTAMP_RELATIVE_MAX, t) +#define FORMAT_TIMESTAMP_RELATIVE_MONOTONIC(t) \ + format_timestamp_relative_monotonic((char[FORMAT_TIMESTAMP_RELATIVE_MAX]){}, FORMAT_TIMESTAMP_RELATIVE_MAX, t) #define FORMAT_TIMESPAN(t, accuracy) format_timespan((char[FORMAT_TIMESPAN_MAX]){}, FORMAT_TIMESPAN_MAX, t, accuracy) #define FORMAT_TIMESTAMP_STYLE(t, 
style) \ format_timestamp_style((char[FORMAT_TIMESTAMP_MAX]){}, FORMAT_TIMESTAMP_MAX, t, style) -int parse_timestamp(const char *t, usec_t *usec); +int parse_timestamp(const char *t, usec_t *ret); -int parse_sec(const char *t, usec_t *usec); -int parse_sec_fix_0(const char *t, usec_t *usec); -int parse_sec_def_infinity(const char *t, usec_t *usec); -int parse_time(const char *t, usec_t *usec, usec_t default_unit); -int parse_nsec(const char *t, nsec_t *nsec); +int parse_sec(const char *t, usec_t *ret); +int parse_sec_fix_0(const char *t, usec_t *ret); +int parse_sec_def_infinity(const char *t, usec_t *ret); +int parse_time(const char *t, usec_t *ret, usec_t default_unit); +int parse_nsec(const char *t, nsec_t *ret); -int get_timezones(char ***l); +int get_timezones(char ***ret); int verify_timezone(const char *name, int log_level); static inline bool timezone_is_valid(const char *name, int log_level) { return verify_timezone(name, log_level) >= 0; @@ -160,7 +170,7 @@ bool clock_supported(clockid_t clock); usec_t usec_shift_clock(usec_t, clockid_t from, clockid_t to); -int get_timezone(char **timezone); +int get_timezone(char **ret); time_t mktime_or_timegm(struct tm *tm, bool utc); struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc); @@ -201,13 +211,17 @@ static inline usec_t usec_sub_signed(usec_t timestamp, int64_t delta) { return usec_sub_unsigned(timestamp, (usec_t) delta); } +/* The last second we can format is 31. Dec 9999, 1s before midnight, because otherwise we'd enter 5 digit + * year territory. However, since we want to stay away from this in all timezones we take one day off. */ +#define USEC_TIMESTAMP_FORMATTABLE_MAX_64BIT ((usec_t) 253402214399000000) /* Thu 9999-12-30 23:59:59 UTC */ +/* With a 32bit time_t we can't go beyond 2038... + * We parse timestamp with RFC-822/ISO 8601 (e.g. +06, or -03:00) as UTC, hence the upper bound must be off + * by USEC_PER_DAY. See parse_timestamp() for more details. 
*/ +#define USEC_TIMESTAMP_FORMATTABLE_MAX_32BIT (((usec_t) INT32_MAX) * USEC_PER_SEC - USEC_PER_DAY) #if SIZEOF_TIME_T == 8 - /* The last second we can format is 31. Dec 9999, 1s before midnight, because otherwise we'd enter 5 digit - * year territory. However, since we want to stay away from this in all timezones we take one day off. */ -# define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 253402214399000000) +# define USEC_TIMESTAMP_FORMATTABLE_MAX USEC_TIMESTAMP_FORMATTABLE_MAX_64BIT #elif SIZEOF_TIME_T == 4 -/* With a 32bit time_t we can't go beyond 2038... */ -# define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 2147483647000000) +# define USEC_TIMESTAMP_FORMATTABLE_MAX USEC_TIMESTAMP_FORMATTABLE_MAX_32BIT #else # error "Yuck, time_t is neither 4 nor 8 bytes wide?" #endif diff --git a/src/libnm-systemd-shared/src/basic/tmpfile-util.c b/src/libnm-systemd-shared/src/basic/tmpfile-util.c index d6bafa5d7d..a66ee82d7e 100644 --- a/src/libnm-systemd-shared/src/basic/tmpfile-util.c +++ b/src/libnm-systemd-shared/src/basic/tmpfile-util.c @@ -16,8 +16,10 @@ #include "path-util.h" #include "process-util.h" #include "random-util.h" +#include "stat-util.h" #include "stdio-util.h" #include "string-util.h" +#include "sync-util.h" #include "tmpfile-util.h" #include "umask-util.h" @@ -276,7 +278,7 @@ int open_tmpfile_unlinkable(const char *directory, int flags) { return fd; } -int open_tmpfile_linkable(const char *target, int flags, char **ret_path) { +int open_tmpfile_linkable_at(int dir_fd, const char *target, int flags, char **ret_path) { _cleanup_free_ char *tmp = NULL; int r, fd; @@ -290,7 +292,7 @@ int open_tmpfile_linkable(const char *target, int flags, char **ret_path) { * which case "ret_path" will be returned as NULL. If not possible the temporary path name used is returned in * "ret_path". Use link_tmpfile() below to rename the result after writing the file in full. 
*/ - fd = open_parent(target, O_TMPFILE|flags, 0640); + fd = open_parent_at(dir_fd, target, O_TMPFILE|flags, 0640); if (fd >= 0) { *ret_path = NULL; return fd; @@ -302,7 +304,7 @@ int open_tmpfile_linkable(const char *target, int flags, char **ret_path) { if (r < 0) return r; - fd = open(tmp, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|flags, 0640); + fd = openat(dir_fd, tmp, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|flags, 0640); if (fd < 0) return -errno; @@ -333,24 +335,84 @@ int fopen_tmpfile_linkable(const char *target, int flags, char **ret_path, FILE return 0; } -int link_tmpfile(int fd, const char *path, const char *target) { +static int link_fd(int fd, int newdirfd, const char *newpath) { + int r; + + assert(fd >= 0); + assert(newdirfd >= 0 || newdirfd == AT_FDCWD); + assert(newpath); + + /* Try symlinking via /proc/fd/ first. */ + r = RET_NERRNO(linkat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), newdirfd, newpath, AT_SYMLINK_FOLLOW)); + if (r != -ENOENT) + return r; + + /* Fall back to symlinking via AT_EMPTY_PATH as fallback (this requires CAP_DAC_READ_SEARCH and a + * more recent kernel, but does not require /proc/ mounted) */ + if (proc_mounted() != 0) + return r; + + return RET_NERRNO(linkat(fd, "", newdirfd, newpath, AT_EMPTY_PATH)); +} + +int link_tmpfile_at(int fd, int dir_fd, const char *path, const char *target, LinkTmpfileFlags flags) { + _cleanup_free_ char *tmp = NULL; + int r; + assert(fd >= 0); + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); assert(target); - /* Moves a temporary file created with open_tmpfile() above into its final place. if "path" is NULL an fd - * created with O_TMPFILE is assumed, and linkat() is used. Otherwise it is assumed O_TMPFILE is not supported - * on the directory, and renameat2() is used instead. - * - * Note that in both cases we will not replace existing files. This is because linkat() does not support this - * operation currently (renameat2() does), and there is no nice way to emulate this. 
*/ + /* Moves a temporary file created with open_tmpfile() above into its final place. If "path" is NULL + * an fd created with O_TMPFILE is assumed, and linkat() is used. Otherwise it is assumed O_TMPFILE + * is not supported on the directory, and renameat2() is used instead. */ - if (path) - return rename_noreplace(AT_FDCWD, path, AT_FDCWD, target); + if (FLAGS_SET(flags, LINK_TMPFILE_SYNC) && fsync(fd) < 0) + return -errno; - return RET_NERRNO(linkat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), AT_FDCWD, target, AT_SYMLINK_FOLLOW)); + if (path) { + if (FLAGS_SET(flags, LINK_TMPFILE_REPLACE)) + r = RET_NERRNO(renameat(dir_fd, path, dir_fd, target)); + else + r = rename_noreplace(dir_fd, path, dir_fd, target); + if (r < 0) + return r; + } else { + + r = link_fd(fd, dir_fd, target); + if (r != -EEXIST || !FLAGS_SET(flags, LINK_TMPFILE_REPLACE)) + return r; + + /* So the target already exists and we were asked to replace it. That sucks a bit, since the kernel's + * linkat() logic does not allow that. We work-around this by linking the file to a random name + * first, and then renaming that to the final name. This reintroduces the race O_TMPFILE kinda is + * trying to fix, but at least the vulnerability window (i.e. where the file is linked into the file + * system under a temporary name) is very short. 
*/ + + r = tempfn_random(target, NULL, &tmp); + if (r < 0) + return r; + + if (link_fd(fd, dir_fd, tmp) < 0) + return -EEXIST; /* propagate original error */ + + r = RET_NERRNO(renameat(dir_fd, tmp, dir_fd, target)); + if (r < 0) { + (void) unlinkat(dir_fd, tmp, 0); + return r; + } + } + + if (FLAGS_SET(flags, LINK_TMPFILE_SYNC)) { + r = fsync_full(fd); + if (r < 0) + return r; + } + + return 0; } -int flink_tmpfile(FILE *f, const char *path, const char *target) { +int flink_tmpfile(FILE *f, const char *path, const char *target, LinkTmpfileFlags flags) { int fd, r; assert(f); @@ -360,11 +422,11 @@ int flink_tmpfile(FILE *f, const char *path, const char *target) { if (fd < 0) /* Not all FILE* objects encapsulate fds */ return -EBADF; - r = fflush_sync_and_check(f); + r = fflush_and_check(f); if (r < 0) return r; - return link_tmpfile(fd, path, target); + return link_tmpfile(fd, path, target, flags); } int mkdtemp_malloc(const char *template, char **ret) { diff --git a/src/libnm-systemd-shared/src/basic/tmpfile-util.h b/src/libnm-systemd-shared/src/basic/tmpfile-util.h index e5b7709e3f..50904ecac1 100644 --- a/src/libnm-systemd-shared/src/basic/tmpfile-util.h +++ b/src/libnm-systemd-shared/src/basic/tmpfile-util.h @@ -2,6 +2,7 @@ #pragma once #include <fcntl.h> +#include <stdbool.h> #include <stdio.h> int fopen_temporary_at(int dir_fd, const char *path, FILE **ret_file, char **ret_path); @@ -22,11 +23,23 @@ int tempfn_random(const char *p, const char *extra, char **ret); int tempfn_random_child(const char *p, const char *extra, char **ret); int open_tmpfile_unlinkable(const char *directory, int flags); -int open_tmpfile_linkable(const char *target, int flags, char **ret_path); +int open_tmpfile_linkable_at(int dir_fd, const char *target, int flags, char **ret_path); +static inline int open_tmpfile_linkable(const char *target, int flags, char **ret_path) { + return open_tmpfile_linkable_at(AT_FDCWD, target, flags, ret_path); +} int fopen_tmpfile_linkable(const char 
*target, int flags, char **ret_path, FILE **ret_file); -int link_tmpfile(int fd, const char *path, const char *target); -int flink_tmpfile(FILE *f, const char *path, const char *target); + +typedef enum LinkTmpfileFlags { + LINK_TMPFILE_REPLACE = 1 << 0, + LINK_TMPFILE_SYNC = 1 << 1, +} LinkTmpfileFlags; + +int link_tmpfile_at(int fd, int dir_fd, const char *path, const char *target, LinkTmpfileFlags flags); +static inline int link_tmpfile(int fd, const char *path, const char *target, LinkTmpfileFlags flags) { + return link_tmpfile_at(fd, AT_FDCWD, path, target, flags); +} +int flink_tmpfile(FILE *f, const char *path, const char *target, LinkTmpfileFlags flags); int mkdtemp_malloc(const char *template, char **ret); int mkdtemp_open(const char *template, int flags, char **ret); diff --git a/src/libnm-systemd-shared/src/basic/user-util.h b/src/libnm-systemd-shared/src/basic/user-util.h index a08683bcea..8b829a9ae2 100644 --- a/src/libnm-systemd-shared/src/basic/user-util.h +++ b/src/libnm-systemd-shared/src/basic/user-util.h @@ -80,7 +80,8 @@ int take_etc_passwd_lock(const char *root); #define UID_MAPPED_ROOT ((uid_t) (INT32_MAX-1)) #define GID_MAPPED_ROOT ((gid_t) (INT32_MAX-1)) -#define ETC_PASSWD_LOCK_PATH "/etc/.pwd.lock" +#define ETC_PASSWD_LOCK_FILENAME ".pwd.lock" +#define ETC_PASSWD_LOCK_PATH "/etc/" ETC_PASSWD_LOCK_FILENAME /* The following macros add 1 when converting things, since UID 0 is a valid UID, while the pointer * NULL is special */ @@ -102,7 +103,7 @@ typedef enum ValidUserFlags { bool valid_user_group_name(const char *u, ValidUserFlags flags); bool valid_gecos(const char *d); -char *mangle_gecos(const char *d); +char* mangle_gecos(const char *d); bool valid_home(const char *p); static inline bool valid_shell(const char *p) { @@ -130,11 +131,12 @@ int putsgent_sane(const struct sgrp *sg, FILE *stream); #endif bool is_nologin_shell(const char *shell); +const char* default_root_shell_at(int rfd); const char* default_root_shell(const char *root); int 
is_this_me(const char *username); -const char *get_home_root(void); +const char* get_home_root(void); static inline bool hashed_password_is_locked_or_invalid(const char *password) { return password && password[0] != '$'; @@ -148,3 +150,8 @@ static inline bool hashed_password_is_locked_or_invalid(const char *password) { /* A password indicating "hey, no password required for login" */ #define PASSWORD_NONE "" + +/* Used by sysusers to indicate that the password should be filled in by firstboot. + * Also see https://github.com/systemd/systemd/pull/24680#pullrequestreview-1439464325. + */ +#define PASSWORD_UNPROVISIONED "!unprovisioned" diff --git a/src/libnm-systemd-shared/src/basic/logarithm.h b/src/libnm-systemd-shared/src/fundamental/logarithm.h index 35c5fc57e0..5f7429513c 100644 --- a/src/libnm-systemd-shared/src/basic/logarithm.h +++ b/src/libnm-systemd-shared/src/fundamental/logarithm.h @@ -3,8 +3,6 @@ #include <stdint.h> -#include "macro.h" - /* Note: log2(0) == log2(1) == 0 here and below. */ #define CONST_LOG2ULL(x) ((x) > 1 ? (unsigned) __builtin_clzll(x) ^ 63U : 0) @@ -34,6 +32,14 @@ static inline unsigned u32ctz(uint32_t n) { #endif } +#define popcount(n) \ + _Generic((n), \ + unsigned char: __builtin_popcount(n), \ + unsigned short: __builtin_popcount(n), \ + unsigned: __builtin_popcount(n), \ + unsigned long: __builtin_popcountl(n), \ + unsigned long long: __builtin_popcountll(n)) + #define CONST_LOG2U(x) ((x) > 1 ? 
__SIZEOF_INT__ * 8 - __builtin_clz(x) - 1 : 0) #define NONCONST_LOG2U(x) ({ \ unsigned _x = (x); \ diff --git a/src/libnm-systemd-shared/src/fundamental/macro-fundamental.h b/src/libnm-systemd-shared/src/fundamental/macro-fundamental.h index 061477a6d6..89b83e7d0b 100644 --- a/src/libnm-systemd-shared/src/fundamental/macro-fundamental.h +++ b/src/libnm-systemd-shared/src/fundamental/macro-fundamental.h @@ -6,12 +6,13 @@ #endif #include <limits.h> +#include <stdalign.h> #include <stdbool.h> #include <stddef.h> #include <stdint.h> #define _align_(x) __attribute__((__aligned__(x))) -#define _alignas_(x) __attribute__((__aligned__(__alignof__(x)))) +#define _alignas_(x) __attribute__((__aligned__(alignof(x)))) #define _alignptr_ __attribute__((__aligned__(sizeof(void *)))) #define _cleanup_(x) __attribute__((__cleanup__(x))) #define _const_ __attribute__((__const__)) @@ -114,7 +115,6 @@ #define assert_cc(expr) static_assert(expr, #expr) - #define UNIQ_T(x, uniq) CONCATENATE(__unique_prefix_, CONCATENATE(x, uniq)) #define UNIQ __COUNTER__ @@ -267,6 +267,16 @@ (UNIQ_T(X, xq) / UNIQ_T(Y, yq) + !!(UNIQ_T(X, xq) % UNIQ_T(Y, yq))); \ }) +/* Rounds up x to the next multiple of y. Resolves to typeof(x) -1 in case of overflow */ +#define __ROUND_UP(q, x, y) \ + ({ \ + const typeof(y) UNIQ_T(A, q) = (y); \ + const typeof(x) UNIQ_T(B, q) = DIV_ROUND_UP((x), UNIQ_T(A, q)); \ + typeof(x) UNIQ_T(C, q); \ + __builtin_mul_overflow(UNIQ_T(B, q), UNIQ_T(A, q), &UNIQ_T(C, q)) ? (typeof(x)) -1 : UNIQ_T(C, q); \ + }) +#define ROUND_UP(x, y) __ROUND_UP(UNIQ, (x), (y)) + #define CASE_F_1(X) case X: #define CASE_F_2(X, ...) case X: CASE_F_1( __VA_ARGS__) #define CASE_F_3(X, ...) 
case X: CASE_F_2( __VA_ARGS__) @@ -354,16 +364,13 @@ static inline size_t ALIGN_TO(size_t l, size_t ali) { #define ALIGN2_PTR(p) ((void*) ALIGN2((uintptr_t) p)) #define ALIGN4_PTR(p) ((void*) ALIGN4((uintptr_t) p)) #define ALIGN8_PTR(p) ((void*) ALIGN8((uintptr_t) p)) -#if !SD_BOOT -/* libefi also provides ALIGN, and we do not use them in sd-boot explicitly. */ #define ALIGN(l) ALIGN_TO(l, sizeof(void*)) #define ALIGN_PTR(p) ((void*) ALIGN((uintptr_t) (p))) -#endif /* Checks if the specified pointer is aligned as appropriate for the specific type */ -#define IS_ALIGNED16(p) (((uintptr_t) p) % __alignof__(uint16_t) == 0) -#define IS_ALIGNED32(p) (((uintptr_t) p) % __alignof__(uint32_t) == 0) -#define IS_ALIGNED64(p) (((uintptr_t) p) % __alignof__(uint64_t) == 0) +#define IS_ALIGNED16(p) (((uintptr_t) p) % alignof(uint16_t) == 0) +#define IS_ALIGNED32(p) (((uintptr_t) p) % alignof(uint32_t) == 0) +#define IS_ALIGNED64(p) (((uintptr_t) p) % alignof(uint64_t) == 0) /* Same as ALIGN_TO but callable in constant contexts. 
*/ #define CONST_ALIGN_TO(l, ali) \ @@ -381,7 +388,7 @@ static inline size_t ALIGN_TO(size_t l, size_t ali) { #define CAST_ALIGN_PTR(t, p) \ ({ \ const void *_p = (p); \ - assert(((uintptr_t) _p) % __alignof__(t) == 0); \ + assert(((uintptr_t) _p) % alignof(t) == 0); \ (t *) _p; \ }) diff --git a/src/libnm-systemd-shared/src/fundamental/string-util-fundamental.c b/src/libnm-systemd-shared/src/fundamental/string-util-fundamental.c index ad80272e27..3a3e7f593a 100644 --- a/src/libnm-systemd-shared/src/fundamental/string-util-fundamental.c +++ b/src/libnm-systemd-shared/src/fundamental/string-util-fundamental.c @@ -22,7 +22,6 @@ sd_char *startswith(const sd_char *s, const sd_char *prefix) { return (sd_char*) s + l; } -#if !SD_BOOT sd_char *startswith_no_case(const sd_char *s, const sd_char *prefix) { size_t l; @@ -35,7 +34,6 @@ sd_char *startswith_no_case(const sd_char *s, const sd_char *prefix) { return (sd_char*) s + l; } -#endif sd_char* endswith(const sd_char *s, const sd_char *postfix) { size_t sl, pl; diff --git a/src/libnm-systemd-shared/src/fundamental/string-util-fundamental.h b/src/libnm-systemd-shared/src/fundamental/string-util-fundamental.h index c35ce5b88f..9019542b16 100644 --- a/src/libnm-systemd-shared/src/fundamental/string-util-fundamental.h +++ b/src/libnm-systemd-shared/src/fundamental/string-util-fundamental.h @@ -2,8 +2,7 @@ #pragma once #if SD_BOOT -# include <efi.h> -# include <efilib.h> +# include "efi.h" # include "efi-string.h" #else # include <string.h> @@ -59,9 +58,7 @@ static inline size_t strlen_ptr(const sd_char *s) { } sd_char *startswith(const sd_char *s, const sd_char *prefix) _pure_; -#if !SD_BOOT sd_char *startswith_no_case(const sd_char *s, const sd_char *prefix) _pure_; -#endif sd_char *endswith(const sd_char *s, const sd_char *postfix) _pure_; sd_char *endswith_no_case(const sd_char *s, const sd_char *postfix) _pure_; |