summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-03 17:51:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-03 17:51:52 -0700
commit26b84401da8458c5cbd6818d5732f7bbb84124a2 (patch)
treef73ae58b98f9c4ea5734597d06926c9da44cc41a
parente816da29bc0cf0504afddd314a2d71b694b5d7af (diff)
parent1e7d8bcbe37d3c63babe628443f13f77970dd06b (diff)
Merge tag 'lsm-pr-20221003' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm
Pull LSM updates from Paul Moore: "Seven patches for the LSM layer and we've got a mix of trivial and significant patches. Highlights below, starting with the smaller bits first so they don't get lost in the discussion of the larger items: - Remove some redundant NULL pointer checks in the common LSM audit code. - Ratelimit the lockdown LSM's access denial messages. With this change there is a chance that the last visible lockdown message on the console is outdated/old, but it does help preserve the initial series of lockdown denials that started the denial message flood and my gut feeling is that these might be the more valuable messages. - Open userfaultfds as readonly instead of read/write. While this code obviously lives outside the LSM, it does have a noticeable impact on the LSMs with Ondrej explaining the situation in the commit description. It is worth noting that this patch languished on the VFS list for over a year without any comments (objections or otherwise) so I took the liberty of pulling it into the LSM tree after giving fair notice. It has been in linux-next since the end of August without any noticeable problems. - Add a LSM hook for user namespace creation, with implementations for both the BPF LSM and SELinux. Even though the changes are fairly small, this is the bulk of the diffstat as we are also including BPF LSM selftests for the new hook. It's also the most contentious of the changes in this pull request with Eric Biederman NACK'ing the LSM hook multiple times during its development and discussion upstream. While I've never taken NACK's lightly, I'm sending these patches to you because it is my belief that they are of good quality, satisfy a long-standing need of users and distros, and are in keeping with the existing nature of the LSM layer and the Linux Kernel as a whole. The patches in implement a LSM hook for user namespace creation that allows for a granular approach, configurable at runtime, which enables both monitoring and control of user namespaces. The general consensus has been that this is far preferable to the other solutions that have been adopted downstream including outright removal from the kernel, disabling via system wide sysctls, or various other out-of-tree mechanisms that users have been forced to adopt since we haven't been able to provide them an upstream solution for their requests. Eric has been steadfast in his objections to this LSM hook, explaining that any restrictions on the user namespace could have significant impact on userspace. While there is the possibility of impacting userspace, it is important to note that this solution only impacts userspace when it is requested based on the runtime configuration supplied by the distro/admin/user. Frederick (the pathset author), the LSM/security community, and myself have tried to work with Eric during development of this patchset to find a mutually acceptable solution, but Eric's approach and unwillingness to engage in a meaningful way have made this impossible. I have CC'd Eric directly on this pull request so he has a chance to provide his side of the story; there have been no objections outside of Eric's" * tag 'lsm-pr-20221003' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm: lockdown: ratelimit denial messages userfaultfd: open userfaultfds with O_RDONLY selinux: Implement userns_create hook selftests/bpf: Add tests verifying bpf lsm userns_create hook bpf-lsm: Make bpf_lsm_userns_create() sleepable security, lsm: Introduce security_create_user_ns() lsm: clean up redundant NULL pointer check
-rw-r--r--fs/userfaultfd.c4
-rw-r--r--include/linux/lsm_hook_defs.h1
-rw-r--r--include/linux/lsm_hooks.h4
-rw-r--r--include/linux/security.h6
-rw-r--r--kernel/bpf/bpf_lsm.c1
-rw-r--r--kernel/user_namespace.c5
-rw-r--r--security/lockdown/lockdown.c2
-rw-r--r--security/lsm_audit.c14
-rw-r--r--security/security.c5
-rw-r--r--security/selinux/hooks.c9
-rw-r--r--security/selinux/include/classmap.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/deny_namespace.c102
-rw-r--r--tools/testing/selftests/bpf/progs/test_deny_namespace.c33
13 files changed, 172 insertions, 16 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 175de70e3adf..0c1d33c4f74c 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -991,7 +991,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new,
int fd;
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new,
- O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
+ O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
if (fd < 0)
return fd;
@@ -2094,7 +2094,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
mmgrab(ctx->mm);
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
- O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
+ O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
if (fd < 0) {
mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 60fff133c0b1..f8715ddbfcf4 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -224,6 +224,7 @@ LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2,
unsigned long arg3, unsigned long arg4, unsigned long arg5)
LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p,
struct inode *inode)
+LSM_HOOK(int, 0, userns_create, const struct cred *cred)
LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag)
LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp,
u32 *secid)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 3aa6030302f5..4ec80b96c22e 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -806,6 +806,10 @@
* security attributes, e.g. for /proc/pid inodes.
* @p contains the task_struct for the task.
* @inode contains the inode structure for the inode.
+ * @userns_create:
+ * Check permission prior to creating a new user namespace.
+ * @cred points to prepared creds.
+ * Return 0 if successful, otherwise < 0 error code.
*
* Security hooks for Netlink messaging.
*
diff --git a/include/linux/security.h b/include/linux/security.h
index 7bd0c490703d..3480f61e1b2d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -437,6 +437,7 @@ int security_task_kill(struct task_struct *p, struct kernel_siginfo *info,
int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5);
void security_task_to_inode(struct task_struct *p, struct inode *inode);
+int security_create_user_ns(const struct cred *cred);
int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
int security_msg_msg_alloc(struct msg_msg *msg);
@@ -1194,6 +1195,11 @@ static inline int security_task_prctl(int option, unsigned long arg2,
static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
{ }
+static inline int security_create_user_ns(const struct cred *cred)
+{
+ return 0;
+}
+
static inline int security_ipc_permission(struct kern_ipc_perm *ipcp,
short flag)
{
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index fa71d58b7ded..761998fda762 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -335,6 +335,7 @@ BTF_ID(func, bpf_lsm_task_getsecid_obj)
BTF_ID(func, bpf_lsm_task_prctl)
BTF_ID(func, bpf_lsm_task_setscheduler)
BTF_ID(func, bpf_lsm_task_to_inode)
+BTF_ID(func, bpf_lsm_userns_create)
BTF_SET_END(sleepable_lsm_hooks)
bool bpf_lsm_is_sleepable_hook(u32 btf_id)
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 5481ba44a8d6..3f464bbda0e9 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
+#include <linux/security.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
@@ -113,6 +114,10 @@ int create_user_ns(struct cred *new)
!kgid_has_mapping(parent_ns, group))
goto fail_dec;
+ ret = security_create_user_ns(new);
+ if (ret < 0)
+ goto fail_dec;
+
ret = -ENOMEM;
ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
if (!ns)
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 87cbdc64d272..a79b985e917e 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -63,7 +63,7 @@ static int lockdown_is_locked_down(enum lockdown_reason what)
if (kernel_locked_down >= what) {
if (lockdown_reasons[what])
- pr_notice("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n",
+ pr_notice_ratelimited("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n",
current->comm, lockdown_reasons[what]);
return -EPERM;
}
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 78a278f28e49..75cc3f8d2a42 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -44,9 +44,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
struct iphdr *ih;
ih = ip_hdr(skb);
- if (ih == NULL)
- return -EINVAL;
-
ad->u.net->v4info.saddr = ih->saddr;
ad->u.net->v4info.daddr = ih->daddr;
@@ -59,8 +56,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
switch (ih->protocol) {
case IPPROTO_TCP: {
struct tcphdr *th = tcp_hdr(skb);
- if (th == NULL)
- break;
ad->u.net->sport = th->source;
ad->u.net->dport = th->dest;
@@ -68,8 +63,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
}
case IPPROTO_UDP: {
struct udphdr *uh = udp_hdr(skb);
- if (uh == NULL)
- break;
ad->u.net->sport = uh->source;
ad->u.net->dport = uh->dest;
@@ -77,8 +70,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
}
case IPPROTO_DCCP: {
struct dccp_hdr *dh = dccp_hdr(skb);
- if (dh == NULL)
- break;
ad->u.net->sport = dh->dccph_sport;
ad->u.net->dport = dh->dccph_dport;
@@ -86,8 +77,7 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
}
case IPPROTO_SCTP: {
struct sctphdr *sh = sctp_hdr(skb);
- if (sh == NULL)
- break;
+
ad->u.net->sport = sh->source;
ad->u.net->dport = sh->dest;
break;
@@ -115,8 +105,6 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb,
__be16 frag_off;
ip6 = ipv6_hdr(skb);
- if (ip6 == NULL)
- return -EINVAL;
ad->u.net->v6info.saddr = ip6->saddr;
ad->u.net->v6info.daddr = ip6->daddr;
/* IPv6 can have several extension header before the Transport header
diff --git a/security/security.c b/security/security.c
index 4b95de24bc8d..9696dd64095e 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1909,6 +1909,11 @@ void security_task_to_inode(struct task_struct *p, struct inode *inode)
call_void_hook(task_to_inode, p, inode);
}
+int security_create_user_ns(const struct cred *cred)
+{
+ return call_int_hook(userns_create, 0, cred);
+}
+
int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
{
return call_int_hook(ipc_permission, 0, ipcp, flag);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 753c876f0804..f8b05f14284b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4222,6 +4222,14 @@ static void selinux_task_to_inode(struct task_struct *p,
spin_unlock(&isec->lock);
}
+static int selinux_userns_create(const struct cred *cred)
+{
+ u32 sid = current_sid();
+
+ return avc_has_perm(&selinux_state, sid, sid, SECCLASS_USER_NAMESPACE,
+ USER_NAMESPACE__CREATE, NULL);
+}
+
/* Returns error only if unable to parse addresses */
static int selinux_parse_skb_ipv4(struct sk_buff *skb,
struct common_audit_data *ad, u8 *proto)
@@ -7128,6 +7136,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(task_movememory, selinux_task_movememory),
LSM_HOOK_INIT(task_kill, selinux_task_kill),
LSM_HOOK_INIT(task_to_inode, selinux_task_to_inode),
+ LSM_HOOK_INIT(userns_create, selinux_userns_create),
LSM_HOOK_INIT(ipc_permission, selinux_ipc_permission),
LSM_HOOK_INIT(ipc_getsecid, selinux_ipc_getsecid),
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 1c2f41ff4e55..a3c380775d41 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -254,6 +254,8 @@ const struct security_class_mapping secclass_map[] = {
{ COMMON_FILE_PERMS, NULL } },
{ "io_uring",
{ "override_creds", "sqpoll", "cmd", NULL } },
+ { "user_namespace",
+ { "create", NULL } },
{ NULL }
};
diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
new file mode 100644
index 000000000000..1bc6241b755b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_deny_namespace.skel.h"
+#include <sched.h>
+#include "cap_helpers.h"
+#include <stdio.h>
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+/* negative return value -> some internal error
+ * positive return value -> userns creation failed
+ * 0 -> userns creation succeeded
+ */
+static int create_user_ns(void)
+{
+ pid_t pid;
+
+ pid = fork();
+ if (pid < 0)
+ return -1;
+
+ if (pid == 0) {
+ if (unshare(CLONE_NEWUSER))
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ return wait_for_pid(pid);
+}
+
+static void test_userns_create_bpf(void)
+{
+ __u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
+ __u64 old_caps = 0;
+
+ cap_enable_effective(cap_mask, &old_caps);
+
+ ASSERT_OK(create_user_ns(), "priv new user ns");
+
+ cap_disable_effective(cap_mask, &old_caps);
+
+ ASSERT_EQ(create_user_ns(), EPERM, "unpriv new user ns");
+
+ if (cap_mask & old_caps)
+ cap_enable_effective(cap_mask, NULL);
+}
+
+static void test_unpriv_userns_create_no_bpf(void)
+{
+ __u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
+ __u64 old_caps = 0;
+
+ cap_disable_effective(cap_mask, &old_caps);
+
+ ASSERT_OK(create_user_ns(), "no-bpf unpriv new user ns");
+
+ if (cap_mask & old_caps)
+ cap_enable_effective(cap_mask, NULL);
+}
+
+void test_deny_namespace(void)
+{
+ struct test_deny_namespace *skel = NULL;
+ int err;
+
+ if (test__start_subtest("unpriv_userns_create_no_bpf"))
+ test_unpriv_userns_create_no_bpf();
+
+ skel = test_deny_namespace__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel load"))
+ goto close_prog;
+
+ err = test_deny_namespace__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto close_prog;
+
+ if (test__start_subtest("userns_create_bpf"))
+ test_userns_create_bpf();
+
+ test_deny_namespace__detach(skel);
+
+close_prog:
+ test_deny_namespace__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
new file mode 100644
index 000000000000..09ad5a4ebd1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+#include <linux/capability.h>
+
+struct kernel_cap_struct {
+ __u32 cap[_LINUX_CAPABILITY_U32S_3];
+} __attribute__((preserve_access_index));
+
+struct cred {
+ struct kernel_cap_struct cap_effective;
+} __attribute__((preserve_access_index));
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm.s/userns_create")
+int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
+{
+ struct kernel_cap_struct caps = cred->cap_effective;
+ int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
+ __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
+
+ if (ret)
+ return 0;
+
+ ret = -EPERM;
+ if (caps.cap[cap_index] & cap_mask)
+ return 0;
+
+ return -EPERM;
+}