diff options
-rw-r--r-- | net/netfilter/nf_nat_proto.c | 64 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 43 | ||||
-rwxr-xr-x | tools/testing/selftests/netfilter/nf_nat_edemux.sh | 46 |
3 files changed, 126 insertions, 27 deletions
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 48cc60084d28..5a049740758f 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -697,6 +697,31 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int } #endif +static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport) +{ + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + const struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return false; + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + return false; + } + + dir = CTINFO2DIR(ctinfo); + if (dir != IP_CT_DIR_ORIGINAL) + return false; + + return ct->tuplehash[!dir].tuple.dst.u.all != sport; +} + static unsigned int nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -707,8 +732,20 @@ nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, ret = nf_nat_ipv4_fn(priv, skb, state); - if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr && - !inet_sk_transparent(sk)) + if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) + return ret; + + /* skb has a socket assigned via tcp edemux. We need to check + * if nf_nat_ipv4_fn() has mangled the packet in a way that + * edemux would not have found this socket. + * + * This includes both changes to the source address and changes + * to the source port, which are both handled by the + * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux + * might have found a socket for the old (pre-snat) address. + */ + if (saddr != ip_hdr(skb)->saddr || + nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) skb_orphan(skb); /* TCP edemux obtained wrong socket */ return ret; @@ -938,6 +975,27 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, } static unsigned int +nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct in6_addr saddr = ipv6_hdr(skb)->saddr; + struct sock *sk = skb->sk; + unsigned int ret; + + ret = nf_nat_ipv6_fn(priv, skb, state); + + if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) + return ret; + + /* see nf_nat_ipv4_local_in */ + if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) || + nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) + skb_orphan(skb); + + return ret; +} + +static unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -1051,7 +1109,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { }, /* After packet filtering, change source */ { - .hook = nf_nat_ipv6_fn, + .hook = nf_nat_ipv6_local_in, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4356189360fb..7e2e76086d25 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3316,7 +3316,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_CHAIN] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, - [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_RULE_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, [NFTA_RULE_POSITION] = { .type = NLA_U64 }, [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, @@ -4254,12 +4254,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_SET_HANDLE] = { .type = NLA_U64 }, [NFTA_SET_EXPR] = { .type = NLA_NESTED }, - [NFTA_SET_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_SET_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), +}; + +static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = { + [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 }, - [NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED }, + [NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy), }; static struct nft_set *nft_set_lookup(const struct nft_table *table, @@ -4695,8 +4699,10 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info, return -EINVAL; set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return PTR_ERR(set); + } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb2 == NULL) @@ -4713,10 +4719,6 @@ err_fill_set_info: return err; } -static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = { - [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 }, -}; - static int nft_set_desc_concat_parse(const struct nlattr *attr, struct nft_set_desc *desc) { @@ -5498,7 +5500,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING, .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED }, - [NFTA_SET_ELEM_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { @@ -5506,7 +5508,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_LIST_ELEMENTS] = NLA_POLICY_NESTED_ARRAY(nft_set_elem_policy), [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, }; @@ -6025,8 +6027,10 @@ static int nf_tables_getsetelem(struct sk_buff *skb, } set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); + } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); @@ -6919,8 +6923,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb, set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET], nla[NFTA_SET_ELEM_LIST_SET_ID], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); + } if (!list_empty(&set->bindings) && (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) @@ -7195,8 +7201,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb, } set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); + } if (nft_set_is_anonymous(set)) return -EOPNOTSUPP; @@ -8680,6 +8688,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { + struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; struct nft_flowtable *flowtable; @@ -8705,13 +8714,17 @@ static int nf_tables_getflowtable(struct sk_buff *skb, table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, genmask, 0); - if (IS_ERR(table)) + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]); return PTR_ERR(table); + } flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], genmask); - if (IS_ERR(flowtable)) + if (IS_ERR(flowtable)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); return PTR_ERR(flowtable); + } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh index 1092bbcb1fba..a1aa8f4a5828 100755 --- a/tools/testing/selftests/netfilter/nf_nat_edemux.sh +++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh @@ -11,16 +11,18 @@ ret=0 sfx=$(mktemp -u "XXXXXXXX") ns1="ns1-$sfx" ns2="ns2-$sfx" +socatpid=0 cleanup() { + [ $socatpid -gt 0 ] && kill $socatpid ip netns del $ns1 ip netns del $ns2 } -iperf3 -v > /dev/null 2>&1 +socat -h > /dev/null 2>&1 if [ $? -ne 0 ];then - echo "SKIP: Could not run test without iperf3" + echo "SKIP: Could not run test without socat" exit $ksft_skip fi @@ -60,8 +62,8 @@ ip netns exec $ns2 ip link set up dev veth2 ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2 # Create a server in one namespace -ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 & -iperfs=$! +ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 & +socatpid=$! # Restrict source port to just one so we don't have to exhaust # all others. @@ -83,17 +85,43 @@ sleep 1 # ip daddr:dport will be rewritten to 192.168.1.1 5201 # NAT must reallocate source port 10000 because # 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use -echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443 >/dev/null +echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null ret=$? -kill $iperfs - # Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). if [ $ret -eq 0 ]; then echo "PASS: socat can connect via NAT'd address" else echo "FAIL: socat cannot connect via NAT'd address" - exit 1 fi -exit 0 +# check sport clashres. +ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201 +ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201 + +sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & +cpid1=$! +sleep 1 + +# if connect succeeds, client closes instantly due to EOF on stdin. +# if connect hangs, it will time out after 5s. +echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & +cpid2=$! + +time_then=$(date +%s) +wait $cpid2 +rv=$? +time_now=$(date +%s) + +# Check how much time has elapsed, expectation is for +# 'cpid2' to connect and then exit (and no connect delay). +delta=$((time_now - time_then)) + +if [ $delta -lt 2 -a $rv -eq 0 ]; then + echo "PASS: could connect to service via redirected ports" +else + echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)" + ret=1 +fi + +exit $ret |