ipv6: lockless IPV6_MTU_DISCOVER implementation

Most np->pmtudisc reads are racy.

Move this 3-bit field to a full byte, add READ_ONCE()/WRITE_ONCE()
annotations, and make the IPV6_MTU_DISCOVER setsockopt() lockless.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2023-09-12 16:02:11 +00:00 committed by David S. Miller
parent 83cd5eb654
commit 6b724bc430
7 changed files with 24 additions and 22 deletions

View File

@ -243,13 +243,12 @@ struct ipv6_pinfo {
} rxopt; } rxopt;
/* sockopt flags */ /* sockopt flags */
__u16 sndflow:1, __u8 sndflow:1,
pmtudisc:3,
padding:1, /* 1 bit hole */
srcprefs:3; /* 001: prefer temporary address srcprefs:3; /* 001: prefer temporary address
* 010: prefer public address * 010: prefer public address
* 100: prefer care-of address * 100: prefer care-of address
*/ */
__u8 pmtudisc;
__u8 min_hopcount; __u8 min_hopcount;
__u8 tclass; __u8 tclass;
__be32 rcv_flowinfo; __be32 rcv_flowinfo;

View File

@ -266,7 +266,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
const struct dst_entry *dst = skb_dst(skb); const struct dst_entry *dst = skb_dst(skb);
unsigned int mtu; unsigned int mtu;
if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) {
mtu = READ_ONCE(dst->dev->mtu); mtu = READ_ONCE(dst->dev->mtu);
mtu -= lwtunnel_headroom(dst->lwtstate, mtu); mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
} else { } else {
@ -277,14 +277,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
static inline bool ip6_sk_accept_pmtu(const struct sock *sk) static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
{ {
return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
return pmtudisc != IPV6_PMTUDISC_INTERFACE &&
pmtudisc != IPV6_PMTUDISC_OMIT;
} }
static inline bool ip6_sk_ignore_df(const struct sock *sk) static inline bool ip6_sk_ignore_df(const struct sock *sk)
{ {
return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
return pmtudisc < IPV6_PMTUDISC_DO ||
pmtudisc == IPV6_PMTUDISC_OMIT;
} }
static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,

View File

@ -1436,10 +1436,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
v6_cork->hop_limit = ipc6->hlimit; v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass; v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL) if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else else
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
frag_size = READ_ONCE(np->frag_size); frag_size = READ_ONCE(np->frag_size);

View File

@ -493,6 +493,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
return -EINVAL; return -EINVAL;
inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); inet6_assign_bit(RTALERT_ISOLATE, sk, valbool);
return 0; return 0;
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
return -EINVAL;
if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
return -EINVAL;
WRITE_ONCE(np->pmtudisc, val);
return 0;
} }
if (needs_rtnl) if (needs_rtnl)
rtnl_lock(); rtnl_lock();
@ -941,14 +948,6 @@ done:
goto e_inval; goto e_inval;
retv = ip6_ra_control(sk, val); retv = ip6_ra_control(sk, val);
break; break;
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
goto e_inval;
if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
goto e_inval;
np->pmtudisc = val;
retv = 0;
break;
case IPV6_FLOWINFO_SEND: case IPV6_FLOWINFO_SEND:
if (optlen < sizeof(int)) if (optlen < sizeof(int))
goto e_inval; goto e_inval;
@ -1374,7 +1373,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break; break;
case IPV6_MTU_DISCOVER: case IPV6_MTU_DISCOVER:
val = np->pmtudisc; val = READ_ONCE(np->pmtudisc);
break; break;
case IPV6_RECVERR: case IPV6_RECVERR:

View File

@ -307,7 +307,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
harderr = icmpv6_err_convert(type, code, &err); harderr = icmpv6_err_convert(type, code, &err);
if (type == ICMPV6_PKT_TOOBIG) { if (type == ICMPV6_PKT_TOOBIG) {
ip6_sk_update_pmtu(skb, sk, info); ip6_sk_update_pmtu(skb, sk, info);
harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO);
} }
if (type == NDISC_REDIRECT) { if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk); ip6_sk_redirect(skb, sk);

View File

@ -598,7 +598,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk)) if (!ip6_sk_accept_pmtu(sk))
goto out; goto out;
ip6_sk_update_pmtu(skb, sk, info); ip6_sk_update_pmtu(skb, sk, info);
if (np->pmtudisc != IPV6_PMTUDISC_DONT) if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT)
harderr = 1; harderr = 1;
} }
if (type == NDISC_REDIRECT) { if (type == NDISC_REDIRECT) {

View File

@ -1341,7 +1341,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MTU_DISCOVER */ /* IPV6_MTU_DISCOVER */
np->pmtudisc = val; WRITE_ONCE(np->pmtudisc, val);
} }
#endif #endif
release_sock(sk); release_sock(sk);