From 6b724bc4300b431443f3b99520994a5aece347cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:11 +0000 Subject: [PATCH] ipv6: lockless IPV6_MTU_DISCOVER implementation Most np->pmtudisc reads are racy. Move this 3bit field on a full byte, add annotations and make IPV6_MTU_DISCOVER setsockopt() lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 ++--- include/net/ip6_route.h | 14 +++++++++----- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 17 ++++++++--------- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 7 files changed, 24 insertions(+), 22 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f288a35f157f..10f521a6a9c8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,13 +243,12 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 sndflow:1, - pmtudisc:3, - padding:1, /* 1 bit hole */ + __u8 sndflow:1, srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ + __u8 pmtudisc; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b32539bb0fb0..b1ea49900b4a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -266,7 +266,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) const struct dst_entry *dst = skb_dst(skb); unsigned int mtu; - if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { + if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) { mtu = READ_ONCE(dst->dev->mtu); mtu -= lwtunnel_headroom(dst->lwtstate, mtu); } else { @@ -277,14 +277,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && - inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc != IPV6_PMTUDISC_INTERFACE && + pmtudisc != IPV6_PMTUDISC_OMIT; } static inline bool ip6_sk_ignore_df(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || - inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc < IPV6_PMTUDISC_DO || + pmtudisc == IPV6_PMTUDISC_OMIT; } static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f87d8491d7e2..7e5d9eeb990f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1436,10 +1436,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); frag_size = READ_ONCE(np->frag_size); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c22a492e0536..85ea42644dcb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -493,6 +493,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); return 0; + case IPV6_MTU_DISCOVER: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) + return -EINVAL; + WRITE_ONCE(np->pmtudisc, val); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -941,14 +948,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_MTU_DISCOVER: - if (optlen < sizeof(int)) - goto e_inval; - if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) - goto e_inval; - np->pmtudisc = val; - retv = 0; - break; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) goto e_inval; @@ -1374,7 +1373,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MTU_DISCOVER: - val = np->pmtudisc; + val = READ_ONCE(np->pmtudisc); break; case IPV6_RECVERR: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 71f6bdccfa1f..47372cceb98f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -307,7 +307,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, harderr = icmpv6_err_convert(type, code, &err); if (type == ICMPV6_PKT_TOOBIG) { ip6_sk_update_pmtu(skb, sk, info); - harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); + harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO); } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 90e873689b88..c17e19fece1b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -598,7 +598,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); - if (np->pmtudisc != IPV6_PMTUDISC_DONT) + if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT) harderr = 1; } if (type == NDISC_REDIRECT) { diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index df1b33b61059..5820a8156c47 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1341,7 +1341,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MTU_DISCOVER */ - np->pmtudisc = val; + WRITE_ONCE(np->pmtudisc, val); } #endif release_sock(sk);