ipv6: lockless IPV6_MTU_DISCOVER implementation
Most np->pmtudisc reads are racy. Move this 3-bit field to a full byte, add READ_ONCE()/WRITE_ONCE() annotations, and make the IPV6_MTU_DISCOVER setsockopt() lockless.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
83cd5eb654
commit
6b724bc430
|
@ -243,13 +243,12 @@ struct ipv6_pinfo {
|
||||||
} rxopt;
|
} rxopt;
|
||||||
|
|
||||||
/* sockopt flags */
|
/* sockopt flags */
|
||||||
__u16 sndflow:1,
|
__u8 sndflow:1,
|
||||||
pmtudisc:3,
|
|
||||||
padding:1, /* 1 bit hole */
|
|
||||||
srcprefs:3; /* 001: prefer temporary address
|
srcprefs:3; /* 001: prefer temporary address
|
||||||
* 010: prefer public address
|
* 010: prefer public address
|
||||||
* 100: prefer care-of address
|
* 100: prefer care-of address
|
||||||
*/
|
*/
|
||||||
|
__u8 pmtudisc;
|
||||||
__u8 min_hopcount;
|
__u8 min_hopcount;
|
||||||
__u8 tclass;
|
__u8 tclass;
|
||||||
__be32 rcv_flowinfo;
|
__be32 rcv_flowinfo;
|
||||||
|
|
|
@ -266,7 +266,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
|
||||||
const struct dst_entry *dst = skb_dst(skb);
|
const struct dst_entry *dst = skb_dst(skb);
|
||||||
unsigned int mtu;
|
unsigned int mtu;
|
||||||
|
|
||||||
if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
|
if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) {
|
||||||
mtu = READ_ONCE(dst->dev->mtu);
|
mtu = READ_ONCE(dst->dev->mtu);
|
||||||
mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
|
mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
|
||||||
} else {
|
} else {
|
||||||
|
@ -277,14 +277,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
|
||||||
|
|
||||||
static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
|
static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
|
||||||
{
|
{
|
||||||
return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE &&
|
u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
|
||||||
inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
|
|
||||||
|
return pmtudisc != IPV6_PMTUDISC_INTERFACE &&
|
||||||
|
pmtudisc != IPV6_PMTUDISC_OMIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool ip6_sk_ignore_df(const struct sock *sk)
|
static inline bool ip6_sk_ignore_df(const struct sock *sk)
|
||||||
{
|
{
|
||||||
return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
|
u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
|
||||||
inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
|
|
||||||
|
return pmtudisc < IPV6_PMTUDISC_DO ||
|
||||||
|
pmtudisc == IPV6_PMTUDISC_OMIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
|
static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
|
||||||
|
|
|
@ -1436,10 +1436,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
|
||||||
v6_cork->hop_limit = ipc6->hlimit;
|
v6_cork->hop_limit = ipc6->hlimit;
|
||||||
v6_cork->tclass = ipc6->tclass;
|
v6_cork->tclass = ipc6->tclass;
|
||||||
if (rt->dst.flags & DST_XFRM_TUNNEL)
|
if (rt->dst.flags & DST_XFRM_TUNNEL)
|
||||||
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
|
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
|
||||||
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
|
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
|
||||||
else
|
else
|
||||||
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
|
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
|
||||||
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
|
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
|
||||||
|
|
||||||
frag_size = READ_ONCE(np->frag_size);
|
frag_size = READ_ONCE(np->frag_size);
|
||||||
|
|
|
@ -493,6 +493,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
inet6_assign_bit(RTALERT_ISOLATE, sk, valbool);
|
inet6_assign_bit(RTALERT_ISOLATE, sk, valbool);
|
||||||
return 0;
|
return 0;
|
||||||
|
case IPV6_MTU_DISCOVER:
|
||||||
|
if (optlen < sizeof(int))
|
||||||
|
return -EINVAL;
|
||||||
|
if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
|
||||||
|
return -EINVAL;
|
||||||
|
WRITE_ONCE(np->pmtudisc, val);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
if (needs_rtnl)
|
if (needs_rtnl)
|
||||||
rtnl_lock();
|
rtnl_lock();
|
||||||
|
@ -941,14 +948,6 @@ done:
|
||||||
goto e_inval;
|
goto e_inval;
|
||||||
retv = ip6_ra_control(sk, val);
|
retv = ip6_ra_control(sk, val);
|
||||||
break;
|
break;
|
||||||
case IPV6_MTU_DISCOVER:
|
|
||||||
if (optlen < sizeof(int))
|
|
||||||
goto e_inval;
|
|
||||||
if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
|
|
||||||
goto e_inval;
|
|
||||||
np->pmtudisc = val;
|
|
||||||
retv = 0;
|
|
||||||
break;
|
|
||||||
case IPV6_FLOWINFO_SEND:
|
case IPV6_FLOWINFO_SEND:
|
||||||
if (optlen < sizeof(int))
|
if (optlen < sizeof(int))
|
||||||
goto e_inval;
|
goto e_inval;
|
||||||
|
@ -1374,7 +1373,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case IPV6_MTU_DISCOVER:
|
case IPV6_MTU_DISCOVER:
|
||||||
val = np->pmtudisc;
|
val = READ_ONCE(np->pmtudisc);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case IPV6_RECVERR:
|
case IPV6_RECVERR:
|
||||||
|
|
|
@ -307,7 +307,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
|
||||||
harderr = icmpv6_err_convert(type, code, &err);
|
harderr = icmpv6_err_convert(type, code, &err);
|
||||||
if (type == ICMPV6_PKT_TOOBIG) {
|
if (type == ICMPV6_PKT_TOOBIG) {
|
||||||
ip6_sk_update_pmtu(skb, sk, info);
|
ip6_sk_update_pmtu(skb, sk, info);
|
||||||
harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
|
harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO);
|
||||||
}
|
}
|
||||||
if (type == NDISC_REDIRECT) {
|
if (type == NDISC_REDIRECT) {
|
||||||
ip6_sk_redirect(skb, sk);
|
ip6_sk_redirect(skb, sk);
|
||||||
|
|
|
@ -598,7 +598,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
|
||||||
if (!ip6_sk_accept_pmtu(sk))
|
if (!ip6_sk_accept_pmtu(sk))
|
||||||
goto out;
|
goto out;
|
||||||
ip6_sk_update_pmtu(skb, sk, info);
|
ip6_sk_update_pmtu(skb, sk, info);
|
||||||
if (np->pmtudisc != IPV6_PMTUDISC_DONT)
|
if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT)
|
||||||
harderr = 1;
|
harderr = 1;
|
||||||
}
|
}
|
||||||
if (type == NDISC_REDIRECT) {
|
if (type == NDISC_REDIRECT) {
|
||||||
|
|
|
@ -1341,7 +1341,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
|
||||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||||
|
|
||||||
/* IPV6_MTU_DISCOVER */
|
/* IPV6_MTU_DISCOVER */
|
||||||
np->pmtudisc = val;
|
WRITE_ONCE(np->pmtudisc, val);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
|
|
Loading…
Reference in New Issue