net: introduce include/net/rps.h
Move RPS related structures and helpers from include/linux/netdevice.h and include/net/sock.h to a new include file.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20240306160031.874438-18-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
df51b84564
commit
490a79faf9
|
@ -2,6 +2,7 @@
|
|||
/* Copyright (C) 2018-2020, Intel Corporation. */
|
||||
|
||||
#include "ice.h"
|
||||
#include <net/rps.h>
|
||||
|
||||
/**
|
||||
* ice_is_arfs_active - helper to check if aRFS is active
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include <net/ip.h>
|
||||
#include <net/vxlan.h>
|
||||
#include <net/devlink.h>
|
||||
#include <net/rps.h>
|
||||
|
||||
#include <linux/mlx4/driver.h>
|
||||
#include <linux/mlx4/device.h>
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include <linux/mlx5/fs.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <net/rps.h>
|
||||
#include "en.h"
|
||||
|
||||
#define ARFS_HASH_SHIFT BITS_PER_BYTE
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "net_driver.h"
|
||||
#include <linux/module.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <net/rps.h>
|
||||
#include "efx.h"
|
||||
#include "nic.h"
|
||||
#include "rx_common.h"
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "net_driver.h"
|
||||
#include <linux/module.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <net/rps.h>
|
||||
#include "efx.h"
|
||||
#include "nic.h"
|
||||
#include "rx_common.h"
|
||||
|
|
|
@ -78,6 +78,7 @@
|
|||
#include <net/ax25.h>
|
||||
#include <net/rose.h>
|
||||
#include <net/6lowpan.h>
|
||||
#include <net/rps.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/proc_fs.h>
|
||||
|
|
|
@ -225,12 +225,6 @@ struct net_device_core_stats {
|
|||
#include <linux/cache.h>
|
||||
#include <linux/skbuff.h>
|
||||
|
||||
#ifdef CONFIG_RPS
|
||||
#include <linux/static_key.h>
|
||||
extern struct static_key_false rps_needed;
|
||||
extern struct static_key_false rfs_needed;
|
||||
#endif
|
||||
|
||||
struct neighbour;
|
||||
struct neigh_parms;
|
||||
struct sk_buff;
|
||||
|
@ -730,86 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RPS
|
||||
/*
|
||||
* This structure holds an RPS map which can be of variable length. The
|
||||
* map is an array of CPUs.
|
||||
*/
|
||||
struct rps_map {
|
||||
/* presumably the number of valid entries in cpus[] -- TODO confirm */
unsigned int len;
|
||||
/* RCU callback head; NOTE(review): looks like it is used to free the map
 * after a grace period -- confirm against the code that releases maps
 */
struct rcu_head rcu;
|
||||
/* the CPUs making up the map, as 16-bit values (flexible array member) */
u16 cpus[];
|
||||
};
|
||||
/* Size in bytes of an rps_map whose cpus[] array holds _num entries. */
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
|
||||
|
||||
/*
|
||||
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
|
||||
* tail pointer for that CPU's input queue at the time of last enqueue, and
|
||||
* a hardware filter index.
|
||||
*/
|
||||
struct rps_dev_flow {
|
||||
/* CPU this flow is mapped to */
u16 cpu;
|
||||
/* hardware filter index for this flow */
u16 filter;
|
||||
/* tail pointer of that CPU's input queue at the time of the last enqueue */
unsigned int last_qtail;
|
||||
};
|
||||
#define RPS_NO_FILTER 0xffff
|
||||
|
||||
/*
|
||||
* The rps_dev_flow_table structure contains a table of flow mappings.
|
||||
*/
|
||||
struct rps_dev_flow_table {
|
||||
/* presumably ANDed with a flow hash to pick a slot in flows[] -- TODO confirm */
unsigned int mask;
|
||||
/* RCU callback head; NOTE(review): looks like it is used for deferred
 * freeing of the table -- confirm
 */
struct rcu_head rcu;
|
||||
/* the flow mappings (flexible array member) */
struct rps_dev_flow flows[];
|
||||
};
|
||||
/* Size in bytes of an rps_dev_flow_table whose flows[] holds _num entries. */
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
|
||||
((_num) * sizeof(struct rps_dev_flow)))
|
||||
|
||||
/*
|
||||
* The rps_sock_flow_table contains mappings of flows to the last CPU
|
||||
* on which they were processed by the application (set in recvmsg).
|
||||
* Each entry is a 32bit value. Upper part is the high-order bits
|
||||
* of flow hash, lower part is CPU number.
|
||||
* rps_cpu_mask is used to partition the space, depending on number of
|
||||
* possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
|
||||
* For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
|
||||
* meaning we use 32-6=26 bits for the hash.
|
||||
*/
|
||||
struct rps_sock_flow_table {
|
||||
/* ANDed with the flow hash to select a slot in ents[] */
u32 mask;
|
||||
|
||||
/* one 32-bit entry per slot: high-order hash bits in the upper part, CPU
 * number in the lower part; declared ____cacheline_aligned_in_smp
 */
u32 ents[] ____cacheline_aligned_in_smp;
|
||||
};
|
||||
/* Byte offset of ents[_num], i.e. everything that precedes ents[] plus
 * _num entries.
 */
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
|
||||
|
||||
#define RPS_NO_CPU 0xffff
|
||||
|
||||
extern u32 rps_cpu_mask;
|
||||
extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
|
||||
|
||||
static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
|
||||
u32 hash)
|
||||
{
|
||||
if (table && hash) {
|
||||
unsigned int index = hash & table->mask;
|
||||
u32 val = hash & ~rps_cpu_mask;
|
||||
|
||||
/* We only give a hint, preemption can change CPU under us */
|
||||
val |= raw_smp_processor_id();
|
||||
|
||||
/* The following WRITE_ONCE() is paired with the READ_ONCE()
|
||||
* here, and another one in get_rps_cpu().
|
||||
*/
|
||||
if (READ_ONCE(table->ents[index]) != val)
|
||||
WRITE_ONCE(table->ents[index], val);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RFS_ACCEL
|
||||
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
|
||||
u16 filter_id);
|
||||
#endif
|
||||
#endif /* CONFIG_RPS */
|
||||
|
||||
/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
|
||||
enum xps_map_type {
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#ifndef _NET_RPS_H
|
||||
#define _NET_RPS_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/static_key.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
#ifdef CONFIG_RPS
|
||||
|
||||
extern struct static_key_false rps_needed;
|
||||
extern struct static_key_false rfs_needed;
|
||||
|
||||
/*
|
||||
* This structure holds an RPS map which can be of variable length. The
|
||||
* map is an array of CPUs.
|
||||
*/
|
||||
struct rps_map {
|
||||
/* presumably the number of valid entries in cpus[] -- TODO confirm */
unsigned int len;
|
||||
/* RCU callback head; NOTE(review): looks like it is used to free the map
 * after a grace period -- confirm against the code that releases maps
 */
struct rcu_head rcu;
|
||||
/* the CPUs making up the map, as 16-bit values (flexible array member) */
u16 cpus[];
|
||||
};
|
||||
/* Size in bytes of an rps_map whose cpus[] array holds _num entries. */
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
|
||||
|
||||
/*
|
||||
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
|
||||
* tail pointer for that CPU's input queue at the time of last enqueue, and
|
||||
* a hardware filter index.
|
||||
*/
|
||||
struct rps_dev_flow {
|
||||
/* CPU this flow is mapped to */
u16 cpu;
|
||||
/* hardware filter index for this flow */
u16 filter;
|
||||
/* tail pointer of that CPU's input queue at the time of the last enqueue */
unsigned int last_qtail;
|
||||
};
|
||||
#define RPS_NO_FILTER 0xffff
|
||||
|
||||
/*
|
||||
* The rps_dev_flow_table structure contains a table of flow mappings.
|
||||
*/
|
||||
struct rps_dev_flow_table {
|
||||
/* presumably ANDed with a flow hash to pick a slot in flows[] -- TODO confirm */
unsigned int mask;
|
||||
/* RCU callback head; NOTE(review): looks like it is used for deferred
 * freeing of the table -- confirm
 */
struct rcu_head rcu;
|
||||
/* the flow mappings (flexible array member) */
struct rps_dev_flow flows[];
|
||||
};
|
||||
/* Size in bytes of an rps_dev_flow_table whose flows[] holds _num entries. */
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
|
||||
((_num) * sizeof(struct rps_dev_flow)))
|
||||
|
||||
/*
|
||||
* The rps_sock_flow_table contains mappings of flows to the last CPU
|
||||
* on which they were processed by the application (set in recvmsg).
|
||||
* Each entry is a 32bit value. Upper part is the high-order bits
|
||||
* of flow hash, lower part is CPU number.
|
||||
* rps_cpu_mask is used to partition the space, depending on number of
|
||||
* possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
|
||||
* For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
|
||||
* meaning we use 32-6=26 bits for the hash.
|
||||
*/
|
||||
struct rps_sock_flow_table {
|
||||
/* ANDed with the flow hash to select a slot in ents[] */
u32 mask;
|
||||
|
||||
/* one 32-bit entry per slot: high-order hash bits in the upper part, CPU
 * number in the lower part; declared ____cacheline_aligned_in_smp
 */
u32 ents[] ____cacheline_aligned_in_smp;
|
||||
};
|
||||
/* Byte offset of ents[_num], i.e. everything that precedes ents[] plus
 * _num entries.
 */
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
|
||||
|
||||
#define RPS_NO_CPU 0xffff
|
||||
|
||||
extern u32 rps_cpu_mask;
|
||||
extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
|
||||
|
||||
static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
|
||||
u32 hash)
|
||||
{
|
||||
unsigned int index = hash & table->mask;
|
||||
u32 val = hash & ~rps_cpu_mask;
|
||||
|
||||
/* We only give a hint, preemption can change CPU under us */
|
||||
val |= raw_smp_processor_id();
|
||||
|
||||
/* The following WRITE_ONCE() is paired with the READ_ONCE()
|
||||
* here, and another one in get_rps_cpu().
|
||||
*/
|
||||
if (READ_ONCE(table->ents[index]) != val)
|
||||
WRITE_ONCE(table->ents[index], val);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_RPS */
|
||||
|
||||
/*
 * Record @hash in the global rps_sock_flow_table, if one is installed.
 *
 * A zero @hash is ignored before the RCU read-side section is entered.
 * Compiles to an empty function without CONFIG_RPS.
 */
static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
	struct rps_sock_flow_table *tbl;

	if (hash) {
		rcu_read_lock();
		tbl = rcu_dereference(rps_sock_flow_table);
		/* The table pointer may be NULL; only record when it is set. */
		if (tbl)
			rps_record_sock_flow(tbl, hash);
		rcu_read_unlock();
	}
#endif
}
|
||||
|
||||
/*
 * Record the receive hash of @sk for RFS.
 *
 * Only acts when the rfs_needed static key is enabled and the socket is
 * in TCP_ESTABLISHED state. Compiles to an empty function without
 * CONFIG_RPS.
 */
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!static_branch_unlikely(&rfs_needed))
		return;

	/* Reading sk->sk_rxhash might incur an expensive cache line miss,
	 * so filter on the socket state first.
	 *
	 * TCP_ESTABLISHED does cover almost all states where RFS
	 * might be useful, and is cheaper [1] than testing :
	 *	IPv4: inet_sk(sk)->inet_daddr
	 *	IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
	 * OR	an additional socket flag
	 * [1] : sk_state and sk_prot are in the same cache line.
	 */
	if (sk->sk_state != TCP_ESTABLISHED)
		return;

	/* This READ_ONCE() is paired with the WRITE_ONCE()
	 * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
	 */
	sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
#endif
}
|
||||
|
||||
#endif /* _NET_RPS_H */
|
|
@ -1117,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
|
|||
WRITE_ONCE(sk->sk_incoming_cpu, cpu);
|
||||
}
|
||||
|
||||
/*
 * Hand @hash and the global rps_sock_flow_table to rps_record_sock_flow()
 * inside an RCU read-side section.
 *
 * Neither @hash nor the table pointer is tested here. Compiles to an
 * empty function without CONFIG_RPS.
 */
static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
	rcu_read_lock();
	rps_record_sock_flow(rcu_dereference(rps_sock_flow_table), hash);
	rcu_read_unlock();
#endif
}
|
||||
|
||||
/*
 * Record the receive hash of @sk for RFS.
 *
 * Only acts when the rfs_needed static key is enabled and the socket is
 * in TCP_ESTABLISHED state. Compiles to an empty function without
 * CONFIG_RPS.
 */
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!static_branch_unlikely(&rfs_needed))
		return;

	/* Reading sk->sk_rxhash might incur an expensive cache line miss,
	 * so filter on the socket state first.
	 *
	 * TCP_ESTABLISHED does cover almost all states where RFS
	 * might be useful, and is cheaper [1] than testing :
	 *	IPv4: inet_sk(sk)->inet_daddr
	 *	IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
	 * OR	an additional socket flag
	 * [1] : sk_state and sk_prot are in the same cache line.
	 */
	if (sk->sk_state != TCP_ESTABLISHED)
		return;

	/* This READ_ONCE() is paired with the WRITE_ONCE()
	 * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
	 */
	sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
#endif
}
|
||||
|
||||
static inline void sock_rps_save_rxhash(struct sock *sk,
|
||||
const struct sk_buff *skb)
|
||||
|
|
|
@ -155,6 +155,7 @@
|
|||
#include <net/netdev_rx_queue.h>
|
||||
#include <net/page_pool/types.h>
|
||||
#include <net/page_pool/helpers.h>
|
||||
#include <net/rps.h>
|
||||
|
||||
#include "dev.h"
|
||||
#include "net-sysfs.h"
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include <linux/of_net.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <net/netdev_rx_queue.h>
|
||||
#include <net/rps.h>
|
||||
|
||||
#include "dev.h"
|
||||
#include "net-sysfs.h"
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include <net/busy_poll.h>
|
||||
#include <net/pkt_sched.h>
|
||||
#include <net/hotdata.h>
|
||||
#include <net/rps.h>
|
||||
|
||||
#include "dev.h"
|
||||
|
||||
|
|
|
@ -119,6 +119,7 @@
|
|||
#endif
|
||||
#include <net/l3mdev.h>
|
||||
#include <net/compat.h>
|
||||
#include <net/rps.h>
|
||||
|
||||
#include <trace/events/sock.h>
|
||||
|
||||
|
|
|
@ -279,6 +279,7 @@
|
|||
#include <linux/uaccess.h>
|
||||
#include <asm/ioctls.h>
|
||||
#include <net/busy_poll.h>
|
||||
#include <net/rps.h>
|
||||
|
||||
/* Track pending CMSGs. */
|
||||
enum {
|
||||
|
|
|
@ -64,6 +64,7 @@
|
|||
#include <net/xfrm.h>
|
||||
#include <net/ioam6.h>
|
||||
#include <net/rawv6.h>
|
||||
#include <net/rps.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/mroute6.h>
|
||||
|
|
|
@ -67,6 +67,7 @@
|
|||
#include <net/sctp/sctp.h>
|
||||
#include <net/sctp/sm.h>
|
||||
#include <net/sctp/stream_sched.h>
|
||||
#include <net/rps.h>
|
||||
|
||||
/* Forward declarations for internal helper functions. */
|
||||
static bool sctp_writeable(const struct sock *sk);
|
||||
|
|
Loading…
Reference in New Issue