/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _NET_RPS_H
#define _NET_RPS_H

#include <linux/types.h>
#include <linux/static_key.h>
#include <net/sock.h>
#include <net/hotdata.h>

#ifdef CONFIG_RPS

/*
 * Static branch keys defined outside this header.  rfs_needed is the key
 * tested by rfs_is_needed(); rps_needed is only declared here.
 */
extern struct static_key_false rps_needed;
extern struct static_key_false rfs_needed;
| 15 | /* | 
|---|
| 16 | * This structure holds an RPS map which can be of variable length.  The | 
|---|
| 17 | * map is an array of CPUs. | 
|---|
| 18 | */ | 
|---|
| 19 | struct rps_map { | 
|---|
| 20 | unsigned int	len; | 
|---|
| 21 | struct rcu_head	rcu; | 
|---|
| 22 | u16		cpus[]; | 
|---|
| 23 | }; | 
|---|
| 24 | #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) | 
|---|
| 25 |  | 
|---|
| 26 | /* | 
|---|
| 27 | * The rps_dev_flow structure contains the mapping of a flow to a CPU, the | 
|---|
| 28 | * tail pointer for that CPU's input queue at the time of last enqueue, a | 
|---|
| 29 | * hardware filter index, and the hash of the flow if aRFS is enabled. | 
|---|
| 30 | */ | 
|---|
| 31 | struct rps_dev_flow { | 
|---|
| 32 | u16		cpu; | 
|---|
| 33 | u16		filter; | 
|---|
| 34 | unsigned int	last_qtail; | 
|---|
| 35 | #ifdef CONFIG_RFS_ACCEL | 
|---|
| 36 | u32		hash; | 
|---|
| 37 | #endif | 
|---|
| 38 | }; | 
|---|
| 39 | #define RPS_NO_FILTER 0xffff | 
|---|
| 40 |  | 
|---|
| 41 | /* | 
|---|
| 42 | * The rps_dev_flow_table structure contains a table of flow mappings. | 
|---|
| 43 | */ | 
|---|
| 44 | struct rps_dev_flow_table { | 
|---|
| 45 | u8			log; | 
|---|
| 46 | struct rcu_head		rcu; | 
|---|
| 47 | struct rps_dev_flow	flows[]; | 
|---|
| 48 | }; | 
|---|
| 49 | #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ | 
|---|
| 50 | ((_num) * sizeof(struct rps_dev_flow))) | 
|---|
| 51 |  | 
|---|
| 52 | /* | 
|---|
| 53 | * The rps_sock_flow_table contains mappings of flows to the last CPU | 
|---|
| 54 | * on which they were processed by the application (set in recvmsg). | 
|---|
| 55 | * Each entry is a 32bit value. Upper part is the high-order bits | 
|---|
| 56 | * of flow hash, lower part is CPU number. | 
|---|
| 57 | * rps_cpu_mask is used to partition the space, depending on number of | 
|---|
| 58 | * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 | 
|---|
| 59 | * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, | 
|---|
| 60 | * meaning we use 32-6=26 bits for the hash. | 
|---|
| 61 | */ | 
|---|
| 62 | struct rps_sock_flow_table { | 
|---|
| 63 | struct rcu_head	rcu; | 
|---|
| 64 | u32		mask; | 
|---|
| 65 |  | 
|---|
| 66 | u32		ents[] ____cacheline_aligned_in_smp; | 
|---|
| 67 | }; | 
|---|
| 68 | #define	RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) | 
|---|
| 69 |  | 
|---|
| 70 | #define RPS_NO_CPU 0xffff | 
|---|
| 71 |  | 
|---|
| 72 | static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, | 
|---|
| 73 | u32 hash) | 
|---|
| 74 | { | 
|---|
| 75 | unsigned int index = hash & table->mask; | 
|---|
| 76 | u32 val = hash & ~net_hotdata.rps_cpu_mask; | 
|---|
| 77 |  | 
|---|
| 78 | /* We only give a hint, preemption can change CPU under us */ | 
|---|
| 79 | val |= raw_smp_processor_id(); | 
|---|
| 80 |  | 
|---|
| 81 | /* The following WRITE_ONCE() is paired with the READ_ONCE() | 
|---|
| 82 | * here, and another one in get_rps_cpu(). | 
|---|
| 83 | */ | 
|---|
| 84 | if (READ_ONCE(table->ents[index]) != val) | 
|---|
| 85 | WRITE_ONCE(table->ents[index], val); | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | static inline void _sock_rps_record_flow_hash(__u32 hash) | 
|---|
| 89 | { | 
|---|
| 90 | struct rps_sock_flow_table *sock_flow_table; | 
|---|
| 91 |  | 
|---|
| 92 | if (!hash) | 
|---|
| 93 | return; | 
|---|
| 94 | rcu_read_lock(); | 
|---|
| 95 | sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); | 
|---|
| 96 | if (sock_flow_table) | 
|---|
| 97 | rps_record_sock_flow(table: sock_flow_table, hash); | 
|---|
| 98 | rcu_read_unlock(); | 
|---|
| 99 | } | 
|---|
| 100 |  | 
|---|
| 101 | static inline void _sock_rps_record_flow(const struct sock *sk) | 
|---|
| 102 | { | 
|---|
| 103 | /* Reading sk->sk_rxhash might incur an expensive cache line | 
|---|
| 104 | * miss. | 
|---|
| 105 | * | 
|---|
| 106 | * TCP_ESTABLISHED does cover almost all states where RFS | 
|---|
| 107 | * might be useful, and is cheaper [1] than testing : | 
|---|
| 108 | *	IPv4: inet_sk(sk)->inet_daddr | 
|---|
| 109 | *	IPv6: ipv6_addr_any(&sk->sk_v6_daddr) | 
|---|
| 110 | * OR	an additional socket flag | 
|---|
| 111 | * [1] : sk_state and sk_prot are in the same cache line. | 
|---|
| 112 | */ | 
|---|
| 113 | if (sk->sk_state == TCP_ESTABLISHED) { | 
|---|
| 114 | /* This READ_ONCE() is paired with the WRITE_ONCE() | 
|---|
| 115 | * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). | 
|---|
| 116 | */ | 
|---|
| 117 | _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); | 
|---|
| 118 | } | 
|---|
| 119 | } | 
|---|
| 120 |  | 
|---|
| 121 | static inline void _sock_rps_delete_flow(const struct sock *sk) | 
|---|
| 122 | { | 
|---|
| 123 | struct rps_sock_flow_table *table; | 
|---|
| 124 | u32 hash, index; | 
|---|
| 125 |  | 
|---|
| 126 | hash = READ_ONCE(sk->sk_rxhash); | 
|---|
| 127 | if (!hash) | 
|---|
| 128 | return; | 
|---|
| 129 |  | 
|---|
| 130 | rcu_read_lock(); | 
|---|
| 131 | table = rcu_dereference(net_hotdata.rps_sock_flow_table); | 
|---|
| 132 | if (table) { | 
|---|
| 133 | index = hash & table->mask; | 
|---|
| 134 | if (READ_ONCE(table->ents[index]) != RPS_NO_CPU) | 
|---|
| 135 | WRITE_ONCE(table->ents[index], RPS_NO_CPU); | 
|---|
| 136 | } | 
|---|
| 137 | rcu_read_unlock(); | 
|---|
| 138 | } | 
|---|
#endif /* CONFIG_RPS */

/*
 * Tell callers whether the rfs_needed static key is enabled.
 *
 * Returns the result of static_branch_unlikely(&rfs_needed) when built with
 * CONFIG_RPS, and a constant false otherwise.
 */
static inline bool rfs_is_needed(void)
{
#ifdef CONFIG_RPS
	return static_branch_unlikely(&rfs_needed);
#else
	return false;
#endif
}
| 150 | static inline void sock_rps_record_flow_hash(__u32 hash) | 
|---|
| 151 | { | 
|---|
| 152 | #ifdef CONFIG_RPS | 
|---|
| 153 | if (!rfs_is_needed()) | 
|---|
| 154 | return; | 
|---|
| 155 |  | 
|---|
| 156 | _sock_rps_record_flow_hash(hash); | 
|---|
| 157 | #endif | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
/*
 * Record the flow of @sk, if rfs_is_needed() says so.
 *
 * @sk: socket handed to _sock_rps_record_flow().
 *
 * Compiles to an empty function without CONFIG_RPS.
 */
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_record_flow(sk);
#endif
}
/*
 * Reset the sock flow table slot used by @sk, if rfs_is_needed() says so.
 *
 * @sk: socket handed to _sock_rps_delete_flow().
 *
 * Compiles to an empty function without CONFIG_RPS.
 */
static inline void sock_rps_delete_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_delete_flow(sk);
#endif
}
| 180 | static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd) | 
|---|
| 181 | { | 
|---|
| 182 | #ifdef CONFIG_RPS | 
|---|
| 183 | return ++sd->input_queue_tail; | 
|---|
| 184 | #else | 
|---|
| 185 | return 0; | 
|---|
| 186 | #endif | 
|---|
| 187 | } | 
|---|
| 188 |  | 
|---|
| 189 | static inline void rps_input_queue_tail_save(u32 *dest, u32 tail) | 
|---|
| 190 | { | 
|---|
| 191 | #ifdef CONFIG_RPS | 
|---|
| 192 | WRITE_ONCE(*dest, tail); | 
|---|
| 193 | #endif | 
|---|
| 194 | } | 
|---|
| 195 |  | 
|---|
/*
 * Add @val to the input queue head counter of @sd.
 *
 * @sd:  per-CPU softnet data whose input_queue_head is updated.
 * @val: amount to add.
 *
 * The new value is published with WRITE_ONCE(); the current value is read
 * with a plain load.  Compiles to an empty function without CONFIG_RPS.
 */
static inline void rps_input_queue_head_add(struct softnet_data *sd, int val)
{
#ifdef CONFIG_RPS
	WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val);
#endif
}
/*
 * Advance the input queue head counter of @sd by one.
 *
 * @sd: per-CPU softnet data passed to rps_input_queue_head_add().
 */
static inline void rps_input_queue_head_incr(struct softnet_data *sd)
{
	rps_input_queue_head_add(sd, 1);
}
#endif /* _NET_RPS_H */