/* SPDX-License-Identifier: GPL-2.0-or-later */

#ifndef _NET_GRO_H
#define _NET_GRO_H

#include <linux/indirect_call_wrapper.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/hotdata.h>

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD	(MAX_HEADER + 128)

struct napi_gro_cb {
	union {
		struct {
			/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
			void	*frag0;

			/* Length of frag0. */
			unsigned int frag0_len;
		};

		struct {
			/* used in skb_gro_receive() slow path */
			struct sk_buff *last;

			/* jiffies when first packet was created/queued */
			unsigned long age;
		};
	};

	/* This indicates where we are processing relative to skb->data. */
	int	data_offset;

	/* This is non-zero if the packet cannot be merged with the new skb. */
	u16	flush;

	/* Number of segments aggregated. */
	u16	count;

	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
	u16	proto;

	u16	pad;

/* Used in napi_gro_cb::free */
#define NAPI_GRO_FREE			1
#define NAPI_GRO_FREE_STOLEN_HEAD	2
	/* portion of the cb set to zero at every gro iteration */
	struct_group(zeroed,

		/* Start offset for remote checksum offload */
		u16	gro_remcsum_start;

		/* This is non-zero if the packet may be of the same flow. */
		u8	same_flow:1;

		/* Used in tunnel GRO receive */
		u8	encap_mark:1;

		/* GRO checksum is valid */
		u8	csum_valid:1;

		/* Number of checksums via CHECKSUM_UNNECESSARY */
		u8	csum_cnt:3;

		/* Free the skb? */
		u8	free:2;

		/* Used in GRE, set in fou/gue_gro_receive */
		u8	is_fou:1;

		/* Used to determine if ipid_offset can be ignored */
		u8	ip_fixedid:2;

		/* Number of gro_receive callbacks this packet already went through */
		u8	recursion_counter:4;

		/* GRO is done by frag_list pointer chaining. */
		u8	is_flist:1;
	);

	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
	__wsum	csum;

	/* L3 offsets */
	union {
		struct {
			u16	network_offset;
			u16	inner_network_offset;
		};
		u16	network_offsets[2];
	};
};

#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)

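/* Note (illustrative, not from the original header): napi_gro_cb is overlaid
 * on skb->cb[], which is 48 bytes on current kernels, so any field added to
 * the structure above must keep it within that limit.  A hypothetical guard
 * would look like:
 *
 *	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof_field(struct sk_buff, cb));
 */
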
#define GRO_RECURSION_LIMIT 15
static inline int gro_recursion_inc_test(struct sk_buff *skb)
{
	return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
}

typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
					       struct list_head *head,
					       struct sk_buff *skb)
{
	if (unlikely(gro_recursion_inc_test(skb))) {
		NAPI_GRO_CB(skb)->flush |= 1;
		return NULL;
	}

	return cb(head, skb);
}

typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
					    struct sk_buff *);
static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
						  struct sock *sk,
						  struct list_head *head,
						  struct sk_buff *skb)
{
	if (unlikely(gro_recursion_inc_test(skb))) {
		NAPI_GRO_CB(skb)->flush |= 1;
		return NULL;
	}

	return cb(sk, head, skb);
}

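/* Illustrative sketch (not part of this header): an encapsulation handler
 * such as the GRE offload typically looks up the inner protocol's offload
 * and chains into it through call_gro_receive(), so the recursion counter
 * bounds deeply nested tunnels:
 *
 *	ptype = gro_find_receive_by_type(type);
 *	if (!ptype)
 *		goto out;
 *	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
 */
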
static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
{
	return NAPI_GRO_CB(skb)->data_offset;
}

static inline unsigned int skb_gro_len(const struct sk_buff *skb)
{
	return skb->len - NAPI_GRO_CB(skb)->data_offset;
}

static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
{
	NAPI_GRO_CB(skb)->data_offset += len;
}

static inline void *skb_gro_header_fast(const struct sk_buff *skb,
					unsigned int offset)
{
	return NAPI_GRO_CB(skb)->frag0 + offset;
}

static inline bool skb_gro_may_pull(const struct sk_buff *skb,
				    unsigned int hlen)
{
	return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len);
}

static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
					unsigned int offset)
{
	if (!pskb_may_pull(skb, hlen))
		return NULL;

	return skb->data + offset;
}

static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
				   unsigned int offset)
{
	void *ptr;

	ptr = skb_gro_header_fast(skb, offset);
	if (!skb_gro_may_pull(skb, hlen))
		ptr = skb_gro_header_slow(skb, hlen, offset);
	return ptr;
}

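/* Illustrative sketch of the usual access pattern in a gro_receive handler
 * (modelled on udp_gro_udphdr() below and the TCP offload, not part of this
 * header): try the frag0 fast path first, falling back to pulling linear
 * data only when the requested length is not available there:
 *
 *	off  = skb_gro_offset(skb);
 *	hlen = off + sizeof(*th);
 *	th   = skb_gro_header(skb, hlen, off);
 *	if (unlikely(!th))
 *		goto out;
 */
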
static inline int skb_gro_receive_network_offset(const struct sk_buff *skb)
{
	return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark];
}

static inline void *skb_gro_network_header(const struct sk_buff *skb)
{
	if (skb_gro_may_pull(skb, skb_gro_offset(skb)))
		return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb));

	return skb->data + skb_gro_receive_network_offset(skb);
}

static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
					     int proto)
{
	const struct iphdr *iph = skb_gro_network_header(skb);

	return csum_tcpudp_nofold(iph->saddr, iph->daddr,
				  skb_gro_len(skb), proto, 0);
}

static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
					  const void *start, unsigned int len)
{
	if (NAPI_GRO_CB(skb)->csum_valid)
		NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len,
						wsum_negate(NAPI_GRO_CB(skb)->csum)));
}

/* GRO checksum functions. These are logical equivalents of the normal
 * checksum functions (in skbuff.h) except that they operate on the GRO
 * offsets and fields in sk_buff.
 */

__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);

static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
{
	return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
}

static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
						      bool zero_okay,
						      __sum16 check)
{
	return ((skb->ip_summed != CHECKSUM_PARTIAL ||
		 skb_checksum_start_offset(skb) <
		 skb_gro_offset(skb)) &&
		!skb_at_gro_remcsum_start(skb) &&
		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
		(!zero_okay || check));
}

static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
							   __wsum psum)
{
	if (NAPI_GRO_CB(skb)->csum_valid &&
	    !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
		return 0;

	NAPI_GRO_CB(skb)->csum = psum;

	return __skb_gro_checksum_complete(skb);
}

static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
{
	if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
		/* Consume a checksum from CHECKSUM_UNNECESSARY */
		NAPI_GRO_CB(skb)->csum_cnt--;
	} else {
		/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
		 * verified a new top level checksum or an encapsulated one
		 * during GRO. This saves work if we fall back to the normal path.
		 */
		__skb_incr_checksum_unnecessary(skb);
	}
}

#define __skb_gro_checksum_validate(skb, proto, zero_okay, check,	\
				    compute_pseudo)			\
({									\
	__sum16 __ret = 0;						\
	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
		__ret = __skb_gro_checksum_validate_complete(skb,	\
				compute_pseudo(skb, proto));		\
	if (!__ret)							\
		skb_gro_incr_csum_unnecessary(skb);			\
	__ret;								\
})

#define skb_gro_checksum_validate(skb, proto, compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)

#define skb_gro_checksum_validate_zero_check(skb, proto, check,	\
					     compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)

#define skb_gro_checksum_simple_validate(skb)				\
	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)

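/* Illustrative sketch (modelled on the UDPv4 offload, not part of this
 * header): a transport handler validates the datagram checksum against the
 * IPv4 pseudo-header, with a zero UDP checksum treated as "not present":
 *
 *	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
 *						 inet_gro_compute_pseudo))
 *		goto flush;
 */
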
static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
{
	return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
		!NAPI_GRO_CB(skb)->csum_valid);
}

static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
					      __wsum pseudo)
{
	NAPI_GRO_CB(skb)->csum = ~pseudo;
	NAPI_GRO_CB(skb)->csum_valid = 1;
}

#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo)	\
do {									\
	if (__skb_gro_checksum_convert_check(skb))			\
		__skb_gro_checksum_convert(skb,				\
					   compute_pseudo(skb, proto));	\
} while (0)

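/* Illustrative sketch (not part of this header): after a non-zero UDP
 * checksum has been validated, the UDP offload opportunistically seeds the
 * GRO checksum state from the pseudo-header so later stages can reuse it:
 *
 *	if (uh->check)
 *		skb_gro_checksum_try_convert(skb, IPPROTO_UDP,
 *					     inet_gro_compute_pseudo);
 */
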
struct gro_remcsum {
	int offset;
	__wsum delta;
};

static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
{
	grc->offset = 0;
	grc->delta = 0;
}

static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
					    unsigned int off, size_t hdrlen,
					    int start, int offset,
					    struct gro_remcsum *grc,
					    bool nopartial)
{
	__wsum delta;
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);

	BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);

	if (!nopartial) {
		NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
		return ptr;
	}

	ptr = skb_gro_header(skb, off + plen, off);
	if (!ptr)
		return NULL;

	delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
			       start, offset);

	/* Adjust skb->csum since we changed the packet */
	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);

	grc->offset = off + hdrlen + offset;
	grc->delta = delta;

	return ptr;
}

static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
					   struct gro_remcsum *grc)
{
	void *ptr;
	size_t plen = grc->offset + sizeof(u16);

	if (!grc->delta)
		return;

	ptr = skb_gro_header(skb, plen, grc->offset);
	if (!ptr)
		return;

	remcsum_unadjust((__sum16 *)ptr, grc->delta);
}

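/* Illustrative lifecycle (loosely based on the fou/gue and vxlan offloads,
 * not part of this header): a tunnel gro_receive handler keeps a
 * gro_remcsum on the stack, lets skb_gro_remcsum_process() patch the inner
 * checksum, and finishes with skb_gro_flush_final_remcsum() below to set
 * the flush bit and clean up the remote-checksum state:
 *
 *	struct gro_remcsum grc;
 *
 *	skb_gro_remcsum_init(&grc);
 *	...
 *	ptr = skb_gro_remcsum_process(skb, ptr, off, hdrlen,
 *				      start, offset, &grc, nopartial);
 *	...
 *	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 */
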
#ifdef CONFIG_XFRM_OFFLOAD
static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
{
	if (PTR_ERR(pp) != -EINPROGRESS)
		NAPI_GRO_CB(skb)->flush |= flush;
}
static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
						struct sk_buff *pp,
						int flush,
						struct gro_remcsum *grc)
{
	if (PTR_ERR(pp) != -EINPROGRESS) {
		NAPI_GRO_CB(skb)->flush |= flush;
		skb_gro_remcsum_cleanup(skb, grc);
		skb->remcsum_offload = 0;
	}
}
#else
static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
{
	NAPI_GRO_CB(skb)->flush |= flush;
}
static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
					       struct sk_buff *pp,
					       int flush,
					       struct gro_remcsum *grc)
{
	NAPI_GRO_CB(skb)->flush |= flush;
	skb_gro_remcsum_cleanup(skb, grc);
	skb->remcsum_offload = 0;
}
#endif

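/* Note (not from the original header): with CONFIG_XFRM_OFFLOAD the IPsec
 * GRO path may return ERR_PTR(-EINPROGRESS) to signal that the skb has been
 * taken over for asynchronous processing; in that case the helpers above
 * deliberately leave the flush bit and remote-checksum state untouched.
 */
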
INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
							    struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
							    struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));

#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb)	\
({								\
	unlikely(gro_recursion_inc_test(skb)) ?			\
		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
		INDIRECT_CALL_INET(cb, f2, f1, head, skb);	\
})

struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
				struct udphdr *uh, struct sock *sk);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);

static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
{
	struct udphdr *uh;
	unsigned int hlen, off;

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*uh);
	uh = skb_gro_header(skb, hlen, off);

	return uh;
}

static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
					    int proto)
{
	const struct ipv6hdr *iph = skb_gro_network_header(skb);

	return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
					    skb_gro_len(skb), proto, 0));
}

static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2,
				 struct sk_buff *p, bool inner)
{
	const u32 id = ntohl(*(__be32 *)&iph->id);
	const u32 id2 = ntohl(*(__be32 *)&iph2->id);
	const u16 ipid_offset = (id >> 16) - (id2 >> 16);
	const u16 count = NAPI_GRO_CB(p)->count;

	/* All fields must match except length and checksum. */
	if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | ((id ^ id2) & IP_DF))
		return true;

	/* When we receive our second frame we can decide whether to continue
	 * this flow as an atomic flow with a fixed ID or to use an
	 * incrementing ID.
	 */
	if (count == 1 && !ipid_offset)
		NAPI_GRO_CB(p)->ip_fixedid |= 1 << inner;

	return ipid_offset ^ (count * !(NAPI_GRO_CB(p)->ip_fixedid & (1 << inner)));
}

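/* Worked example (illustrative, not from the original header): with three
 * segments already held (count == 3) and IPv4 IDs incrementing by one per
 * segment, the new packet's ID is three greater, so ipid_offset == 3 and
 * count * 1 == 3; the XOR is zero and the packets may be merged.  For a
 * fixed-ID (atomic) flow the ip_fixedid bit zeroes the multiplier, so only
 * ipid_offset == 0 avoids a flush.
 */
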
static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2)
{
	/* <Version:4><Traffic_Class:8><Flow_Label:20> */
	__be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2;

	/* Flush if Traffic Class fields are different. */
	return !!((first_word & htonl(0x0FF00000)) |
		  (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
}

static inline int __gro_receive_network_flush(const void *th, const void *th2,
					      struct sk_buff *p, const u16 diff,
					      bool inner)
{
	const void *nh = th - diff;
	const void *nh2 = th2 - diff;

	if (((struct iphdr *)nh)->version == 6)
		return ipv6_gro_flush(nh, nh2);
	else
		return inet_gro_flush(nh, nh2, p, inner);
}

static inline int gro_receive_network_flush(const void *th, const void *th2,
					    struct sk_buff *p)
{
	int off = skb_transport_offset(p);
	int flush;

	flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, false);
	if (NAPI_GRO_CB(p)->encap_mark)
		flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, true);

	return flush;
}

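/* Illustrative usage (modelled on the TCP offload, not part of this header):
 * a transport gro_receive handler compares the new and held transport
 * headers and then folds in the network-layer checks for the outer and, if
 * present, the inner header:
 *
 *	flush |= gro_receive_network_flush(th, th2, p);
 */
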
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
void __gro_flush(struct gro_node *gro, bool flush_old);

static inline void gro_flush(struct gro_node *gro, bool flush_old)
{
	if (!gro->bitmask)
		return;

	__gro_flush(gro, flush_old);
}

static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
	gro_flush(&napi->gro, flush_old);
}

/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
static inline void gro_normal_list(struct gro_node *gro)
{
	if (!gro->rx_count)
		return;
	netif_receive_skb_list_internal(&gro->rx_list);
	INIT_LIST_HEAD(&gro->rx_list);
	gro->rx_count = 0;
}

static inline void gro_flush_normal(struct gro_node *gro, bool flush_old)
{
	gro_flush(gro, flush_old);
	gro_normal_list(gro);
}

/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
 * pass the whole batch up to the stack.
 */
static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb,
				  int segs)
{
	list_add_tail(&skb->list, &gro->rx_list);
	gro->rx_count += segs;
	if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
		gro_normal_list(gro);
}

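/* Note (not from the original header): the batch threshold read above is the
 * net.core.gro_normal_batch sysctl (8 by default on current kernels), so a
 * driver calling gro_normal_one() repeatedly hands SKBs to the stack in
 * small bursts rather than one at a time.
 */
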
void gro_init(struct gro_node *gro);
void gro_cleanup(struct gro_node *gro);

/* This function is the alternative of 'inet_iif' and 'inet_sdif'
 * functions in case we cannot rely on fields of IPCB.
 *
 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
 * The caller must hold the RCU read lock.
 */
static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
{
	*iif = inet_iif(skb) ?: skb->dev->ifindex;
	*sdif = 0;

#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (netif_is_l3_slave(skb->dev)) {
		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);

		*sdif = *iif;
		*iif = master ? master->ifindex : 0;
	}
#endif
}

/* This function is the alternative of 'inet6_iif' and 'inet6_sdif'
 * functions in case we cannot rely on fields of IP6CB.
 *
 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
 * The caller must hold the RCU read lock.
 */
static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
{
	/* using skb->dev->ifindex because skb_dst(skb) is not initialized */
	*iif = skb->dev->ifindex;
	*sdif = 0;

#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (netif_is_l3_slave(skb->dev)) {
		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);

		*sdif = *iif;
		*iif = master ? master->ifindex : 0;
	}
#endif
}

struct packet_offload *gro_find_receive_by_type(__be16 type);
struct packet_offload *gro_find_complete_by_type(__be16 type);

#endif /* _NET_GRO_H */