// SPDX-License-Identifier: GPL-2.0-or-later
#include <net/psp.h>
#include <net/gro.h>
#include <net/dst_metadata.h>
#include <net/busy_poll.h>
#include <trace/events/net.h>
#include <linux/skbuff_ref.h>

#define MAX_GRO_SKBS 8

static DEFINE_SPINLOCK(offload_lock);

/**
 *	dev_add_offload - register offload handlers
 *	@po: protocol offload declaration
 *
 *	Add protocol offload handlers to the networking stack. The passed
 *	&proto_offload is linked into kernel lists and may not be freed until
 *	it has been removed from the kernel lists.
 *
 *	This call does not sleep and therefore cannot guarantee that CPUs
 *	in the middle of receiving packets will see the new offload
 *	handlers (until the next received packet).
 */
void dev_add_offload(struct packet_offload *po)
{
	struct packet_offload *elem;

	spin_lock(&offload_lock);
	list_for_each_entry(elem, &net_hotdata.offload_base, list) {
		if (po->priority < elem->priority)
			break;
	}
	list_add_rcu(&po->list, elem->list.prev);
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);
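
/* Example registration (an illustrative sketch, modeled on how IPv4
 * registers its offload in net/ipv4/af_inet.c; "my_offload" is a
 * made-up name for the example):
 *
 *	static struct packet_offload my_offload __read_mostly = {
 *		.type = cpu_to_be16(ETH_P_IP),
 *		.callbacks = {
 *			.gro_receive	= inet_gro_receive,
 *			.gro_complete	= inet_gro_complete,
 *		},
 *	};
 *
 *	dev_add_offload(&my_offload);
 *	...
 *	dev_remove_offload(&my_offload);	// may sleep, see below
 */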

/**
 *	__dev_remove_offload	 - remove offload handler
 *	@po: packet offload declaration
 *
 *	Remove a protocol offload handler that was previously added to the
 *	kernel offload handlers by dev_add_offload(). The passed &offload_type
 *	is removed from the kernel lists and can be freed or reused once this
 *	function returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPUs have gone
 *	through a quiescent state.
 */
static void __dev_remove_offload(struct packet_offload *po)
{
	struct list_head *head = &net_hotdata.offload_base;
	struct packet_offload *po1;

	spin_lock(&offload_lock);

	list_for_each_entry(po1, head, list) {
		if (po == po1) {
			list_del_rcu(&po->list);
			goto out;
		}
	}

	pr_warn("dev_remove_offload: %p not found\n", po);
out:
	spin_unlock(&offload_lock);
}

/**
 *	dev_remove_offload	 - remove packet offload handler
 *	@po: packet offload declaration
 *
 *	Remove a packet offload handler that was previously added to the kernel
 *	offload handlers by dev_add_offload(). The passed &offload_type is
 *	removed from the kernel lists and can be freed or reused once this
 *	function returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_offload(struct packet_offload *po)
{
	__dev_remove_offload(po);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_offload);


int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
{
	struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
	unsigned int offset = skb_gro_offset(skb);
	unsigned int headlen = skb_headlen(skb);
	unsigned int len = skb_gro_len(skb);
	unsigned int delta_truesize;
	unsigned int new_truesize;
	struct sk_buff *lp;
	int segs;

	/* Do not splice page pool based packets w/ non-page pool
	 * packets. This can result in reference count issues as page
	 * pool pages will not decrement the reference count and will
	 * instead be immediately returned to the pool or have frag
	 * count decremented.
	 */
	if (p->pp_recycle != skb->pp_recycle)
		return -ETOOMANYREFS;

	if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) ||
		     NAPI_GRO_CB(skb)->flush))
		return -E2BIG;

	if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
		if (NAPI_GRO_CB(skb)->proto != IPPROTO_TCP ||
		    (p->protocol == htons(ETH_P_IPV6) &&
		     skb_headroom(p) < sizeof(struct hop_jumbo_hdr)) ||
		    p->encapsulation)
			return -E2BIG;
	}

	segs = NAPI_GRO_CB(skb)->count;
	lp = NAPI_GRO_CB(p)->last;
	pinfo = skb_shinfo(lp);

	if (headlen <= offset) {
		skb_frag_t *frag;
		skb_frag_t *frag2;
		int i = skbinfo->nr_frags;
		int nr_frags = pinfo->nr_frags + i;

		if (nr_frags > MAX_SKB_FRAGS)
			goto merge;

		offset -= headlen;
		pinfo->nr_frags = nr_frags;
		skbinfo->nr_frags = 0;

		frag = pinfo->frags + nr_frags;
		frag2 = skbinfo->frags + i;
		do {
			*--frag = *--frag2;
		} while (--i);

		skb_frag_off_add(frag, offset);
		skb_frag_size_sub(frag, offset);

		/* all fragments truesize: remove (head size + sk_buff) */
		new_truesize = SKB_TRUESIZE(skb_end_offset(skb));
		delta_truesize = skb->truesize - new_truesize;

		skb->truesize = new_truesize;
		skb->len -= skb->data_len;
		skb->data_len = 0;

		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
		goto done;
	} else if (skb->head_frag) {
		int nr_frags = pinfo->nr_frags;
		skb_frag_t *frag = pinfo->frags + nr_frags;
		struct page *page = virt_to_head_page(skb->head);
		unsigned int first_size = headlen - offset;
		unsigned int first_offset;

		if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
			goto merge;

		first_offset = skb->data -
			       (unsigned char *)page_address(page) +
			       offset;

		pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;

		skb_frag_fill_page_desc(frag, page, first_offset, first_size);

		memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
		/* We don't need to clear skbinfo->nr_frags here */

		new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff));
		delta_truesize = skb->truesize - new_truesize;
		skb->truesize = new_truesize;
		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
		goto done;
	}

merge:
	/* sk ownership - if any - completely transferred to the aggregated packet */
	skb->destructor = NULL;
	skb->sk = NULL;
	delta_truesize = skb->truesize;
	if (offset > headlen) {
		unsigned int eat = offset - headlen;

		skb_frag_off_add(&skbinfo->frags[0], eat);
		skb_frag_size_sub(&skbinfo->frags[0], eat);
		skb->data_len -= eat;
		skb->len -= eat;
		offset = headlen;
	}

	__skb_pull(skb, offset);

	if (NAPI_GRO_CB(p)->last == p)
		skb_shinfo(p)->frag_list = skb;
	else
		NAPI_GRO_CB(p)->last->next = skb;
	NAPI_GRO_CB(p)->last = skb;
	__skb_header_release(skb);
	lp = p;

done:
	NAPI_GRO_CB(p)->count += segs;
	p->data_len += len;
	p->truesize += delta_truesize;
	p->len += len;
	if (lp != p) {
		lp->data_len += len;
		lp->truesize += delta_truesize;
		lp->len += len;
	}
	NAPI_GRO_CB(skb)->same_flow = 1;
	return 0;
}

int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
{
	if (unlikely(p->len + skb->len >= 65536))
		return -E2BIG;

	if (NAPI_GRO_CB(p)->last == p)
		skb_shinfo(p)->frag_list = skb;
	else
		NAPI_GRO_CB(p)->last->next = skb;

	skb_pull(skb, skb_gro_offset(skb));

	NAPI_GRO_CB(p)->last = skb;
	NAPI_GRO_CB(p)->count++;
	p->data_len += skb->len;

	/* sk ownership - if any - completely transferred to the aggregated packet */
	skb->destructor = NULL;
	skb->sk = NULL;
	p->truesize += skb->truesize;
	p->len += skb->len;

	NAPI_GRO_CB(skb)->same_flow = 1;

	return 0;
}

static void gro_complete(struct gro_node *gro, struct sk_buff *skb)
{
	struct list_head *head = &net_hotdata.offload_base;
	struct packet_offload *ptype;
	__be16 type = skb->protocol;
	int err = -ENOENT;

	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));

	if (NAPI_GRO_CB(skb)->count == 1) {
		skb_shinfo(skb)->gso_size = 0;
		goto out;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type != type || !ptype->callbacks.gro_complete)
			continue;

		err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
					 ipv6_gro_complete, inet_gro_complete,
					 skb, 0);
		break;
	}
	rcu_read_unlock();

	if (err) {
		WARN_ON(&ptype->list == head);
		kfree_skb(skb);
		return;
	}

out:
	gro_normal_one(gro, skb, NAPI_GRO_CB(skb)->count);
}

static void __gro_flush_chain(struct gro_node *gro, u32 index, bool flush_old)
{
	struct list_head *head = &gro->hash[index].list;
	struct sk_buff *skb, *p;

	list_for_each_entry_safe_reverse(skb, p, head, list) {
		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
			return;
		skb_list_del_init(skb);
		gro_complete(gro, skb);
		gro->hash[index].count--;
	}

	if (!gro->hash[index].count)
		__clear_bit(index, &gro->bitmask);
}

/*
 * gro->hash[].list contains packets ordered by age, with the
 * youngest packets at its head.
 * Complete skbs in reverse order to reduce latencies.
 */
void __gro_flush(struct gro_node *gro, bool flush_old)
{
	unsigned long bitmask = gro->bitmask;
	unsigned int i, base = ~0U;

	while ((i = ffs(bitmask)) != 0) {
		bitmask >>= i;
		base += i;
		__gro_flush_chain(gro, base, flush_old);
	}
}
EXPORT_SYMBOL(__gro_flush);
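
/* Worked example of the bitmask walk above (illustrative): with
 * bitmask = 0b10010 (buckets 1 and 4 occupied), the first ffs()
 * returns 2, so base wraps from ~0U to 1 and bitmask becomes 0b100;
 * the second ffs() returns 3, so base becomes 4 and bitmask becomes 0,
 * ending the loop. Only the two non-empty buckets are visited.
 */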

static unsigned long gro_list_prepare_tc_ext(const struct sk_buff *skb,
					     const struct sk_buff *p,
					     unsigned long diffs)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	struct tc_skb_ext *skb_ext;
	struct tc_skb_ext *p_ext;

	skb_ext = skb_ext_find(skb, TC_SKB_EXT);
	p_ext = skb_ext_find(p, TC_SKB_EXT);

	diffs |= (!!p_ext) ^ (!!skb_ext);
	if (!diffs && unlikely(skb_ext))
		diffs |= p_ext->chain ^ skb_ext->chain;
#endif
	return diffs;
}

static void gro_list_prepare(const struct list_head *head,
			     const struct sk_buff *skb)
{
	unsigned int maclen = skb->dev->hard_header_len;
	u32 hash = skb_get_hash_raw(skb);
	struct sk_buff *p;

	list_for_each_entry(p, head, list) {
		unsigned long diffs;

		if (hash != skb_get_hash_raw(p)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
		diffs |= p->vlan_all ^ skb->vlan_all;
		diffs |= skb_metadata_differs(p, skb);
		if (maclen == ETH_HLEN)
			diffs |= compare_ether_header(skb_mac_header(p),
						      skb_mac_header(skb));
		else if (!diffs)
			diffs = memcmp(skb_mac_header(p),
				       skb_mac_header(skb),
				       maclen);

		/* In the most common scenarios 'slow_gro' is 0;
		 * otherwise we are already on some slower path, so
		 * either skip all the infrequent tests altogether or
		 * avoid trying too hard to skip each of them individually.
		 */
		if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
			diffs |= p->sk != skb->sk;
			diffs |= skb_metadata_dst_cmp(p, skb);
			diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);

			diffs |= gro_list_prepare_tc_ext(skb, p, diffs);
			diffs |= __psp_skb_coalesce_diff(skb, p, diffs);
		}

		NAPI_GRO_CB(p)->same_flow = !diffs;
	}
}

static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
{
	const struct skb_shared_info *pinfo;
	const skb_frag_t *frag0;
	unsigned int headlen;

	NAPI_GRO_CB(skb)->network_offset = 0;
	NAPI_GRO_CB(skb)->data_offset = 0;
	headlen = skb_headlen(skb);
	NAPI_GRO_CB(skb)->frag0 = skb->data;
	NAPI_GRO_CB(skb)->frag0_len = headlen;
	if (headlen)
		return;

	pinfo = skb_shinfo(skb);
	frag0 = &pinfo->frags[0];

	if (pinfo->nr_frags && skb_frag_page(frag0) &&
	    !PageHighMem(skb_frag_page(frag0)) &&
	    (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
		NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
						    skb_frag_size(frag0),
						    skb->end - skb->tail);
	}
}

static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
{
	struct skb_shared_info *pinfo = skb_shinfo(skb);

	BUG_ON(skb->end - skb->tail < grow);

	memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);

	skb->data_len -= grow;
	skb->tail += grow;

	skb_frag_off_add(&pinfo->frags[0], grow);
	skb_frag_size_sub(&pinfo->frags[0], grow);

	if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
		skb_frag_unref(skb, 0);
		memmove(pinfo->frags, pinfo->frags + 1,
			--pinfo->nr_frags * sizeof(pinfo->frags[0]));
	}
}

static void gro_try_pull_from_frag0(struct sk_buff *skb)
{
	int grow = skb_gro_offset(skb) - skb_headlen(skb);

	if (grow > 0)
		gro_pull_from_frag0(skb, grow);
}

static void gro_flush_oldest(struct gro_node *gro, struct list_head *head)
{
	struct sk_buff *oldest;

	oldest = list_last_entry(head, struct sk_buff, list);

	/* We are called with head length >= MAX_GRO_SKBS, so this is
	 * impossible.
	 */
	if (WARN_ON_ONCE(!oldest))
		return;

	/* Do not adjust napi->gro_hash[].count, caller is adding a new
	 * SKB to the chain.
	 */
	skb_list_del_init(oldest);
	gro_complete(gro, oldest);
}

static enum gro_result dev_gro_receive(struct gro_node *gro,
				       struct sk_buff *skb)
{
	u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
	struct list_head *head = &net_hotdata.offload_base;
	struct gro_list *gro_list = &gro->hash[bucket];
	struct packet_offload *ptype;
	__be16 type = skb->protocol;
	struct sk_buff *pp = NULL;
	enum gro_result ret;
	int same_flow;

	if (netif_elide_gro(skb->dev))
		goto normal;

	gro_list_prepare(&gro_list->list, skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type == type && ptype->callbacks.gro_receive)
			goto found_ptype;
	}
	rcu_read_unlock();
	goto normal;

found_ptype:
	skb_set_network_header(skb, skb_gro_offset(skb));
	skb_reset_mac_len(skb);
	BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32));
	BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
				 sizeof(u32))); /* Avoid slow unaligned acc */
	*(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
	NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
	NAPI_GRO_CB(skb)->count = 1;
	if (unlikely(skb_is_gso(skb))) {
		NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
		/* Only support TCP and non DODGY users. */
		if (!skb_is_gso_tcp(skb) ||
		    (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY))
			NAPI_GRO_CB(skb)->flush = 1;
	}

	/* Setup for GRO checksum validation */
	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		NAPI_GRO_CB(skb)->csum = skb->csum;
		NAPI_GRO_CB(skb)->csum_valid = 1;
		break;
	case CHECKSUM_UNNECESSARY:
		NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
		break;
	}

	pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
				ipv6_gro_receive, inet_gro_receive,
				&gro_list->list, skb);

	rcu_read_unlock();

	if (PTR_ERR(pp) == -EINPROGRESS) {
		ret = GRO_CONSUMED;
		goto ok;
	}

	same_flow = NAPI_GRO_CB(skb)->same_flow;
	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;

	if (pp) {
		skb_list_del_init(pp);
		gro_complete(gro, pp);
		gro_list->count--;
	}

	if (same_flow)
		goto ok;

	if (NAPI_GRO_CB(skb)->flush)
		goto normal;

	if (unlikely(gro_list->count >= MAX_GRO_SKBS))
		gro_flush_oldest(gro, &gro_list->list);
	else
		gro_list->count++;

	/* Must be called before setting NAPI_GRO_CB(skb)->{age|last} */
	gro_try_pull_from_frag0(skb);
	NAPI_GRO_CB(skb)->age = jiffies;
	NAPI_GRO_CB(skb)->last = skb;
	if (!skb_is_gso(skb))
		skb_shinfo(skb)->gso_size = skb_gro_len(skb);
	list_add(&skb->list, &gro_list->list);
	ret = GRO_HELD;

ok:
	if (gro_list->count) {
		if (!test_bit(bucket, &gro->bitmask))
			__set_bit(bucket, &gro->bitmask);
	} else if (test_bit(bucket, &gro->bitmask)) {
		__clear_bit(bucket, &gro->bitmask);
	}

	return ret;

normal:
	ret = GRO_NORMAL;
	gro_try_pull_from_frag0(skb);
	goto ok;
}

struct packet_offload *gro_find_receive_by_type(__be16 type)
{
	struct list_head *offload_head = &net_hotdata.offload_base;
	struct packet_offload *ptype;

	list_for_each_entry_rcu(ptype, offload_head, list) {
		if (ptype->type != type || !ptype->callbacks.gro_receive)
			continue;
		return ptype;
	}
	return NULL;
}
EXPORT_SYMBOL(gro_find_receive_by_type);

struct packet_offload *gro_find_complete_by_type(__be16 type)
{
	struct list_head *offload_head = &net_hotdata.offload_base;
	struct packet_offload *ptype;

	list_for_each_entry_rcu(ptype, offload_head, list) {
		if (ptype->type != type || !ptype->callbacks.gro_complete)
			continue;
		return ptype;
	}
	return NULL;
}
EXPORT_SYMBOL(gro_find_complete_by_type);

static gro_result_t gro_skb_finish(struct gro_node *gro, struct sk_buff *skb,
				   gro_result_t ret)
{
	switch (ret) {
	case GRO_NORMAL:
		gro_normal_one(gro, skb, 1);
		break;

	case GRO_MERGED_FREE:
		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
			napi_skb_free_stolen_head(skb);
		else if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
			__kfree_skb(skb);
		else
			__napi_kfree_skb(skb, SKB_CONSUMED);
		break;

	case GRO_HELD:
	case GRO_MERGED:
	case GRO_CONSUMED:
		break;
	}

	return ret;
}

gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb)
{
	gro_result_t ret;

	__skb_mark_napi_id(skb, gro);
	trace_napi_gro_receive_entry(skb);

	skb_gro_reset_offset(skb, 0);

	ret = gro_skb_finish(gro, skb, dev_gro_receive(gro, skb));
	trace_napi_gro_receive_exit(ret);

	return ret;
}
EXPORT_SYMBOL(gro_receive_skb);

static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
{
	if (unlikely(skb->pfmemalloc)) {
		consume_skb(skb);
		return;
	}
	__skb_pull(skb, skb_headlen(skb));
	/* restore the reserve we had after netdev_alloc_skb_ip_align() */
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
	__vlan_hwaccel_clear_tag(skb);
	skb->dev = napi->dev;
	skb->skb_iif = 0;

	/* eth_type_trans() assumes pkt_type is PACKET_HOST */
	skb->pkt_type = PACKET_HOST;

	skb->encapsulation = 0;
	skb->ip_summed = CHECKSUM_NONE;
	skb_shinfo(skb)->gso_type = 0;
	skb_shinfo(skb)->gso_size = 0;
	if (unlikely(skb->slow_gro)) {
		skb_orphan(skb);
		skb_ext_reset(skb);
		nf_reset_ct(skb);
		skb->slow_gro = 0;
	}

	napi->skb = skb;
}

struct sk_buff *napi_get_frags(struct napi_struct *napi)
{
	struct sk_buff *skb = napi->skb;

	if (!skb) {
		skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
		if (skb) {
			napi->skb = skb;
			skb_mark_napi_id(skb, napi);
		}
	}
	return skb;
}
EXPORT_SYMBOL(napi_get_frags);

static gro_result_t napi_frags_finish(struct napi_struct *napi,
				      struct sk_buff *skb,
				      gro_result_t ret)
{
	switch (ret) {
	case GRO_NORMAL:
	case GRO_HELD:
		__skb_push(skb, ETH_HLEN);
		skb->protocol = eth_type_trans(skb, skb->dev);
		if (ret == GRO_NORMAL)
			gro_normal_one(&napi->gro, skb, 1);
		break;

	case GRO_MERGED_FREE:
		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
			napi_skb_free_stolen_head(skb);
		else
			napi_reuse_skb(napi, skb);
		break;

	case GRO_MERGED:
	case GRO_CONSUMED:
		break;
	}

	return ret;
}

/* Upper GRO stack assumes network header starts at gro_offset=0.
 * Drivers could call both napi_gro_frags() and napi_gro_receive(),
 * so we copy the ethernet header into skb->data to have a common layout.
 */
static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
{
	struct sk_buff *skb = napi->skb;
	const struct ethhdr *eth;
	unsigned int hlen = sizeof(*eth);

	napi->skb = NULL;

	skb_reset_mac_header(skb);
	skb_gro_reset_offset(skb, hlen);

	if (unlikely(!skb_gro_may_pull(skb, hlen))) {
		eth = skb_gro_header_slow(skb, hlen, 0);
		if (unlikely(!eth)) {
			net_warn_ratelimited("%s: dropping impossible skb from %s\n",
					     __func__, napi->dev->name);
			napi_reuse_skb(napi, skb);
			return NULL;
		}
	} else {
		eth = (const struct ethhdr *)skb->data;

		if (NAPI_GRO_CB(skb)->frag0 != skb->data)
			gro_pull_from_frag0(skb, hlen);

		NAPI_GRO_CB(skb)->frag0 += hlen;
		NAPI_GRO_CB(skb)->frag0_len -= hlen;
	}
	__skb_pull(skb, hlen);

	/*
	 * This works because the only protocols we care about don't require
	 * special handling.
	 * We'll fix it up properly in napi_frags_finish().
	 */
	skb->protocol = eth->h_proto;

	return skb;
}

gro_result_t napi_gro_frags(struct napi_struct *napi)
{
	gro_result_t ret;
	struct sk_buff *skb = napi_frags_skb(napi);

	trace_napi_gro_frags_entry(skb);

	ret = napi_frags_finish(napi, skb, dev_gro_receive(&napi->gro, skb));
	trace_napi_gro_frags_exit(ret);

	return ret;
}
EXPORT_SYMBOL(napi_gro_frags);
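
/* Typical driver usage (an illustrative sketch; "page", "rx_off" and
 * "rx_len" stand in for whatever the driver's RX descriptor provides):
 *
 *	struct sk_buff *skb = napi_get_frags(napi);
 *
 *	if (!skb)
 *		return;		// allocation failure, drop the buffer
 *	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
 *			rx_off, rx_len, PAGE_SIZE);
 *	napi_gro_frags(napi);	// consumes napi->skb
 *
 * The skb head stays empty: napi_frags_skb() pulls the ethernet header
 * out of frag0 and napi_frags_finish() calls eth_type_trans().
 */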

/* Compute the checksum from gro_offset and return the folded value
 * after adding in any pseudo checksum.
 */
__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
{
	__wsum wsum;
	__sum16 sum;

	wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);

	/* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
	sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
	/* See comments in __skb_checksum_complete(). */
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev, skb);
	}

	NAPI_GRO_CB(skb)->csum = wsum;
	NAPI_GRO_CB(skb)->csum_valid = 1;

	return sum;
}
EXPORT_SYMBOL(__skb_gro_checksum_complete);
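
/* Callers normally reach this through the skb_gro_checksum_validate()
 * helpers in <net/gro.h>. A sketch of how a protocol's gro_receive
 * handler validates its checksum (modeled on TCP over IPv4):
 *
 *	// Seeds NAPI_GRO_CB(skb)->csum with the pseudo-header checksum,
 *	// then falls back to __skb_gro_checksum_complete() when the
 *	// device did not already verify it.
 *	if (skb_gro_checksum_validate(skb, IPPROTO_TCP,
 *				      inet_gro_compute_pseudo)) {
 *		NAPI_GRO_CB(skb)->flush = 1;
 *		return NULL;
 *	}
 */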

void gro_init(struct gro_node *gro)
{
	for (u32 i = 0; i < GRO_HASH_BUCKETS; i++) {
		INIT_LIST_HEAD(&gro->hash[i].list);
		gro->hash[i].count = 0;
	}

	gro->bitmask = 0;
	gro->cached_napi_id = 0;

	INIT_LIST_HEAD(&gro->rx_list);
	gro->rx_count = 0;
}

void gro_cleanup(struct gro_node *gro)
{
	struct sk_buff *skb, *n;

	for (u32 i = 0; i < GRO_HASH_BUCKETS; i++) {
		list_for_each_entry_safe(skb, n, &gro->hash[i].list, list)
			kfree_skb(skb);

		gro->hash[i].count = 0;
	}

	gro->bitmask = 0;
	gro->cached_napi_id = 0;

	list_for_each_entry_safe(skb, n, &gro->rx_list, list)
		kfree_skb(skb);

	gro->rx_count = 0;
}