// SPDX-License-Identifier: GPL-2.0-only
/* net/core/xdp.c
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/bug.h>
#include <net/page_pool/helpers.h>

#include <net/hotdata.h>
#include <net/netdev_lock.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
#include <net/xdp_sock_drv.h>

#define REG_STATE_NEW		0x0
#define REG_STATE_REGISTERED	0x1
#define REG_STATE_UNREGISTERED	0x2
#define REG_STATE_UNUSED	0x3

static DEFINE_IDA(mem_id_pool);
static DEFINE_MUTEX(mem_id_lock);
#define MEM_ID_MAX 0xFFFE
#define MEM_ID_MIN 1
static int mem_id_next = MEM_ID_MIN;

static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;

static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 *k = data;
	const u32 key = *k;

	BUILD_BUG_ON(sizeof_field(struct xdp_mem_allocator, mem.id)
		     != sizeof(u32));

	/* Use cyclic increasing ID as direct hash key */
	return key;
}

static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct xdp_mem_allocator *xa = ptr;
	u32 mem_id = *(u32 *)arg->key;

	return xa->mem.id != mem_id;
}

static const struct rhashtable_params mem_id_rht_params = {
	.nelem_hint = 64,
	.head_offset = offsetof(struct xdp_mem_allocator, node),
	.key_offset  = offsetof(struct xdp_mem_allocator, mem.id),
	.key_len = sizeof_field(struct xdp_mem_allocator, mem.id),
	.max_size = MEM_ID_MAX,
	.min_size = 8,
	.automatic_shrinking = true,
	.hashfn    = xdp_mem_id_hashfn,
	.obj_cmpfn = xdp_mem_id_cmp,
};

static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
{
	struct xdp_mem_allocator *xa;

	xa = container_of(rcu, struct xdp_mem_allocator, rcu);

	/* Allow this ID to be reused */
	ida_free(&mem_id_pool, xa->mem.id);

	kfree(xa);
}

static void mem_xa_remove(struct xdp_mem_allocator *xa)
{
	trace_mem_disconnect(xa);

	if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
}

static void mem_allocator_disconnect(void *allocator)
{
	struct xdp_mem_allocator *xa;
	struct rhashtable_iter iter;

	mutex_lock(&mem_id_lock);

	rhashtable_walk_enter(mem_id_ht, &iter);
	do {
		rhashtable_walk_start(&iter);

		while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) {
			if (xa->allocator == allocator)
				mem_xa_remove(xa);
		}

		rhashtable_walk_stop(&iter);

	} while (xa == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	mutex_unlock(&mem_id_lock);
}

void xdp_unreg_mem_model(struct xdp_mem_info *mem)
{
	struct xdp_mem_allocator *xa;
	int type = mem->type;
	int id = mem->id;

	/* Reset mem info to defaults */
	mem->id = 0;
	mem->type = 0;

	if (id == 0)
		return;

	if (type == MEM_TYPE_PAGE_POOL) {
		xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
		page_pool_destroy(xa->page_pool);
	}
}
EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);

void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return;
	}

	xdp_unreg_mem_model(&xdp_rxq->mem);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);

void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
	/* Simplify driver cleanup code paths, allow unreg "unused" */
	if (xdp_rxq->reg_state == REG_STATE_UNUSED)
		return;

	xdp_rxq_info_unreg_mem_model(xdp_rxq);

	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
	xdp_rxq->dev = NULL;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);

static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
{
	memset(xdp_rxq, 0, sizeof(*xdp_rxq));
}

/* Returns 0 on success, negative on failure */
int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		       struct net_device *dev, u32 queue_index,
		       unsigned int napi_id, u32 frag_size)
{
	if (!dev) {
		WARN(1, "Missing net_device from driver");
		return -ENODEV;
	}

	if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
		WARN(1, "Driver promised not to register this");
		return -EINVAL;
	}

	if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
		WARN(1, "Missing unregister, handled but fix driver");
		xdp_rxq_info_unreg(xdp_rxq);
	}

	/* State either UNREGISTERED or NEW */
	xdp_rxq_info_init(xdp_rxq);
	xdp_rxq->dev = dev;
	xdp_rxq->queue_index = queue_index;
	xdp_rxq->frag_size = frag_size;

	xdp_rxq->reg_state = REG_STATE_REGISTERED;
	return 0;
}
EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
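
/* Illustrative sketch (not part of the upstream file): a driver typically
 * pairs Rx queue registration with a memory model and tears both down when
 * the queue is destroyed. Names such as rq, my_dev and my_pool below are
 * hypothetical placeholders.
 *
 *	err = xdp_rxq_info_reg(&rq->xdp_rxq, my_dev, queue_index, napi_id);
 *	if (err)
 *		return err;
 *
 *	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL,
 *					 my_pool);
 *	if (err) {
 *		xdp_rxq_info_unreg(&rq->xdp_rxq);
 *		return err;
 *	}
 *
 * On queue teardown the driver calls xdp_rxq_info_unreg(&rq->xdp_rxq), which
 * also unregisters the attached memory model.
 */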

void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
	xdp_rxq->reg_state = REG_STATE_UNUSED;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);

bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
{
	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);

static int __mem_id_init_hash_table(void)
{
	struct rhashtable *rht;
	int ret;

	if (unlikely(mem_id_init))
		return 0;

	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
	if (!rht)
		return -ENOMEM;

	ret = rhashtable_init(rht, &mem_id_rht_params);
	if (ret < 0) {
		kfree(rht);
		return ret;
	}
	mem_id_ht = rht;
	smp_mb(); /* mutex lock should provide enough pairing */
	mem_id_init = true;

	return 0;
}

/* Allocate a cyclic ID that maps to allocator pointer.
 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
 *
 * Caller must lock mem_id_lock.
 */
static int __mem_id_cyclic_get(gfp_t gfp)
{
	int retries = 1;
	int id;

again:
	id = ida_alloc_range(&mem_id_pool, mem_id_next, MEM_ID_MAX - 1, gfp);
	if (id < 0) {
		if (id == -ENOSPC) {
			/* Cyclic allocator, reset next id */
			if (retries--) {
				mem_id_next = MEM_ID_MIN;
				goto again;
			}
		}
		return id; /* errno */
	}
	mem_id_next = id + 1;

	return id;
}

static bool __is_supported_mem_type(enum xdp_mem_type type)
{
	if (type == MEM_TYPE_PAGE_POOL)
		return is_page_pool_compiled_in();

	if (type >= MEM_TYPE_MAX)
		return false;

	return true;
}

static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
						     enum xdp_mem_type type,
						     void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;
	gfp_t gfp = GFP_KERNEL;
	int id, errno, ret;
	void *ptr;

	if (!__is_supported_mem_type(type))
		return ERR_PTR(-EOPNOTSUPP);

	mem->type = type;

	if (!allocator) {
		if (type == MEM_TYPE_PAGE_POOL)
			return ERR_PTR(-EINVAL); /* Setup time check page_pool req */
		return NULL;
	}

	/* Delay init of rhashtable to save memory if feature isn't used */
	if (!mem_id_init) {
		mutex_lock(&mem_id_lock);
		ret = __mem_id_init_hash_table();
		mutex_unlock(&mem_id_lock);
		if (ret < 0)
			return ERR_PTR(ret);
	}

	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
	if (!xdp_alloc)
		return ERR_PTR(-ENOMEM);

	mutex_lock(&mem_id_lock);
	id = __mem_id_cyclic_get(gfp);
	if (id < 0) {
		errno = id;
		goto err;
	}
	mem->id = id;
	xdp_alloc->mem = *mem;
	xdp_alloc->allocator = allocator;

	/* Insert allocator into ID lookup table */
	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
	if (IS_ERR(ptr)) {
		ida_free(&mem_id_pool, mem->id);
		mem->id = 0;
		errno = PTR_ERR(ptr);
		goto err;
	}

	if (type == MEM_TYPE_PAGE_POOL)
		page_pool_use_xdp_mem(allocator, mem_allocator_disconnect, mem);

	mutex_unlock(&mem_id_lock);

	return xdp_alloc;
err:
	mutex_unlock(&mem_id_lock);
	kfree(xdp_alloc);
	return ERR_PTR(errno);
}

int xdp_reg_mem_model(struct xdp_mem_info *mem,
		      enum xdp_mem_type type, void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;

	xdp_alloc = __xdp_reg_mem_model(mem, type, allocator);
	if (IS_ERR(xdp_alloc))
		return PTR_ERR(xdp_alloc);
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_reg_mem_model);

int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
			       enum xdp_mem_type type, void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;

	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return -EFAULT;
	}

	xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator);
	if (IS_ERR(xdp_alloc))
		return PTR_ERR(xdp_alloc);

	if (type == MEM_TYPE_XSK_BUFF_POOL && allocator)
		xsk_pool_set_rxq_info(allocator, xdp_rxq);

	if (trace_mem_connect_enabled() && xdp_alloc)
		trace_mem_connect(xdp_alloc, xdp_rxq);
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);

/**
 * xdp_reg_page_pool - register &page_pool as a memory provider for XDP
 * @pool: &page_pool to register
 *
 * Can be used to register pools manually without connecting to any XDP RxQ
 * info, so that the XDP layer will be aware of them. Then, they can be
 * attached to an RxQ info manually via xdp_rxq_info_attach_page_pool().
 *
 * Return: %0 on success, -errno on error.
 */
int xdp_reg_page_pool(struct page_pool *pool)
{
	struct xdp_mem_info mem;

	return xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pool);
}
EXPORT_SYMBOL_GPL(xdp_reg_page_pool);

/**
 * xdp_unreg_page_pool - unregister &page_pool from the memory providers list
 * @pool: &page_pool to unregister
 *
 * A shorthand for manually unregistering page pools. If the pool was
 * previously attached to an RxQ info, it must be detached first.
 */
void xdp_unreg_page_pool(const struct page_pool *pool)
{
	struct xdp_mem_info mem = {
		.type	= MEM_TYPE_PAGE_POOL,
		.id	= pool->xdp_mem_id,
	};

	xdp_unreg_mem_model(&mem);
}
EXPORT_SYMBOL_GPL(xdp_unreg_page_pool);

/**
 * xdp_rxq_info_attach_page_pool - attach registered pool to RxQ info
 * @xdp_rxq: XDP RxQ info to attach the pool to
 * @pool: pool to attach
 *
 * If the pool was registered manually, this function must be called instead
 * of xdp_rxq_info_reg_mem_model() to connect it to the RxQ info.
 */
void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq,
				   const struct page_pool *pool)
{
	struct xdp_mem_info mem = {
		.type	= MEM_TYPE_PAGE_POOL,
		.id	= pool->xdp_mem_id,
	};

	xdp_rxq_info_attach_mem_model(xdp_rxq, &mem);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool);
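
/* Illustrative sketch (not part of the upstream file) of the manual flow
 * described above, assuming the xdp_rxq_info_detach_mem_model() helper from
 * <net/xdp.h>; pool and rq are hypothetical placeholders.
 *
 *	err = xdp_reg_page_pool(pool);
 *	if (err)
 *		goto err_free_pool;
 *	xdp_rxq_info_attach_page_pool(&rq->xdp_rxq, pool);
 *
 *	...
 *
 *	xdp_rxq_info_detach_mem_model(&rq->xdp_rxq);
 *	xdp_unreg_page_pool(pool);
 */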

/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection.  The @napi_direct boolean
 * is used for those call sites, thus allowing for faster recycling
 * of xdp_frames/pages in those cases.
 */
void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
		  bool napi_direct, struct xdp_buff *xdp)
{
	switch (mem_type) {
	case MEM_TYPE_PAGE_POOL:
		netmem = netmem_compound_head(netmem);
		if (napi_direct && xdp_return_frame_no_direct())
			napi_direct = false;
		/* No need to check netmem_is_pp() as mem->type knows this is
		 * a page_pool page
		 */
		page_pool_put_full_netmem(netmem_get_pp(netmem), netmem,
					  napi_direct);
		break;
	case MEM_TYPE_PAGE_SHARED:
		page_frag_free(__netmem_address(netmem));
		break;
	case MEM_TYPE_PAGE_ORDER0:
		put_page(__netmem_to_page(netmem));
		break;
	case MEM_TYPE_XSK_BUFF_POOL:
		/* NB! Only valid from an xdp_buff! */
		xsk_buff_free(xdp);
		break;
	default:
		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
		WARN(1, "Incorrect XDP memory type (%d) usage", mem_type);
		break;
	}
}

void xdp_return_frame(struct xdp_frame *xdpf)
{
	struct skb_shared_info *sinfo;

	if (likely(!xdp_frame_has_frags(xdpf)))
		goto out;

	sinfo = xdp_get_shared_info_from_frame(xdpf);
	for (u32 i = 0; i < sinfo->nr_frags; i++)
		__xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
			     false, NULL);

out:
	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, false, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);

void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
	struct skb_shared_info *sinfo;

	if (likely(!xdp_frame_has_frags(xdpf)))
		goto out;

	sinfo = xdp_get_shared_info_from_frame(xdpf);
	for (u32 i = 0; i < sinfo->nr_frags; i++)
		__xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
			     true, NULL);

out:
	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);

/* XDP bulk APIs introduce a defer/flush mechanism to return
 * pages belonging to the same xdp_mem_allocator object
 * (identified via the mem.id field) in bulk to optimize
 * I-cache and D-cache.
 * The bulk queue size is set to 16 to be aligned to how
 * XDP_REDIRECT bulking works. The bulk is flushed when
 * it is full or when mem.id changes.
 * xdp_frame_bulk is usually stored/allocated on the function
 * call-stack to avoid locking penalties.
 */

/* Must be called with rcu_read_lock held */
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
			   struct xdp_frame_bulk *bq)
{
	if (xdpf->mem_type != MEM_TYPE_PAGE_POOL) {
		xdp_return_frame(xdpf);
		return;
	}

	if (bq->count == XDP_BULK_QUEUE_SIZE)
		xdp_flush_frame_bulk(bq);

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		struct skb_shared_info *sinfo;
		int i;

		sinfo = xdp_get_shared_info_from_frame(xdpf);
		for (i = 0; i < sinfo->nr_frags; i++) {
			skb_frag_t *frag = &sinfo->frags[i];

			bq->q[bq->count++] = skb_frag_netmem(frag);
			if (bq->count == XDP_BULK_QUEUE_SIZE)
				xdp_flush_frame_bulk(bq);
		}
	}
	bq->q[bq->count++] = virt_to_netmem(xdpf->data);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
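
/* Illustrative sketch (not part of the upstream file): a Tx completion path
 * returning frames in bulk; get_next_completed_frame() is a hypothetical
 * driver helper. The whole loop must run under rcu_read_lock(), as required
 * by xdp_return_frame_bulk() above.
 *
 *	struct xdp_frame_bulk bq;
 *
 *	xdp_frame_bulk_init(&bq);
 *
 *	rcu_read_lock();
 *	while ((xdpf = get_next_completed_frame(ring)))
 *		xdp_return_frame_bulk(xdpf, &bq);
 *	xdp_flush_frame_bulk(&bq);
 *	rcu_read_unlock();
 */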

/**
 * xdp_return_frag -- free one XDP frag or decrement its refcount
 * @netmem: network memory reference to release
 * @xdp: &xdp_buff to release the frag for
 */
void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp)
{
	__xdp_return(netmem, xdp->rxq->mem.type, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frag);

void xdp_return_buff(struct xdp_buff *xdp)
{
	struct skb_shared_info *sinfo;

	if (likely(!xdp_buff_has_frags(xdp)))
		goto out;

	sinfo = xdp_get_shared_info_from_buff(xdp);
	for (u32 i = 0; i < sinfo->nr_frags; i++)
		__xdp_return(skb_frag_netmem(&sinfo->frags[i]),
			     xdp->rxq->mem.type, true, xdp);

out:
	__xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, true, xdp);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);

void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf)
{
	if (info->prog)
		bpf_prog_put(info->prog);
	info->prog = bpf->prog;
	info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);

struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
{
	unsigned int metasize, totsize;
	void *addr, *data_to_copy;
	struct xdp_frame *xdpf;
	struct page *page;

	/* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
	metasize = xdp_data_meta_unsupported(xdp) ? 0 :
		   xdp->data - xdp->data_meta;
	totsize = xdp->data_end - xdp->data + metasize;

	if (sizeof(*xdpf) + totsize > PAGE_SIZE)
		return NULL;

	page = dev_alloc_page();
	if (!page)
		return NULL;

	addr = page_to_virt(page);
	xdpf = addr;
	memset(xdpf, 0, sizeof(*xdpf));

	addr += sizeof(*xdpf);
	data_to_copy = metasize ? xdp->data_meta : xdp->data;
	memcpy(addr, data_to_copy, totsize);

	xdpf->data = addr + metasize;
	xdpf->len = totsize - metasize;
	xdpf->headroom = 0;
	xdpf->metasize = metasize;
	xdpf->frame_sz = PAGE_SIZE;
	xdpf->mem_type = MEM_TYPE_PAGE_ORDER0;

	xsk_buff_free(xdp);
	return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);

/* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */
void xdp_warn(const char *msg, const char *func, const int line)
{
	WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
}
EXPORT_SYMBOL_GPL(xdp_warn);

/**
 * xdp_build_skb_from_buff - create an skb from &xdp_buff
 * @xdp: &xdp_buff to convert to an skb
 *
 * Perform common operations to create a new skb to pass up the stack from
 * &xdp_buff: allocate an skb head from the NAPI percpu cache, initialize
 * skb data pointers and offsets, set the recycle bit if the buff is
 * PP-backed, Rx queue index, protocol and update frags info.
 *
 * Return: new &sk_buff on success, %NULL on error.
 */
struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp)
{
	const struct xdp_rxq_info *rxq = xdp->rxq;
	const struct skb_shared_info *sinfo;
	struct sk_buff *skb;
	u32 nr_frags = 0;
	int metalen;

	if (unlikely(xdp_buff_has_frags(xdp))) {
		sinfo = xdp_get_shared_info_from_buff(xdp);
		nr_frags = sinfo->nr_frags;
	}

	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	__skb_put(skb, xdp->data_end - xdp->data);

	metalen = xdp->data - xdp->data_meta;
	if (metalen > 0)
		skb_metadata_set(skb, metalen);

	if (rxq->mem.type == MEM_TYPE_PAGE_POOL)
		skb_mark_for_recycle(skb);

	skb_record_rx_queue(skb, rxq->queue_index);

	if (unlikely(nr_frags)) {
		u32 tsize;

		tsize = sinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz;
		xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
					  tsize, xdp_buff_get_skb_flags(xdp));
	}

	skb->protocol = eth_type_trans(skb, rxq->dev);

	return skb;
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
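
/* Illustrative sketch (not part of the upstream file): an XDP_PASS path in a
 * driver built on top of the helper above; napi is the driver's hypothetical
 * &napi_struct.
 *
 *	skb = xdp_build_skb_from_buff(&xdp);
 *	if (unlikely(!skb)) {
 *		xdp_return_buff(&xdp);
 *		return;
 *	}
 *	napi_gro_receive(napi, skb);
 */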

/**
 * xdp_copy_frags_from_zc - copy frags from XSk buff to skb
 * @skb: skb to copy frags to
 * @xdp: XSk &xdp_buff from which the frags will be copied
 * @pp: &page_pool backing page allocation, if available
 *
 * Copy all frags from XSk &xdp_buff to the skb to pass it up the stack.
 * Allocate a new buffer for each frag, copy it and attach to the skb.
 *
 * Return: true on success, false on netmem allocation fail.
 */
static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
					    const struct xdp_buff *xdp,
					    struct page_pool *pp)
{
	struct skb_shared_info *sinfo = skb_shinfo(skb);
	const struct skb_shared_info *xinfo;
	u32 nr_frags, tsize = 0;
	u32 flags = 0;

	xinfo = xdp_get_shared_info_from_buff(xdp);
	nr_frags = xinfo->nr_frags;

	for (u32 i = 0; i < nr_frags; i++) {
		const skb_frag_t *frag = &xinfo->frags[i];
		u32 len = skb_frag_size(frag);
		u32 offset, truesize = len;
		struct page *page;

		page = page_pool_dev_alloc(pp, &offset, &truesize);
		if (unlikely(!page)) {
			sinfo->nr_frags = i;
			return false;
		}

		memcpy(page_address(page) + offset, skb_frag_address(frag),
		       LARGEST_ALIGN(len));
		__skb_fill_page_desc_noacc(sinfo, i, page, offset, len);

		tsize += truesize;
		if (page_is_pfmemalloc(page))
			flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
	}

	xdp_update_skb_frags_info(skb, nr_frags, xinfo->xdp_frags_size, tsize,
				  flags);

	return true;
}

/**
 * xdp_build_skb_from_zc - create an skb from XSk &xdp_buff
 * @xdp: source XSk buff
 *
 * Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb
 * head, new buffer for the head, copy the data and initialize the skb fields.
 * If there are frags, allocate new buffers for them and copy.
 * Buffers are allocated from the system percpu pools to try recycling them.
 * If new skb was built successfully, @xdp is returned to XSk pool's freelist.
 * On error, it remains untouched and the caller must take care of this.
 *
 * Return: new &sk_buff on success, %NULL on error.
 */
struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
{
	const struct xdp_rxq_info *rxq = xdp->rxq;
	u32 len = xdp->data_end - xdp->data_meta;
	u32 truesize = xdp->frame_sz;
	struct sk_buff *skb = NULL;
	struct page_pool *pp;
	int metalen;
	void *data;

	if (!IS_ENABLED(CONFIG_PAGE_POOL))
		return NULL;

	local_lock_nested_bh(&system_page_pool.bh_lock);
	pp = this_cpu_read(system_page_pool.pool);
	data = page_pool_dev_alloc_va(pp, &truesize);
	if (unlikely(!data))
		goto out;

	skb = napi_build_skb(data, truesize);
	if (unlikely(!skb)) {
		page_pool_free_va(pp, data, true);
		goto out;
	}

	skb_mark_for_recycle(skb);
	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);

	memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len));

	metalen = xdp->data - xdp->data_meta;
	if (metalen > 0) {
		skb_metadata_set(skb, metalen);
		__skb_pull(skb, metalen);
	}

	skb_record_rx_queue(skb, rxq->queue_index);

	if (unlikely(xdp_buff_has_frags(xdp)) &&
	    unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) {
		napi_consume_skb(skb, true);
		skb = NULL;
		goto out;
	}

	xsk_buff_free(xdp);

	skb->protocol = eth_type_trans(skb, rxq->dev);

out:
	local_unlock_nested_bh(&system_page_pool.bh_lock);
	return skb;
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);

struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
					   struct sk_buff *skb,
					   struct net_device *dev)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
	unsigned int headroom, frame_size;
	void *hard_start;
	u8 nr_frags;

	/* xdp frags frame */
	if (unlikely(xdp_frame_has_frags(xdpf)))
		nr_frags = sinfo->nr_frags;

	/* Part of headroom was reserved to xdpf */
	headroom = sizeof(*xdpf) + xdpf->headroom;

	/* Memory size backing xdp_frame data already have reserved
	 * room for build_skb to place skb_shared_info in tailroom.
	 */
	frame_size = xdpf->frame_sz;

	hard_start = xdpf->data - headroom;
	skb = build_skb_around(skb, hard_start, frame_size);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	__skb_put(skb, xdpf->len);
	if (xdpf->metasize)
		skb_metadata_set(skb, xdpf->metasize);

	if (unlikely(xdp_frame_has_frags(xdpf)))
		xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
					  nr_frags * xdpf->frame_sz,
					  xdp_frame_get_skb_flags(xdpf));

	/* Essential SKB info: protocol and skb->dev */
	skb->protocol = eth_type_trans(skb, dev);

	/* Optional SKB info, currently missing:
	 * - HW checksum info		(skb->ip_summed)
	 * - HW RX hash			(skb_set_hash)
	 * - RX ring dev queue index	(skb_record_rx_queue)
	 */

	if (xdpf->mem_type == MEM_TYPE_PAGE_POOL)
		skb_mark_for_recycle(skb);

	/* Allow SKB to reuse area used by xdp_frame */
	xdp_scrub_frame(xdpf);

	return skb;
}
EXPORT_SYMBOL_GPL(__xdp_build_skb_from_frame);

struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
					 struct net_device *dev)
{
	struct sk_buff *skb;

	skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	memset(skb, 0, offsetof(struct sk_buff, tail));

	return __xdp_build_skb_from_frame(xdpf, skb, dev);
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_frame);

struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
{
	unsigned int headroom, totalsize;
	struct xdp_frame *nxdpf;
	struct page *page;
	void *addr;

	headroom = xdpf->headroom + sizeof(*xdpf);
	totalsize = headroom + xdpf->len;

	if (unlikely(totalsize > PAGE_SIZE))
		return NULL;
	page = dev_alloc_page();
	if (!page)
		return NULL;
	addr = page_to_virt(page);

	memcpy(addr, xdpf, totalsize);

	nxdpf = addr;
	nxdpf->data = addr + headroom;
	nxdpf->frame_sz = PAGE_SIZE;
	nxdpf->mem_type = MEM_TYPE_PAGE_ORDER0;

	return nxdpf;
}

__bpf_kfunc_start_defs();

/**
 * bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp.
 * @ctx: XDP context pointer.
 * @timestamp: Return value pointer.
 *
 * Return:
 * * Returns 0 on success or ``-errno`` on error.
 * * ``-EOPNOTSUPP`` : means device driver does not implement kfunc
 * * ``-ENODATA``    : means no RX-timestamp available for this frame
 */
__bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
	return -EOPNOTSUPP;
}

/**
 * bpf_xdp_metadata_rx_hash - Read XDP frame RX hash.
 * @ctx: XDP context pointer.
 * @hash: Return value pointer.
 * @rss_type: Return value pointer for RSS type.
 *
 * The RSS hash type (@rss_type) specifies what portion of packet headers NIC
 * hardware used when calculating RSS hash value.  The RSS type can be decoded
 * via &enum xdp_rss_hash_type either matching on individual L3/L4 bits
 * ``XDP_RSS_L*`` or by combined traditional *RSS Hashing Types*
 * ``XDP_RSS_TYPE_L*``.
 *
 * Return:
 * * Returns 0 on success or ``-errno`` on error.
 * * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc
 * * ``-ENODATA``    : means no RX-hash available for this frame
 */
__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
					 enum xdp_rss_hash_type *rss_type)
{
	return -EOPNOTSUPP;
}

/**
 * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag
 * @ctx: XDP context pointer.
 * @vlan_proto: Destination pointer for VLAN Tag protocol identifier (TPID).
 * @vlan_tci: Destination pointer for VLAN TCI (VID + DEI + PCP)
 *
 * In case of success, ``vlan_proto`` contains *Tag protocol identifier (TPID)*,
 * usually ``ETH_P_8021Q`` or ``ETH_P_8021AD``, but some networks can use
 * custom TPIDs. ``vlan_proto`` is stored in **network byte order (BE)**
 * and should be used as follows:
 * ``if (vlan_proto == bpf_htons(ETH_P_8021Q)) do_something();``
 *
 * ``vlan_tci`` contains the remaining 16 bits of a VLAN tag.
 * Driver is expected to provide those in **host byte order (usually LE)**,
 * so the bpf program should not perform byte conversion.
 * According to 802.1Q standard, *VLAN TCI (Tag control information)*
 * is a bit field that contains:
 * *VLAN identifier (VID)* that can be read with ``vlan_tci & 0xfff``,
 * *Drop eligible indicator (DEI)* - 1 bit,
 * *Priority code point (PCP)* - 3 bits.
 * For detailed meaning of DEI and PCP, please refer to other sources.
 *
 * Return:
 * * Returns 0 on success or ``-errno`` on error.
 * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
 * * ``-ENODATA``    : VLAN tag was not stripped or is not available
 */
__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
					     __be16 *vlan_proto, u16 *vlan_tci)
{
	return -EOPNOTSUPP;
}

__bpf_kfunc_end_defs();
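
/* Illustrative sketch (not part of the upstream file): how an XDP program
 * consumes these kfuncs from BPF C. The stubs above only return -EOPNOTSUPP;
 * drivers provide the real implementations via their xdp_metadata_ops.
 *
 *	extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
 *						 __u64 *timestamp) __ksym;
 *
 *	SEC("xdp")
 *	int rx_prog(struct xdp_md *ctx)
 *	{
 *		__u64 ts;
 *
 *		if (!bpf_xdp_metadata_rx_timestamp(ctx, &ts))
 *			bpf_printk("hw rx timestamp: %llu", ts);
 *
 *		return XDP_PASS;
 *	}
 */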

BTF_KFUNCS_START(xdp_metadata_kfunc_ids)
#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
BTF_KFUNCS_END(xdp_metadata_kfunc_ids)

static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &xdp_metadata_kfunc_ids,
};

BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
#define XDP_METADATA_KFUNC(name, _, str, __) BTF_ID(func, str)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC

u32 bpf_xdp_metadata_kfunc_id(int id)
{
	/* xdp_metadata_kfunc_ids is sorted and can't be used */
	return xdp_metadata_kfunc_ids_unsorted[id];
}

bool bpf_dev_bound_kfunc_id(u32 btf_id)
{
	return btf_id_set8_contains(&xdp_metadata_kfunc_ids, btf_id);
}

static int __init xdp_metadata_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set);
}
late_initcall(xdp_metadata_init);

void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val)
{
	val &= NETDEV_XDP_ACT_MASK;
	if (dev->xdp_features == val)
		return;

	netdev_assert_locked_or_invisible(dev);
	dev->xdp_features = val;

	if (dev->reg_state == NETREG_REGISTERED)
		call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL_GPL(xdp_set_features_flag_locked);

void xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
{
	netdev_lock(dev);
	xdp_set_features_flag_locked(dev, val);
	netdev_unlock(dev);
}
EXPORT_SYMBOL_GPL(xdp_set_features_flag);

void xdp_features_set_redirect_target_locked(struct net_device *dev,
					     bool support_sg)
{
	xdp_features_t val = (dev->xdp_features | NETDEV_XDP_ACT_NDO_XMIT);

	if (support_sg)
		val |= NETDEV_XDP_ACT_NDO_XMIT_SG;
	xdp_set_features_flag_locked(dev, val);
}
EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target_locked);

void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
{
	netdev_lock(dev);
	xdp_features_set_redirect_target_locked(dev, support_sg);
	netdev_unlock(dev);
}
EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target);

void xdp_features_clear_redirect_target_locked(struct net_device *dev)
{
	xdp_features_t val = dev->xdp_features;

	val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG);
	xdp_set_features_flag_locked(dev, val);
}
EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target_locked);

void xdp_features_clear_redirect_target(struct net_device *dev)
{
	netdev_lock(dev);
	xdp_features_clear_redirect_target_locked(dev);
	netdev_unlock(dev);
}
EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target);