1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef MM_SLAB_H
3#define MM_SLAB_H
4
5#include <linux/reciprocal_div.h>
6#include <linux/list_lru.h>
7#include <linux/local_lock.h>
8#include <linux/random.h>
9#include <linux/kobject.h>
10#include <linux/sched/mm.h>
11#include <linux/memcontrol.h>
12#include <linux/kfence.h>
13#include <linux/kasan.h>
14
15/*
16 * Internal slab definitions
17 */
18
/*
 * Pick the double-word cmpxchg flavour used for the freelist/counter pair:
 * the 128-bit primitives when CONFIG_64BIT is set, the 64-bit ones otherwise.
 *
 * system_has_freelist_aba() and try_cmpxchg_freelist are only defined when
 * the architecture advertises the matching system_has_cmpxchg*() macro;
 * this_cpu_try_cmpxchg_freelist and freelist_full_t are always defined.
 */
#ifdef CONFIG_64BIT
# ifdef system_has_cmpxchg128
# define system_has_freelist_aba()	system_has_cmpxchg128()
# define try_cmpxchg_freelist		try_cmpxchg128
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg128
typedef u128 freelist_full_t;
#else /* CONFIG_64BIT */
# ifdef system_has_cmpxchg64
# define system_has_freelist_aba()	system_has_cmpxchg64()
# define try_cmpxchg_freelist		try_cmpxchg64
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg64
typedef u64 freelist_full_t;
#endif /* CONFIG_64BIT */

/*
 * Without CONFIG_HAVE_ALIGNED_STRUCT_PAGE the macro is removed again, so
 * everything guarded by "#ifdef system_has_freelist_aba" is compiled out.
 */
#if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
#undef system_has_freelist_aba
#endif
38
/*
 * Freelist pointer and counter to cmpxchg together, avoids the typical ABA
 * problems with cmpxchg of just a pointer.
 *
 * The anonymous struct gives access to the two halves individually, while
 * @full overlays both of them as one freelist_full_t value.
 */
typedef union {
	struct {
		void *freelist;
		unsigned long counter;
	};
	freelist_full_t full;
} freelist_aba_t;
50
/*
 * Reuses the bits in struct page.
 *
 * The layout is pinned against struct page by the SLAB_MATCH() static
 * assertions that follow this definition; fields must not be moved without
 * keeping those assertions true.
 */
struct slab {
	/* Must sit at the same offset as struct page's flags */
	memdesc_flags_t flags;

	/* Overlays struct page's compound_head, so bit 0 has to stay clear */
	struct kmem_cache *slab_cache;
	union {
		struct {
			/* Linkage: only one of these views is in use at a time */
			union {
				struct list_head slab_list;
				struct { /* For deferred deactivate_slab() */
					struct llist_node llnode;
					void *flush_freelist;
				};
#ifdef CONFIG_SLUB_CPU_PARTIAL
				struct {
					struct slab *next;
					int slabs;	/* Nr of slabs left */
				};
#endif
			};
			/* Double-word boundary */
			union {
				struct {
					void *freelist;		/* first free object */
					union {
						/* The bitfields below, viewed as one word */
						unsigned long counters;
						struct {
							unsigned inuse:16;
							unsigned objects:15;
							/*
							 * If slab debugging is enabled then the
							 * frozen bit can be reused to indicate
							 * that the slab was corrupted
							 */
							unsigned frozen:1;
						};
					};
				};
#ifdef system_has_freelist_aba
				/* freelist + counters as a single cmpxchg-able unit */
				freelist_aba_t freelist_counter;
#endif
			};
		};
		struct rcu_head rcu_head;
	};

	unsigned int __page_type;
	/* Must sit at the same offset as struct page's _refcount */
	atomic_t __page_refcount;
#ifdef CONFIG_SLAB_OBJ_EXT
	/* Extension vector pointer plus flag bits; decode via slab_obj_exts() */
	unsigned long obj_exts;
#endif
};
103
/*
 * Compile-time layout checks: each SLAB_MATCH(pg, sl) asserts that member
 * @pg of struct page and member @sl of struct slab share the same offset.
 */
#define SLAB_MATCH(pg, sl)						\
	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, flags);
SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, obj_exts);
#elif defined(CONFIG_SLAB_OBJ_EXT)
SLAB_MATCH(_unused_slab_obj_exts, obj_exts);
#endif
#undef SLAB_MATCH
/* struct slab must fit inside the struct page it overlays */
static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(system_has_freelist_aba)
/* The freelist/counters pair must be aligned to its own double-word size */
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
#endif
119
120/**
121 * folio_slab - Converts from folio to slab.
122 * @folio: The folio.
123 *
124 * Currently struct slab is a different representation of a folio where
125 * folio_test_slab() is true.
126 *
127 * Return: The slab which contains this folio.
128 */
129#define folio_slab(folio) (_Generic((folio), \
130 const struct folio *: (const struct slab *)(folio), \
131 struct folio *: (struct slab *)(folio)))
132
/**
 * slab_folio - The folio allocated for a slab
 * @s: The slab.
 *
 * Slabs are allocated as folios that contain the individual objects and are
 * using some fields in the first struct page of the folio - those fields are
 * now accessed by struct slab. It is occasionally necessary to convert back to
 * a folio in order to communicate with the rest of the mm. Please use this
 * helper function instead of casting yourself, as the implementation may change
 * in the future.
 *
 * The const qualifier of @s is carried over to the result. @s is
 * parenthesized in the expansion so that an expression argument such as
 * "slab + 1" is evaluated in full before it is cast.
 */
#define slab_folio(s)		(_Generic((s),				\
	const struct slab *:	(const struct folio *)(s),		\
	struct slab *:		(struct folio *)(s)))
147
/**
 * page_slab - View the first struct page of a slab as the slab.
 * @p: The first (either head of compound or single) page of slab.
 *
 * Temporary helper for callers that already know @p is the compound head or
 * a single order-0 page and need the struct slab for it. The const qualifier
 * of @p is carried over to the result.
 *
 * In the long run everything should ideally operate on struct slab directly,
 * or reach it through the folio.
 *
 * Return: The slab which contains this page
 */
#define page_slab(p)		(_Generic((p),				\
	struct page *:		(struct slab *)(p),			\
	const struct page *:	(const struct slab *)(p)))
163
/**
 * slab_page - The first struct page allocated for a slab
 * @s: The slab.
 *
 * A convenience wrapper for converting slab to the first struct page of the
 * underlying folio, to communicate with code not yet converted to folio or
 * struct slab.
 *
 * Expands to folio_page() on slab_folio(@s) with page index 0.
 */
#define slab_page(s) folio_page(slab_folio(s), 0)
173
/* Address of the slab's memory, as reported by folio_address() for its folio. */
static inline void *slab_address(const struct slab *slab)
{
	const struct folio *folio = slab_folio(slab);

	return folio_address(folio);
}
178
179static inline int slab_nid(const struct slab *slab)
180{
181 return memdesc_nid(mdf: slab->flags);
182}
183
184static inline pg_data_t *slab_pgdat(const struct slab *slab)
185{
186 return NODE_DATA(slab_nid(slab));
187}
188
189static inline struct slab *virt_to_slab(const void *addr)
190{
191 struct folio *folio = virt_to_folio(x: addr);
192
193 if (!folio_test_slab(folio))
194 return NULL;
195
196 return folio_slab(folio);
197}
198
/* Allocation order of the slab, i.e. folio_order() of its folio. */
static inline int slab_order(const struct slab *slab)
{
	const struct folio *folio = slab_folio(slab);

	return folio_order(folio);
}
203
204static inline size_t slab_size(const struct slab *slab)
205{
206 return PAGE_SIZE << slab_order(slab);
207}
208
/*
 * Accessors for the ->partial member of the per-cpu structure @c.
 * With CONFIG_SLUB_CPU_PARTIAL disabled the readers evaluate to NULL and the
 * setter expands to nothing.
 */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c)			((c)->partial)

/* Store (p)->next as the new value of (c)->partial */
#define slub_set_percpu_partial(c, p)		\
({						\
	slub_percpu_partial(c) = (p)->next;	\
})

#define slub_percpu_partial_read_once(c)	READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c)			NULL

#define slub_set_percpu_partial(c, p)

#define slub_percpu_partial_read_once(c)	NULL
#endif // CONFIG_SLUB_CPU_PARTIAL
225
/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 */
struct kmem_cache_order_objects {
	/* Packed order + object count; the bit layout is not defined in this header */
	unsigned int x;
};
234
/*
 * Slab cache management.
 *
 * One instance describes one slab cache. Several members only exist under
 * the configuration option guarding them.
 */
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
	/* Per-cpu state; compiled out for CONFIG_SLUB_TINY */
	struct kmem_cache_cpu __percpu *cpu_slab;
	struct lock_class_key lock_key;
#endif
	struct slub_percpu_sheaves __percpu *cpu_sheaves;
	/* Used for retrieving partial slabs, etc. */
	slab_flags_t flags;
	unsigned long min_partial;
	unsigned int size;		/* Object size including metadata */
	unsigned int object_size;	/* Object size without metadata */
	/* Reciprocal value passed to reciprocal_divide() by __obj_to_index() */
	struct reciprocal_value reciprocal_size;
	unsigned int offset;		/* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
	/* Number of per cpu partial objects to keep around */
	unsigned int cpu_partial;
	/* Number of per cpu partial slabs to keep around */
	unsigned int cpu_partial_slabs;
#endif
	unsigned int sheaf_capacity;
	struct kmem_cache_order_objects oo;

	/* Allocation and freeing of slabs */
	struct kmem_cache_order_objects min;
	gfp_t allocflags;		/* gfp flags to use on each alloc */
	int refcount;			/* Refcount for slab cache destroy */
	void (*ctor)(void *object);	/* Object constructor */
	unsigned int inuse;		/* Offset to metadata */
	unsigned int align;		/* Alignment */
	unsigned int red_left_pad;	/* Left redzone padding size */
	const char *name;		/* Name (only for display!) */
	struct list_head list;		/* List of slab caches */
#ifdef CONFIG_SYSFS
	struct kobject kobj;		/* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	unsigned long random;
#endif

#ifdef CONFIG_NUMA
	/*
	 * Defragmentation by allocating from a remote node.
	 */
	unsigned int remote_node_defrag_ratio;
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
	unsigned int *random_seq;
#endif

#ifdef CONFIG_KASAN_GENERIC
	struct kasan_cache kasan_info;
#endif

#ifdef CONFIG_HARDENED_USERCOPY
	unsigned int useroffset;	/* Usercopy region offset */
	unsigned int usersize;		/* Usercopy region size */
#endif

	/* One kmem_cache_node pointer slot per node id, MAX_NUMNODES in total */
	struct kmem_cache_node *node[MAX_NUMNODES];
};
299
/*
 * sysfs hooks for slab caches. Real implementations are only declared when
 * CONFIG_SYSFS is set and CONFIG_SLUB_TINY is not; otherwise both calls are
 * empty inline stubs and SLAB_SUPPORTS_SYSFS stays undefined.
 */
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS 1
void sysfs_slab_unlink(struct kmem_cache *s);
void sysfs_slab_release(struct kmem_cache *s);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s) { }
static inline void sysfs_slab_release(struct kmem_cache *s) { }
#endif
308
309void *fixup_red_left(struct kmem_cache *s, void *p);
310
311static inline void *nearest_obj(struct kmem_cache *cache,
312 const struct slab *slab, void *x)
313{
314 void *object = x - (x - slab_address(slab)) % cache->size;
315 void *last_object = slab_address(slab) +
316 (slab->objects - 1) * cache->size;
317 void *result = (unlikely(object > last_object)) ? last_object : object;
318
319 result = fixup_red_left(s: cache, p: result);
320 return result;
321}
322
323/* Determine object index from a given position */
324static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
325 void *addr, void *obj)
326{
327 return reciprocal_divide(a: kasan_reset_tag(addr: obj) - addr,
328 R: cache->reciprocal_size);
329}
330
/*
 * Index of @obj within @slab.
 *
 * Addresses for which is_kfence_address() is true always yield index 0;
 * everything else is computed relative to the slab's base address.
 */
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
					const struct slab *slab, void *obj)
{
	if (is_kfence_address(obj))
		return 0;
	return __obj_to_index(cache, slab_address(slab), obj);
}
338
339static inline int objs_per_slab(const struct kmem_cache *cache,
340 const struct slab *slab)
341{
342 return slab->objects;
343}
344
/*
 * State of the slab allocator.
 *
 * This is used to describe the states of the allocator during bootup.
 * Allocators use this to gradually bootstrap themselves. Most allocators
 * have the problem that the structures used for managing slab caches are
 * allocated from slab caches themselves.
 *
 * The enumerators are listed in ascending order, DOWN being the lowest value.
 */
enum slab_state {
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node available */
	UP,			/* Slab caches usable but not all extras yet */
	FULL			/* Everything is working */
};
359
/* Current bootstrap state of the allocator (see enum slab_state) */
extern enum slab_state slab_state;

/* The slab cache mutex protects the management structures during changes */
extern struct mutex slab_mutex;

/* The list of all slab caches on the system */
extern struct list_head slab_caches;

/* The slab cache that manages slab cache information */
extern struct kmem_cache *kmem_cache;

/* A table of kmalloc cache names and sizes */
extern const struct kmalloc_info_struct {
	/* One name per kmalloc type */
	const char *name[NR_KMALLOC_TYPES];
	unsigned int size;
} kmalloc_info[];

/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(void);

/*
 * Lookup table used by kmalloc_slab() for sizes up to 192 bytes, indexed by
 * size_index_elem(): (192 - 1) / 8 == 23, hence 24 entries.
 */
extern u8 kmalloc_size_index[24];
382
/*
 * Map a byte count to its 8-byte bucket: 1..8 -> 0, 9..16 -> 1, and so on.
 * The subtraction is done in unsigned arithmetic, so @bytes == 0 wraps.
 */
static inline unsigned int size_index_elem(unsigned int bytes)
{
	unsigned int last_byte = bytes - 1;

	return last_byte / 8;
}
387
388/*
389 * Find the kmem_cache structure that serves a given size of
390 * allocation
391 *
392 * This assumes size is larger than zero and not larger than
393 * KMALLOC_MAX_CACHE_SIZE and the caller must check that.
394 */
395static inline struct kmem_cache *
396kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
397{
398 unsigned int index;
399
400 if (!b)
401 b = &kmalloc_caches[kmalloc_type(flags, caller)];
402 if (size <= 192)
403 index = kmalloc_size_index[size_index_elem(bytes: size)];
404 else
405 index = fls(x: size - 1);
406
407 return (*b)[index];
408}
409
/* Takes and returns a gfp mask; implemented outside this header */
gfp_t kmalloc_fix_flags(gfp_t flags);

/* Functions provided by the slab allocators */
int do_kmem_cache_create(struct kmem_cache *s, const char *name,
			 unsigned int size, struct kmem_cache_args *args,
			 slab_flags_t flags);

void __init kmem_cache_init(void);
extern void create_boot_cache(struct kmem_cache *, const char *name,
			unsigned int size, slab_flags_t flags,
			unsigned int useroffset, unsigned int usersize);

/* Cache merging helpers; implemented outside this header */
int slab_unmergeable(struct kmem_cache *s);
struct kmem_cache *find_mergeable(unsigned size, unsigned align,
		slab_flags_t flags, const char *name, void (*ctor)(void *));
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *));

slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name);
430
431static inline bool is_kmalloc_cache(struct kmem_cache *s)
432{
433 return (s->flags & SLAB_KMALLOC);
434}
435
436static inline bool is_kmalloc_normal(struct kmem_cache *s)
437{
438 if (!is_kmalloc_cache(s))
439 return false;
440 return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
441}
442
/* RCU sheaf helpers; implemented outside this header */
bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj);
void flush_all_rcu_sheaves(void);
445
/* Non-debug cache flags that form part of SLAB_FLAGS_PERMITTED */
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS | \
			 SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
			 SLAB_TEMPORARY | SLAB_ACCOUNT | \
			 SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)

/* Debug flags; kmem_cache_debug_flags() warns when asked about none of them */
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)

/* Union of the core and debug flag sets */
#define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS)
457
/* Cache lifecycle operations; implemented outside this header */
bool __kmem_cache_empty(struct kmem_cache *);
int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
void slab_kmem_cache_release(struct kmem_cache *);
463
/* Forward declarations; only the type names are needed here */
struct seq_file;
struct file;

/* Per-cache statistics record filled in by get_slabinfo() */
struct slabinfo {
	unsigned long active_objs;
	unsigned long num_objs;
	unsigned long active_slabs;
	unsigned long num_slabs;
	unsigned long shared_avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int shared;
	unsigned int objects_per_slab;
	unsigned int cache_order;
};

/* Report statistics for cache @s through @sinfo */
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
481
#ifdef CONFIG_SLUB_DEBUG
/* The static key defaults to true only when CONFIG_SLUB_DEBUG_ON is set */
#ifdef CONFIG_SLUB_DEBUG_ON
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
#else
DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
extern void print_tracking(struct kmem_cache *s, void *object);
long validate_slab_cache(struct kmem_cache *s);
/* Test the slub_debug_enabled static key, hinted as unlikely */
static inline bool __slub_debug_enabled(void)
{
	return static_branch_unlikely(&slub_debug_enabled);
}
#else
/* !CONFIG_SLUB_DEBUG: print_tracking() is a no-op and debugging is never on */
static inline void print_tracking(struct kmem_cache *s, void *object)
{
}
static inline bool __slub_debug_enabled(void)
{
	return false;
}
#endif
503
504/*
505 * Returns true if any of the specified slab_debug flags is enabled for the
506 * cache. Use only for flags parsed by setup_slub_debug() as it also enables
507 * the static key.
508 */
509static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
510{
511 if (IS_ENABLED(CONFIG_SLUB_DEBUG))
512 VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
513 if (__slub_debug_enabled())
514 return s->flags & flags;
515 return false;
516}
517
/*
 * Only declared as a real function when both CONFIG_SLUB_DEBUG and
 * CONFIG_KUNIT are enabled; otherwise it is a stub that returns false.
 */
#if IS_ENABLED(CONFIG_SLUB_DEBUG) && IS_ENABLED(CONFIG_KUNIT)
bool slab_in_kunit_test(void);
#else
static inline bool slab_in_kunit_test(void) { return false; }
#endif
523
#ifdef CONFIG_SLAB_OBJ_EXT

/*
 * slab_obj_exts - get the pointer to the slab object extension vector
 * associated with a slab.
 * @slab: a pointer to the slab struct
 *
 * Returns a pointer to the object extension vector associated with the slab,
 * or NULL if no such vector has been associated yet.
 */
static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
	/* Read the field exactly once so checks and result use the same value */
	unsigned long obj_exts = READ_ONCE(slab->obj_exts);

#ifdef CONFIG_MEMCG
	/*
	 * obj_exts should be either NULL, a valid pointer with
	 * MEMCG_DATA_OBJEXTS bit set or be equal to OBJEXTS_ALLOC_FAIL.
	 */
	VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS) &&
		       obj_exts != OBJEXTS_ALLOC_FAIL, slab_page(slab));
	/* The MEMCG_DATA_KMEM bit must never be set on a slab */
	VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab));
#endif
	/* Strip the flag bits to obtain the vector pointer */
	return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK);
}

int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
			gfp_t gfp, bool new_slab);

#else /* CONFIG_SLAB_OBJ_EXT */

/* Without CONFIG_SLAB_OBJ_EXT there is never an extension vector */
static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
	return NULL;
}

#endif /* CONFIG_SLAB_OBJ_EXT */
561
562static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
563{
564 return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
565 NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
566}
567
#ifdef CONFIG_MEMCG
/* memcg allocation/free hooks; only declared with CONFIG_MEMCG */
bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
				  gfp_t flags, size_t size, void **p);
void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
			    void **p, int objects, struct slabobj_ext *obj_exts);
#endif
574
/* Takes an rcu_head; implemented outside this header */
void kvfree_rcu_cb(struct rcu_head *head);

/* Returns a size for the object at @objp; implemented outside this header */
size_t __ksize(const void *objp);
578
579static inline size_t slab_ksize(const struct kmem_cache *s)
580{
581#ifdef CONFIG_SLUB_DEBUG
582 /*
583 * Debugging requires use of the padding between object
584 * and whatever may come after it.
585 */
586 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
587 return s->object_size;
588#endif
589 if (s->flags & SLAB_KASAN)
590 return s->object_size;
591 /*
592 * If we have the need to store the freelist pointer
593 * back there or track user information then we can
594 * only use the space before that information.
595 */
596 if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
597 return s->inuse;
598 /*
599 * Else we can use all the padding etc for the allocation
600 */
601 return s->size;
602}
603
/* Real implementation only with CONFIG_SLUB_DEBUG; otherwise a no-op stub */
#ifdef CONFIG_SLUB_DEBUG
void dump_unreclaimable_slab(void);
#else
static inline void dump_unreclaimable_slab(void)
{
}
#endif
611
/* Implemented outside this header */
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);
613
/*
 * Freelist randomization sequence helpers. Without
 * CONFIG_SLAB_FREELIST_RANDOM, create is a stub that returns 0 and destroy
 * does nothing.
 */
#ifdef CONFIG_SLAB_FREELIST_RANDOM
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
			gfp_t gfp);
void cache_random_seq_destroy(struct kmem_cache *cachep);
#else
static inline int cache_random_seq_create(struct kmem_cache *cachep,
					unsigned int count, gfp_t gfp)
{
	return 0;
}
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
626
627static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
628{
629 if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
630 &init_on_alloc)) {
631 if (c->ctor)
632 return false;
633 if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
634 return flags & __GFP_ZERO;
635 return true;
636 }
637 return flags & __GFP_ZERO;
638}
639
640static inline bool slab_want_init_on_free(struct kmem_cache *c)
641{
642 if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
643 &init_on_free))
644 return !(c->ctor ||
645 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
646 return false;
647}
648
/* Needs both CONFIG_DEBUG_FS and CONFIG_SLUB_DEBUG; otherwise a no-op stub */
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
void debugfs_slab_release(struct kmem_cache *);
#else
static inline void debugfs_slab_release(struct kmem_cache *s) { }
#endif
654
#ifdef CONFIG_PRINTK
/* Number of entries in each stack array of struct kmem_obj_info */
#define KS_ADDRS_COUNT 16
/* Object description record filled in by __kmem_obj_info() */
struct kmem_obj_info {
	void *kp_ptr;
	struct slab *kp_slab;
	void *kp_objp;
	unsigned long kp_data_offset;
	struct kmem_cache *kp_slab_cache;
	void *kp_ret;
	void *kp_stack[KS_ADDRS_COUNT];
	void *kp_free_stack[KS_ADDRS_COUNT];
};
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
#endif
669
/* Implemented outside this header */
void __check_heap_object(const void *ptr, unsigned long n,
			 const struct slab *slab, bool to_user);

/* Implemented outside this header */
void defer_free_barrier(void);
674
675static inline bool slub_debug_orig_size(struct kmem_cache *s)
676{
677 return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
678 (s->flags & SLAB_KMALLOC));
679}
680
#ifdef CONFIG_SLUB_DEBUG
/* Only declared with CONFIG_SLUB_DEBUG; implemented outside this header */
void skip_orig_size_check(struct kmem_cache *s, const void *object);
#endif
684
685#endif /* MM_SLAB_H */
686