1// SPDX-License-Identifier: GPL-2.0
2/*
3 * SLUB: A slab allocator that limits cache line use instead of queuing
4 * objects in per cpu and per node lists.
5 *
6 * The allocator synchronizes using per slab locks or atomic operations
7 * and only uses a centralized lock to manage a pool of partial slabs.
8 *
9 * (C) 2007 SGI, Christoph Lameter
10 * (C) 2011 Linux Foundation, Christoph Lameter
11 */
12
13#include <linux/mm.h>
14#include <linux/swap.h> /* mm_account_reclaimed_pages() */
15#include <linux/module.h>
16#include <linux/bit_spinlock.h>
17#include <linux/interrupt.h>
18#include <linux/swab.h>
19#include <linux/bitops.h>
20#include <linux/slab.h>
21#include "slab.h"
22#include <linux/vmalloc.h>
23#include <linux/proc_fs.h>
24#include <linux/seq_file.h>
25#include <linux/kasan.h>
26#include <linux/node.h>
27#include <linux/kmsan.h>
28#include <linux/cpu.h>
29#include <linux/cpuset.h>
30#include <linux/mempolicy.h>
31#include <linux/ctype.h>
32#include <linux/stackdepot.h>
33#include <linux/debugobjects.h>
34#include <linux/kallsyms.h>
35#include <linux/kfence.h>
36#include <linux/memory.h>
37#include <linux/math64.h>
38#include <linux/fault-inject.h>
39#include <linux/kmemleak.h>
40#include <linux/stacktrace.h>
41#include <linux/prefetch.h>
42#include <linux/memcontrol.h>
43#include <linux/random.h>
44#include <kunit/test.h>
45#include <kunit/test-bug.h>
46#include <linux/sort.h>
47#include <linux/irq_work.h>
48#include <linux/kprobes.h>
49#include <linux/debugfs.h>
50#include <trace/events/kmem.h>
51
52#include "internal.h"
53
54/*
55 * Lock order:
56 * 1. slab_mutex (Global Mutex)
57 * 2. node->list_lock (Spinlock)
58 * 3. kmem_cache->cpu_slab->lock (Local lock)
59 * 4. slab_lock(slab) (Only on some arches)
60 * 5. object_map_lock (Only for debugging)
61 *
62 * slab_mutex
63 *
64 * The role of the slab_mutex is to protect the list of all the slabs
65 * and to synchronize major metadata changes to slab cache structures.
66 * Also synchronizes memory hotplug callbacks.
67 *
68 * slab_lock
69 *
70 * The slab_lock is a wrapper around the page lock, thus it is a bit
71 * spinlock.
72 *
73 * The slab_lock is only used on arches that do not have the ability
74 * to do a cmpxchg_double. It only protects:
75 *
76 * A. slab->freelist -> List of free objects in a slab
77 * B. slab->inuse -> Number of objects in use
78 * C. slab->objects -> Number of objects in slab
79 * D. slab->frozen -> frozen state
80 *
81 * Frozen slabs
82 *
83 * If a slab is frozen then it is exempt from list management. It is
84 * the cpu slab which is actively allocated from by the processor that
85 * froze it and it is not on any list. The processor that froze the
86 * slab is the one who can perform list operations on the slab. Other
87 * processors may put objects onto the freelist but the processor that
88 * froze the slab is the only one that can retrieve the objects from the
89 * slab's freelist.
90 *
91 * CPU partial slabs
92 *
 * Partially empty slabs are cached on the CPU partial list for
 * performance reasons, as taking a slab from there speeds up allocation.
95 * These slabs are not frozen, but are also exempt from list management,
96 * by clearing the SL_partial flag when moving out of the node
97 * partial list. Please see __slab_free() for more details.
98 *
99 * To sum up, the current scheme is:
100 * - node partial slab: SL_partial && !frozen
101 * - cpu partial slab: !SL_partial && !frozen
102 * - cpu slab: !SL_partial && frozen
103 * - full slab: !SL_partial && !frozen
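 *
 * As a purely illustrative sketch (not allocator code), a slab's role can
 * be read back from this state roughly as follows, assuming a hypothetical
 * helper that tests the SL_partial bit:
 *
 *	if (slab->frozen)
 *		role = cpu_slab;
 *	else if (slab_test_partial(slab))	/* hypothetical SL_partial test */
 *		role = node_partial_slab;
 *	else
 *		role = cpu_partial_or_full_slab;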
104 *
105 * list_lock
106 *
 * The list_lock protects the partial and full lists on each node and
 * the partial slab counter. While it is held, no slabs may be added to or
 * removed from those lists, nor may the number of partial slabs change.
110 * (Note that the total number of slabs is an atomic value that may be
111 * modified without taking the list lock).
112 *
113 * The list_lock is a centralized lock and thus we avoid taking it as
114 * much as possible. As long as SLUB does not have to handle partial
115 * slabs, operations can continue without any centralized lock. F.e.
116 * allocating a long series of objects that fill up slabs does not require
117 * the list lock.
118 *
119 * For debug caches, all allocations are forced to go through a list_lock
120 * protected region to serialize against concurrent validation.
121 *
122 * cpu_slab->lock local lock
123 *
 * This lock protects slowpath manipulation of all kmem_cache_cpu fields
125 * except the stat counters. This is a percpu structure manipulated only by
126 * the local cpu, so the lock protects against being preempted or interrupted
127 * by an irq. Fast path operations rely on lockless operations instead.
128 *
 * On PREEMPT_RT, the local lock neither disables interrupts nor preemption,
 * which means the lockless fastpath cannot be used as it might interfere with
 * an in-progress slow path operation. In this case the local lock is always
 * taken, but the freelist is still used for the common operations.
133 *
134 * lockless fastpaths
135 *
136 * The fast path allocation (slab_alloc_node()) and freeing (do_slab_free())
137 * are fully lockless when satisfied from the percpu slab (and when
138 * cmpxchg_double is possible to use, otherwise slab_lock is taken).
139 * They also don't disable preemption or migration or irqs. They rely on
140 * the transaction id (tid) field to detect being preempted or moved to
141 * another cpu.
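 *
 * A simplified sketch of that pattern (pseudocode, not the actual code):
 *
 *	c = raw_cpu_ptr(s->cpu_slab);
 *	object = c->freelist;
 *	tid = c->tid;
 *	next = get_freepointer(s, object);
 *	if (!this_cpu_cmpxchg_double(s->cpu_slab->freelist, s->cpu_slab->tid,
 *				     object, tid, next, next_tid(tid)))
 *		goto redo;
 *
 * If this cpu was preempted, migrated, or interrupted by something that
 * touched the percpu slab in between, the tid no longer matches, the
 * cmpxchg fails and the whole operation is simply retried.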
142 *
143 * irq, preemption, migration considerations
144 *
145 * Interrupts are disabled as part of list_lock or local_lock operations, or
146 * around the slab_lock operation, in order to make the slab allocator safe
147 * to use in the context of an irq.
148 *
149 * In addition, preemption (or migration on PREEMPT_RT) is disabled in the
150 * allocation slowpath, bulk allocation, and put_cpu_partial(), so that the
151 * local cpu doesn't change in the process and e.g. the kmem_cache_cpu pointer
152 * doesn't have to be revalidated in each section protected by the local lock.
153 *
154 * SLUB assigns one slab for allocation to each processor.
155 * Allocations only occur from these slabs called cpu slabs.
156 *
157 * Slabs with free elements are kept on a partial list and during regular
158 * operations no list for full slabs is used. If an object in a full slab is
159 * freed then the slab will show up again on the partial lists.
160 * We track full slabs for debugging purposes though because otherwise we
161 * cannot scan all objects.
162 *
163 * Slabs are freed when they become empty. Teardown and setup is
164 * minimal so we rely on the page allocators per cpu caches for
165 * fast frees and allocs.
166 *
167 * slab->frozen The slab is frozen and exempt from list processing.
168 * This means that the slab is dedicated to a purpose
169 * such as satisfying allocations for a specific
170 * processor. Objects may be freed in the slab while
171 * it is frozen but slab_free will then skip the usual
172 * list operations. It is up to the processor holding
173 * the slab to integrate the slab into the slab lists
174 * when the slab is no longer needed.
175 *
176 * One use of this flag is to mark slabs that are
177 * used for allocations. Then such a slab becomes a cpu
178 * slab. The cpu slab may be equipped with an additional
179 * freelist that allows lockless access to
180 * free objects in addition to the regular freelist
181 * that requires the slab lock.
182 *
183 * SLAB_DEBUG_FLAGS Slab requires special handling due to debug
184 * options set. This moves slab handling out of
185 * the fast path and disables lockless freelists.
186 */
187
188/**
189 * enum slab_flags - How the slab flags bits are used.
190 * @SL_locked: Is locked with slab_lock()
191 * @SL_partial: On the per-node partial list
192 * @SL_pfmemalloc: Was allocated from PF_MEMALLOC reserves
193 *
194 * The slab flags share space with the page flags but some bits have
195 * different interpretations. The high bits are used for information
196 * like zone/node/section.
197 */
198enum slab_flags {
199 SL_locked = PG_locked,
200 SL_partial = PG_workingset, /* Historical reasons for this bit */
201 SL_pfmemalloc = PG_active, /* Historical reasons for this bit */
202};
203
204/*
 * We could simply use migrate_disable()/enable(), but as long as that remains
 * a function call even on !PREEMPT_RT, use the inline preempt_disable() there
 * instead.
207 */
208#ifndef CONFIG_PREEMPT_RT
209#define slub_get_cpu_ptr(var) get_cpu_ptr(var)
210#define slub_put_cpu_ptr(var) put_cpu_ptr(var)
211#define USE_LOCKLESS_FAST_PATH() (true)
212#else
213#define slub_get_cpu_ptr(var) \
214({ \
215 migrate_disable(); \
216 this_cpu_ptr(var); \
217})
218#define slub_put_cpu_ptr(var) \
219do { \
220 (void)(var); \
221 migrate_enable(); \
222} while (0)
223#define USE_LOCKLESS_FAST_PATH() (false)
224#endif
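
/*
 * Illustrative usage sketch of the wrappers above (the real users appear
 * later in this file):
 *
 *	struct kmem_cache_cpu *c = slub_get_cpu_ptr(s->cpu_slab);
 *
 *	... work on c while pinned to this cpu (or with migration disabled
 *	    on PREEMPT_RT) ...
 *
 *	slub_put_cpu_ptr(s->cpu_slab);
 */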
225
226#ifndef CONFIG_SLUB_TINY
227#define __fastpath_inline __always_inline
228#else
229#define __fastpath_inline
230#endif
231
232#ifdef CONFIG_SLUB_DEBUG
233#ifdef CONFIG_SLUB_DEBUG_ON
234DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
235#else
236DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
237#endif
238#endif /* CONFIG_SLUB_DEBUG */
239
240#ifdef CONFIG_NUMA
241static DEFINE_STATIC_KEY_FALSE(strict_numa);
242#endif
243
244/* Structure holding parameters for get_partial() call chain */
245struct partial_context {
246 gfp_t flags;
247 unsigned int orig_size;
248 void *object;
249};
250
251static inline bool kmem_cache_debug(struct kmem_cache *s)
252{
253 return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
254}
255
256void *fixup_red_left(struct kmem_cache *s, void *p)
257{
258 if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
259 p += s->red_left_pad;
260
261 return p;
262}
263
264static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
265{
266#ifdef CONFIG_SLUB_CPU_PARTIAL
267 return !kmem_cache_debug(s);
268#else
269 return false;
270#endif
271}
272
273/*
274 * Issues still to be resolved:
275 *
276 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
277 *
278 * - Variable sizing of the per node arrays
279 */
280
281/* Enable to log cmpxchg failures */
282#undef SLUB_DEBUG_CMPXCHG
283
284#ifndef CONFIG_SLUB_TINY
285/*
286 * Minimum number of partial slabs. These will be left on the partial
287 * lists even if they are empty. kmem_cache_shrink may reclaim them.
288 */
289#define MIN_PARTIAL 5
290
291/*
292 * Maximum number of desirable partial slabs.
293 * The existence of more partial slabs makes kmem_cache_shrink
294 * sort the partial list by the number of objects in use.
295 */
296#define MAX_PARTIAL 10
297#else
298#define MIN_PARTIAL 0
299#define MAX_PARTIAL 0
300#endif
301
302#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
303 SLAB_POISON | SLAB_STORE_USER)
304
305/*
306 * These debug flags cannot use CMPXCHG because there might be consistency
307 * issues when checking or reading debug information
308 */
309#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
310 SLAB_TRACE)
311
312
313/*
314 * Debugging flags that require metadata to be stored in the slab. These get
315 * disabled when slab_debug=O is used and a cache's min order increases with
316 * metadata.
317 */
318#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
319
320#define OO_SHIFT 16
321#define OO_MASK ((1 << OO_SHIFT) - 1)
322#define MAX_OBJS_PER_PAGE 32767 /* since slab.objects is u15 */
323
324/* Internal SLUB flags */
325/* Poison object */
326#define __OBJECT_POISON __SLAB_FLAG_BIT(_SLAB_OBJECT_POISON)
327/* Use cmpxchg_double */
328
329#ifdef system_has_freelist_aba
330#define __CMPXCHG_DOUBLE __SLAB_FLAG_BIT(_SLAB_CMPXCHG_DOUBLE)
331#else
332#define __CMPXCHG_DOUBLE __SLAB_FLAG_UNUSED
333#endif
334
335/*
336 * Tracking user of a slab.
337 */
338#define TRACK_ADDRS_COUNT 16
339struct track {
340 unsigned long addr; /* Called from address */
341#ifdef CONFIG_STACKDEPOT
342 depot_stack_handle_t handle;
343#endif
344 int cpu; /* Was running on cpu */
345 int pid; /* Pid context */
346 unsigned long when; /* When did the operation occur */
347};
348
349enum track_item { TRACK_ALLOC, TRACK_FREE };
350
351#ifdef SLAB_SUPPORTS_SYSFS
352static int sysfs_slab_add(struct kmem_cache *);
353static int sysfs_slab_alias(struct kmem_cache *, const char *);
354#else
355static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
356static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
357 { return 0; }
358#endif
359
360#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
361static void debugfs_slab_add(struct kmem_cache *);
362#else
363static inline void debugfs_slab_add(struct kmem_cache *s) { }
364#endif
365
366enum stat_item {
367 ALLOC_PCS, /* Allocation from percpu sheaf */
368 ALLOC_FASTPATH, /* Allocation from cpu slab */
369 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
370 FREE_PCS, /* Free to percpu sheaf */
371 FREE_RCU_SHEAF, /* Free to rcu_free sheaf */
372 FREE_RCU_SHEAF_FAIL, /* Failed to free to a rcu_free sheaf */
373 FREE_FASTPATH, /* Free to cpu slab */
374 FREE_SLOWPATH, /* Freeing not to cpu slab */
375 FREE_FROZEN, /* Freeing to frozen slab */
376 FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
377 FREE_REMOVE_PARTIAL, /* Freeing removes last object */
378 ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
379 ALLOC_SLAB, /* Cpu slab acquired from page allocator */
380 ALLOC_REFILL, /* Refill cpu slab from slab freelist */
381 ALLOC_NODE_MISMATCH, /* Switching cpu slab */
382 FREE_SLAB, /* Slab freed to the page allocator */
383 CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
384 DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
385 DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
386 DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
387 DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
388 DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
389 DEACTIVATE_BYPASS, /* Implicit deactivation */
390 ORDER_FALLBACK, /* Number of times fallback was necessary */
391 CMPXCHG_DOUBLE_CPU_FAIL,/* Failures of this_cpu_cmpxchg_double */
392 CMPXCHG_DOUBLE_FAIL, /* Failures of slab freelist update */
393 CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
394 CPU_PARTIAL_FREE, /* Refill cpu partial on free */
395 CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
396 CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
397 SHEAF_FLUSH, /* Objects flushed from a sheaf */
398 SHEAF_REFILL, /* Objects refilled to a sheaf */
399 SHEAF_ALLOC, /* Allocation of an empty sheaf */
400 SHEAF_FREE, /* Freeing of an empty sheaf */
401 BARN_GET, /* Got full sheaf from barn */
402 BARN_GET_FAIL, /* Failed to get full sheaf from barn */
403 BARN_PUT, /* Put full sheaf to barn */
404 BARN_PUT_FAIL, /* Failed to put full sheaf to barn */
405 SHEAF_PREFILL_FAST, /* Sheaf prefill grabbed the spare sheaf */
406 SHEAF_PREFILL_SLOW, /* Sheaf prefill found no spare sheaf */
407 SHEAF_PREFILL_OVERSIZE, /* Allocation of oversize sheaf for prefill */
408 SHEAF_RETURN_FAST, /* Sheaf return reattached spare sheaf */
409 SHEAF_RETURN_SLOW, /* Sheaf return could not reattach spare */
410 NR_SLUB_STAT_ITEMS
411};
412
413#ifndef CONFIG_SLUB_TINY
414/*
415 * When changing the layout, make sure freelist and tid are still compatible
416 * with this_cpu_cmpxchg_double() alignment requirements.
417 */
418struct kmem_cache_cpu {
419 union {
420 struct {
421 void **freelist; /* Pointer to next available object */
422 unsigned long tid; /* Globally unique transaction id */
423 };
424 freelist_aba_t freelist_tid;
425 };
426 struct slab *slab; /* The slab from which we are allocating */
427#ifdef CONFIG_SLUB_CPU_PARTIAL
428 struct slab *partial; /* Partially allocated slabs */
429#endif
430 local_trylock_t lock; /* Protects the fields above */
431#ifdef CONFIG_SLUB_STATS
432 unsigned int stat[NR_SLUB_STAT_ITEMS];
433#endif
434};
435#endif /* CONFIG_SLUB_TINY */
436
437static inline void stat(const struct kmem_cache *s, enum stat_item si)
438{
439#ifdef CONFIG_SLUB_STATS
440 /*
441 * The rmw is racy on a preemptible kernel but this is acceptable, so
442 * avoid this_cpu_add()'s irq-disable overhead.
443 */
444 raw_cpu_inc(s->cpu_slab->stat[si]);
445#endif
446}
447
448static inline
449void stat_add(const struct kmem_cache *s, enum stat_item si, int v)
450{
451#ifdef CONFIG_SLUB_STATS
452 raw_cpu_add(s->cpu_slab->stat[si], v);
453#endif
454}
455
456#define MAX_FULL_SHEAVES 10
457#define MAX_EMPTY_SHEAVES 10
458
459struct node_barn {
460 spinlock_t lock;
461 struct list_head sheaves_full;
462 struct list_head sheaves_empty;
463 unsigned int nr_full;
464 unsigned int nr_empty;
465};
466
467struct slab_sheaf {
468 union {
469 struct rcu_head rcu_head;
470 struct list_head barn_list;
		/* only used for prefilled sheaves */
472 unsigned int capacity;
473 };
474 struct kmem_cache *cache;
475 unsigned int size;
476 int node; /* only used for rcu_sheaf */
477 void *objects[];
478};
479
480struct slub_percpu_sheaves {
481 local_trylock_t lock;
482 struct slab_sheaf *main; /* never NULL when unlocked */
483 struct slab_sheaf *spare; /* empty or full, may be NULL */
484 struct slab_sheaf *rcu_free; /* for batching kfree_rcu() */
485};
486
487/*
488 * The slab lists for all objects.
489 */
490struct kmem_cache_node {
491 spinlock_t list_lock;
492 unsigned long nr_partial;
493 struct list_head partial;
494#ifdef CONFIG_SLUB_DEBUG
495 atomic_long_t nr_slabs;
496 atomic_long_t total_objects;
497 struct list_head full;
498#endif
499 struct node_barn *barn;
500};
501
502static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
503{
504 return s->node[node];
505}
506
507/*
 * Get the barn of the current cpu's closest memory node. It may not exist
 * on systems that have memoryless nodes but lack CONFIG_HAVE_MEMORYLESS_NODES.
510 */
511static inline struct node_barn *get_barn(struct kmem_cache *s)
512{
	struct kmem_cache_node *n = get_node(s, numa_mem_id());
514
515 if (!n)
516 return NULL;
517
518 return n->barn;
519}
520
521/*
522 * Iterator over all nodes. The body will be executed for each node that has
523 * a kmem_cache_node structure allocated (which is true for all online nodes)
524 */
525#define for_each_kmem_cache_node(__s, __node, __n) \
526 for (__node = 0; __node < nr_node_ids; __node++) \
527 if ((__n = get_node(__s, __node)))
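
/*
 * Usage sketch (illustrative only):
 *
 *	struct kmem_cache_node *n;
 *	unsigned long total = 0;
 *	int node;
 *
 *	for_each_kmem_cache_node(s, node, n)
 *		total += n->nr_partial;
 */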
528
529/*
530 * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
531 * Corresponds to node_state[N_MEMORY], but can temporarily
532 * differ during memory hotplug/hotremove operations.
533 * Protected by slab_mutex.
534 */
535static nodemask_t slab_nodes;
536
537/*
538 * Workqueue used for flush_cpu_slab().
539 */
540static struct workqueue_struct *flushwq;
541
542struct slub_flush_work {
543 struct work_struct work;
544 struct kmem_cache *s;
545 bool skip;
546};
547
548static DEFINE_MUTEX(flush_lock);
549static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
550
551/********************************************************************
552 * Core slab cache functions
553 *******************************************************************/
554
555/*
556 * Returns freelist pointer (ptr). With hardening, this is obfuscated
557 * with an XOR of the address where the pointer is held and a per-cache
558 * random number.
559 */
560static inline freeptr_t freelist_ptr_encode(const struct kmem_cache *s,
561 void *ptr, unsigned long ptr_addr)
562{
563 unsigned long encoded;
564
565#ifdef CONFIG_SLAB_FREELIST_HARDENED
566 encoded = (unsigned long)ptr ^ s->random ^ swab(ptr_addr);
567#else
568 encoded = (unsigned long)ptr;
569#endif
570 return (freeptr_t){.v = encoded};
571}
572
573static inline void *freelist_ptr_decode(const struct kmem_cache *s,
574 freeptr_t ptr, unsigned long ptr_addr)
575{
576 void *decoded;
577
578#ifdef CONFIG_SLAB_FREELIST_HARDENED
579 decoded = (void *)(ptr.v ^ s->random ^ swab(ptr_addr));
580#else
581 decoded = (void *)ptr.v;
582#endif
583 return decoded;
584}
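
/*
 * Round-trip sketch for the hardened case (values purely illustrative):
 * with per-cache random value R and the free pointer stored at address A,
 * an object pointer P is encoded as
 *
 *	stored  = P ^ R ^ swab(A);
 *
 * and decoded with the same two XORs, so stored ^ R ^ swab(A) == P again.
 * A leaked freelist entry therefore does not directly reveal a kernel
 * address, and a blind overwrite most likely decodes to a wild pointer.
 */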
585
586static inline void *get_freepointer(struct kmem_cache *s, void *object)
587{
588 unsigned long ptr_addr;
589 freeptr_t p;
590
	object = kasan_reset_tag(object);
	ptr_addr = (unsigned long)object + s->offset;
	p = *(freeptr_t *)(ptr_addr);
	return freelist_ptr_decode(s, p, ptr_addr);
595}
596
597#ifndef CONFIG_SLUB_TINY
598static void prefetch_freepointer(const struct kmem_cache *s, void *object)
599{
	prefetchw(object + s->offset);
601}
602#endif
603
604/*
605 * When running under KMSAN, get_freepointer_safe() may return an uninitialized
606 * pointer value in the case the current thread loses the race for the next
607 * memory chunk in the freelist. In that case this_cpu_cmpxchg_double() in
608 * slab_alloc_node() will fail, so the uninitialized value won't be used, but
609 * KMSAN will still check all arguments of cmpxchg because of imperfect
610 * handling of inline assembly.
611 * To work around this problem, we apply __no_kmsan_checks to ensure that
612 * get_freepointer_safe() returns initialized memory.
613 */
614__no_kmsan_checks
615static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
616{
617 unsigned long freepointer_addr;
618 freeptr_t p;
619
620 if (!debug_pagealloc_enabled_static())
621 return get_freepointer(s, object);
622
	object = kasan_reset_tag(object);
	freepointer_addr = (unsigned long)object + s->offset;
	copy_from_kernel_nofault(&p, (freeptr_t *)freepointer_addr, sizeof(p));
	return freelist_ptr_decode(s, p, freepointer_addr);
627}
628
629static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
630{
631 unsigned long freeptr_addr = (unsigned long)object + s->offset;
632
633#ifdef CONFIG_SLAB_FREELIST_HARDENED
634 BUG_ON(object == fp); /* naive detection of double free or corruption */
635#endif
636
	freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
	*(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr);
639}
640
641/*
642 * See comment in calculate_sizes().
643 */
644static inline bool freeptr_outside_object(struct kmem_cache *s)
645{
646 return s->offset >= s->inuse;
647}
648
649/*
650 * Return offset of the end of info block which is inuse + free pointer if
651 * not overlapping with object.
652 */
653static inline unsigned int get_info_end(struct kmem_cache *s)
654{
655 if (freeptr_outside_object(s))
656 return s->inuse + sizeof(void *);
657 else
658 return s->inuse;
659}
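
/*
 * Example with made-up numbers: for a cache where s->inuse == 64 and the
 * free pointer lives outside the object (s->offset >= s->inuse),
 * get_info_end() returns 64 + sizeof(void *), i.e. 72 on 64-bit. That is
 * where the optional struct track pair and the original kmalloc request
 * size are placed; see set_orig_size() and check_pad_bytes() further down.
 */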
660
661/* Loop over all objects in a slab */
662#define for_each_object(__p, __s, __addr, __objects) \
663 for (__p = fixup_red_left(__s, __addr); \
664 __p < (__addr) + (__objects) * (__s)->size; \
665 __p += (__s)->size)
666
667static inline unsigned int order_objects(unsigned int order, unsigned int size)
668{
669 return ((unsigned int)PAGE_SIZE << order) / size;
670}
671
672static inline struct kmem_cache_order_objects oo_make(unsigned int order,
673 unsigned int size)
674{
675 struct kmem_cache_order_objects x = {
676 (order << OO_SHIFT) + order_objects(order, size)
677 };
678
679 return x;
680}
681
682static inline unsigned int oo_order(struct kmem_cache_order_objects x)
683{
684 return x.x >> OO_SHIFT;
685}
686
687static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
688{
689 return x.x & OO_MASK;
690}
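
/*
 * Worked example (illustrative numbers): with 4K pages, order 1 slabs of
 * 128-byte objects give order_objects(1, 128) == 8192 / 128 == 64, so
 * oo_make(1, 128) stores (1 << OO_SHIFT) + 64, from which oo_order() and
 * oo_objects() recover 1 and 64 respectively.
 */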
691
692#ifdef CONFIG_SLUB_CPU_PARTIAL
693static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
694{
695 unsigned int nr_slabs;
696
697 s->cpu_partial = nr_objects;
698
699 /*
700 * We take the number of objects but actually limit the number of
701 * slabs on the per cpu partial list, in order to limit excessive
702 * growth of the list. For simplicity we assume that the slabs will
703 * be half-full.
704 */
705 nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
706 s->cpu_partial_slabs = nr_slabs;
707}
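
/*
 * Example with made-up numbers: for nr_objects == 30 and
 * oo_objects(s->oo) == 16, the half-full assumption above gives
 * DIV_ROUND_UP(30 * 2, 16) == 4 slabs allowed on the per cpu partial list.
 */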
708
709static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
710{
711 return s->cpu_partial_slabs;
712}
713#else
714static inline void
715slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
716{
717}
718
719static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
720{
721 return 0;
722}
723#endif /* CONFIG_SLUB_CPU_PARTIAL */
724
725/*
 * If network-based swap is enabled, slub must keep track of whether memory
 * was allocated from pfmemalloc reserves.
728 */
729static inline bool slab_test_pfmemalloc(const struct slab *slab)
730{
731 return test_bit(SL_pfmemalloc, &slab->flags.f);
732}
733
734static inline void slab_set_pfmemalloc(struct slab *slab)
735{
	set_bit(SL_pfmemalloc, &slab->flags.f);
737}
738
739static inline void __slab_clear_pfmemalloc(struct slab *slab)
740{
741 __clear_bit(SL_pfmemalloc, &slab->flags.f);
742}
743
744/*
745 * Per slab locking using the pagelock
746 */
747static __always_inline void slab_lock(struct slab *slab)
748{
	bit_spin_lock(SL_locked, &slab->flags.f);
750}
751
752static __always_inline void slab_unlock(struct slab *slab)
753{
	bit_spin_unlock(SL_locked, &slab->flags.f);
755}
756
757static inline bool
758__update_freelist_fast(struct slab *slab,
759 void *freelist_old, unsigned long counters_old,
760 void *freelist_new, unsigned long counters_new)
761{
762#ifdef system_has_freelist_aba
763 freelist_aba_t old = { .freelist = freelist_old, .counter = counters_old };
764 freelist_aba_t new = { .freelist = freelist_new, .counter = counters_new };
765
766 return try_cmpxchg_freelist(&slab->freelist_counter.full, &old.full, new.full);
767#else
768 return false;
769#endif
770}
771
772static inline bool
773__update_freelist_slow(struct slab *slab,
774 void *freelist_old, unsigned long counters_old,
775 void *freelist_new, unsigned long counters_new)
776{
777 bool ret = false;
778
779 slab_lock(slab);
780 if (slab->freelist == freelist_old &&
781 slab->counters == counters_old) {
782 slab->freelist = freelist_new;
783 slab->counters = counters_new;
784 ret = true;
785 }
786 slab_unlock(slab);
787
788 return ret;
789}
790
791/*
792 * Interrupts must be disabled (for the fallback code to work right), typically
793 * by an _irqsave() lock variant. On PREEMPT_RT the preempt_disable(), which is
794 * part of bit_spin_lock(), is sufficient because the policy is not to allow any
 * allocation/free operation in hardirq context. Therefore nothing can
796 * interrupt the operation.
797 */
798static inline bool __slab_update_freelist(struct kmem_cache *s, struct slab *slab,
799 void *freelist_old, unsigned long counters_old,
800 void *freelist_new, unsigned long counters_new,
801 const char *n)
802{
803 bool ret;
804
805 if (USE_LOCKLESS_FAST_PATH())
806 lockdep_assert_irqs_disabled();
807
808 if (s->flags & __CMPXCHG_DOUBLE) {
809 ret = __update_freelist_fast(slab, freelist_old, counters_old,
810 freelist_new, counters_new);
811 } else {
812 ret = __update_freelist_slow(slab, freelist_old, counters_old,
813 freelist_new, counters_new);
814 }
815 if (likely(ret))
816 return true;
817
818 cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);
820
821#ifdef SLUB_DEBUG_CMPXCHG
822 pr_info("%s %s: cmpxchg double redo ", n, s->name);
823#endif
824
825 return false;
826}
827
828static inline bool slab_update_freelist(struct kmem_cache *s, struct slab *slab,
829 void *freelist_old, unsigned long counters_old,
830 void *freelist_new, unsigned long counters_new,
831 const char *n)
832{
833 bool ret;
834
835 if (s->flags & __CMPXCHG_DOUBLE) {
836 ret = __update_freelist_fast(slab, freelist_old, counters_old,
837 freelist_new, counters_new);
838 } else {
839 unsigned long flags;
840
841 local_irq_save(flags);
842 ret = __update_freelist_slow(slab, freelist_old, counters_old,
843 freelist_new, counters_new);
844 local_irq_restore(flags);
845 }
846 if (likely(ret))
847 return true;
848
849 cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);
851
852#ifdef SLUB_DEBUG_CMPXCHG
853 pr_info("%s %s: cmpxchg double redo ", n, s->name);
854#endif
855
856 return false;
857}
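
/*
 * Simplified sketch of how the helpers above are used on the free side
 * (not the actual code; see __slab_free() for the real thing):
 *
 *	struct slab old, new;
 *
 *	do {
 *		old.freelist = READ_ONCE(slab->freelist);
 *		old.counters = READ_ONCE(slab->counters);
 *		new.counters = old.counters;
 *		new.inuse -= cnt;
 *		set_freepointer(s, tail, old.freelist);
 *	} while (!slab_update_freelist(s, slab,
 *					old.freelist, old.counters,
 *					head, new.counters,
 *					"__slab_free"));
 */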
858
859/*
 * kmalloc caches have fixed sizes (mostly powers of 2), and the kmalloc() API
 * family rounds the real request size up to these fixed ones, so the object
 * may contain more space than was requested. Save the original request size
 * in the metadata area, for better debugging and sanity checks.
864 */
865static inline void set_orig_size(struct kmem_cache *s,
866 void *object, unsigned int orig_size)
867{
	void *p = kasan_reset_tag(object);
869
870 if (!slub_debug_orig_size(s))
871 return;
872
873 p += get_info_end(s);
874 p += sizeof(struct track) * 2;
875
876 *(unsigned int *)p = orig_size;
877}
878
879static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
880{
	void *p = kasan_reset_tag(object);

	if (is_kfence_address(object))
		return kfence_ksize(object);
885
886 if (!slub_debug_orig_size(s))
887 return s->object_size;
888
889 p += get_info_end(s);
890 p += sizeof(struct track) * 2;
891
892 return *(unsigned int *)p;
893}
894
895#ifdef CONFIG_SLUB_DEBUG
896
897/*
 * For debugging contexts where we want to check whether a struct slab
 * pointer appears to be valid.
900 */
901static inline bool validate_slab_ptr(struct slab *slab)
902{
903 return PageSlab(slab_page(slab));
904}
905
906static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
907static DEFINE_SPINLOCK(object_map_lock);
908
909static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
910 struct slab *slab)
911{
912 void *addr = slab_address(slab);
913 void *p;
914
	bitmap_zero(obj_map, slab->objects);

	for (p = slab->freelist; p; p = get_freepointer(s, p))
		set_bit(__obj_to_index(s, addr, p), obj_map);
919}
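
/*
 * Illustrative use of the map (the real users take object_map_lock when
 * filling the shared object_map above; they appear later in this file).
 * A set bit means the object is currently on the freelist:
 *
 *	__fill_map(object_map, s, slab);
 *	for_each_object(p, s, slab_address(slab), slab->objects)
 *		if (test_bit(__obj_to_index(s, slab_address(slab), p),
 *			     object_map))
 *			;	/* p is free */
 */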
920
921#if IS_ENABLED(CONFIG_KUNIT)
922static bool slab_add_kunit_errors(void)
923{
924 struct kunit_resource *resource;
925
926 if (!kunit_get_current_test())
927 return false;
928
929 resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
930 if (!resource)
931 return false;
932
933 (*(int *)resource->data)++;
934 kunit_put_resource(resource);
935 return true;
936}
937
938bool slab_in_kunit_test(void)
939{
940 struct kunit_resource *resource;
941
942 if (!kunit_get_current_test())
943 return false;
944
945 resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
946 if (!resource)
947 return false;
948
949 kunit_put_resource(resource);
950 return true;
951}
952#else
953static inline bool slab_add_kunit_errors(void) { return false; }
954#endif
955
956static inline unsigned int size_from_object(struct kmem_cache *s)
957{
958 if (s->flags & SLAB_RED_ZONE)
959 return s->size - s->red_left_pad;
960
961 return s->size;
962}
963
964static inline void *restore_red_left(struct kmem_cache *s, void *p)
965{
966 if (s->flags & SLAB_RED_ZONE)
967 p -= s->red_left_pad;
968
969 return p;
970}
971
972/*
973 * Debug settings:
974 */
975#if defined(CONFIG_SLUB_DEBUG_ON)
976static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
977#else
978static slab_flags_t slub_debug;
979#endif
980
981static char *slub_debug_string;
982static int disable_higher_order_debug;
983
984/*
985 * slub is about to manipulate internal object metadata. This memory lies
986 * outside the range of the allocated object, so accessing it would normally
987 * be reported by kasan as a bounds error. metadata_access_enable() is used
988 * to tell kasan that these accesses are OK.
989 */
990static inline void metadata_access_enable(void)
991{
992 kasan_disable_current();
993 kmsan_disable_current();
994}
995
996static inline void metadata_access_disable(void)
997{
998 kmsan_enable_current();
999 kasan_enable_current();
1000}
1001
1002/*
1003 * Object debugging
1004 */
1005
1006/* Verify that a pointer has an address that is valid within a slab page */
1007static inline int check_valid_pointer(struct kmem_cache *s,
1008 struct slab *slab, void *object)
1009{
1010 void *base;
1011
1012 if (!object)
1013 return 1;
1014
1015 base = slab_address(slab);
	object = kasan_reset_tag(object);
	object = restore_red_left(s, object);
1018 if (object < base || object >= base + slab->objects * s->size ||
1019 (object - base) % s->size) {
1020 return 0;
1021 }
1022
1023 return 1;
1024}
1025
1026static void print_section(char *level, char *text, u8 *addr,
1027 unsigned int length)
1028{
1029 metadata_access_enable();
	print_hex_dump(level, text, DUMP_PREFIX_ADDRESS,
			16, 1, kasan_reset_tag((void *)addr), length, 1);
1032 metadata_access_disable();
1033}
1034
1035static struct track *get_track(struct kmem_cache *s, void *object,
1036 enum track_item alloc)
1037{
1038 struct track *p;
1039
1040 p = object + get_info_end(s);
1041
	return kasan_reset_tag(p + alloc);
1043}
1044
1045#ifdef CONFIG_STACKDEPOT
1046static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
1047{
1048 depot_stack_handle_t handle;
1049 unsigned long entries[TRACK_ADDRS_COUNT];
1050 unsigned int nr_entries;
1051
	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
	handle = stack_depot_save(entries, nr_entries, gfp_flags);
1054
1055 return handle;
1056}
1057#else
1058static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
1059{
1060 return 0;
1061}
1062#endif
1063
1064static void set_track_update(struct kmem_cache *s, void *object,
1065 enum track_item alloc, unsigned long addr,
1066 depot_stack_handle_t handle)
1067{
1068 struct track *p = get_track(s, object, alloc);
1069
1070#ifdef CONFIG_STACKDEPOT
1071 p->handle = handle;
1072#endif
1073 p->addr = addr;
1074 p->cpu = smp_processor_id();
1075 p->pid = current->pid;
1076 p->when = jiffies;
1077}
1078
1079static __always_inline void set_track(struct kmem_cache *s, void *object,
1080 enum track_item alloc, unsigned long addr, gfp_t gfp_flags)
1081{
1082 depot_stack_handle_t handle = set_track_prepare(gfp_flags);
1083
1084 set_track_update(s, object, alloc, addr, handle);
1085}
1086
1087static void init_tracking(struct kmem_cache *s, void *object)
1088{
1089 struct track *p;
1090
1091 if (!(s->flags & SLAB_STORE_USER))
1092 return;
1093
	p = get_track(s, object, TRACK_ALLOC);
	memset(p, 0, 2*sizeof(struct track));
1096}
1097
1098static void print_track(const char *s, struct track *t, unsigned long pr_time)
1099{
1100 depot_stack_handle_t handle __maybe_unused;
1101
1102 if (!t->addr)
1103 return;
1104
1105 pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
1106 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
1107#ifdef CONFIG_STACKDEPOT
1108 handle = READ_ONCE(t->handle);
1109 if (handle)
		stack_depot_print(handle);
1111 else
1112 pr_err("object allocation/free stack trace missing\n");
1113#endif
1114}
1115
1116void print_tracking(struct kmem_cache *s, void *object)
1117{
1118 unsigned long pr_time = jiffies;
1119 if (!(s->flags & SLAB_STORE_USER))
1120 return;
1121
	print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
	print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
1124}
1125
1126static void print_slab_info(const struct slab *slab)
1127{
1128 pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
1129 slab, slab->objects, slab->inuse, slab->freelist,
1130 &slab->flags.f);
1131}
1132
1133void skip_orig_size_check(struct kmem_cache *s, const void *object)
1134{
	set_orig_size(s, (void *)object, s->object_size);
1136}
1137
1138static void __slab_bug(struct kmem_cache *s, const char *fmt, va_list argsp)
1139{
1140 struct va_format vaf;
1141 va_list args;
1142
1143 va_copy(args, argsp);
1144 vaf.fmt = fmt;
1145 vaf.va = &args;
1146 pr_err("=============================================================================\n");
1147 pr_err("BUG %s (%s): %pV\n", s ? s->name : "<unknown>", print_tainted(), &vaf);
1148 pr_err("-----------------------------------------------------------------------------\n\n");
1149 va_end(args);
1150}
1151
1152static void slab_bug(struct kmem_cache *s, const char *fmt, ...)
1153{
1154 va_list args;
1155
1156 va_start(args, fmt);
	__slab_bug(s, fmt, args);
1158 va_end(args);
1159}
1160
1161__printf(2, 3)
1162static void slab_fix(struct kmem_cache *s, const char *fmt, ...)
1163{
1164 struct va_format vaf;
1165 va_list args;
1166
1167 if (slab_add_kunit_errors())
1168 return;
1169
1170 va_start(args, fmt);
1171 vaf.fmt = fmt;
1172 vaf.va = &args;
1173 pr_err("FIX %s: %pV\n", s->name, &vaf);
1174 va_end(args);
1175}
1176
1177static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
1178{
1179 unsigned int off; /* Offset of last byte */
1180 u8 *addr = slab_address(slab);
1181
1182 print_tracking(s, object: p);
1183
1184 print_slab_info(slab);
1185
1186 pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
1187 p, p - addr, get_freepointer(s, p));
1188
1189 if (s->flags & SLAB_RED_ZONE)
1190 print_section(KERN_ERR, text: "Redzone ", addr: p - s->red_left_pad,
1191 length: s->red_left_pad);
1192 else if (p > addr + 16)
1193 print_section(KERN_ERR, text: "Bytes b4 ", addr: p - 16, length: 16);
1194
1195 print_section(KERN_ERR, text: "Object ", addr: p,
1196 min_t(unsigned int, s->object_size, PAGE_SIZE));
1197 if (s->flags & SLAB_RED_ZONE)
1198 print_section(KERN_ERR, text: "Redzone ", addr: p + s->object_size,
1199 length: s->inuse - s->object_size);
1200
1201 off = get_info_end(s);
1202
1203 if (s->flags & SLAB_STORE_USER)
1204 off += 2 * sizeof(struct track);
1205
1206 if (slub_debug_orig_size(s))
1207 off += sizeof(unsigned int);
1208
1209 off += kasan_metadata_size(cache: s, in_object: false);
1210
1211 if (off != size_from_object(s))
1212 /* Beginning of the filler is the free pointer */
1213 print_section(KERN_ERR, text: "Padding ", addr: p + off,
1214 length: size_from_object(s) - off);
1215}
1216
1217static void object_err(struct kmem_cache *s, struct slab *slab,
1218 u8 *object, const char *reason)
1219{
1220 if (slab_add_kunit_errors())
1221 return;
1222
1223 slab_bug(s, fmt: reason);
1224 if (!object || !check_valid_pointer(s, slab, object)) {
1225 print_slab_info(slab);
1226 pr_err("Invalid pointer 0x%p\n", object);
1227 } else {
1228 print_trailer(s, slab, p: object);
1229 }
1230 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
1231
1232 WARN_ON(1);
1233}
1234
1235static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
1236 void **freelist, void *nextfree)
1237{
1238 if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
1239 !check_valid_pointer(s, slab, object: nextfree) && freelist) {
1240 object_err(s, slab, object: *freelist, reason: "Freechain corrupt");
1241 *freelist = NULL;
1242 slab_fix(s, fmt: "Isolate corrupted freechain");
1243 return true;
1244 }
1245
1246 return false;
1247}
1248
1249static void __slab_err(struct slab *slab)
1250{
1251 if (slab_in_kunit_test())
1252 return;
1253
1254 print_slab_info(slab);
1255 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
1256
1257 WARN_ON(1);
1258}
1259
1260static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab,
1261 const char *fmt, ...)
1262{
1263 va_list args;
1264
1265 if (slab_add_kunit_errors())
1266 return;
1267
1268 va_start(args, fmt);
1269 __slab_bug(s, fmt, argsp: args);
1270 va_end(args);
1271
1272 __slab_err(slab);
1273}
1274
1275static void init_object(struct kmem_cache *s, void *object, u8 val)
1276{
1277 u8 *p = kasan_reset_tag(addr: object);
1278 unsigned int poison_size = s->object_size;
1279
1280 if (s->flags & SLAB_RED_ZONE) {
1281 /*
1282 * Here and below, avoid overwriting the KMSAN shadow. Keeping
1283 * the shadow makes it possible to distinguish uninit-value
1284 * from use-after-free.
1285 */
1286 memset_no_sanitize_memory(s: p - s->red_left_pad, c: val,
1287 n: s->red_left_pad);
1288
1289 if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
1290 /*
1291 * Redzone the extra allocated space by kmalloc than
1292 * requested, and the poison size will be limited to
1293 * the original request size accordingly.
1294 */
1295 poison_size = get_orig_size(s, object);
1296 }
1297 }
1298
1299 if (s->flags & __OBJECT_POISON) {
1300 memset_no_sanitize_memory(s: p, POISON_FREE, n: poison_size - 1);
1301 memset_no_sanitize_memory(s: p + poison_size - 1, POISON_END, n: 1);
1302 }
1303
1304 if (s->flags & SLAB_RED_ZONE)
1305 memset_no_sanitize_memory(s: p + poison_size, c: val,
1306 n: s->inuse - poison_size);
1307}
1308
1309static void restore_bytes(struct kmem_cache *s, const char *message, u8 data,
1310 void *from, void *to)
1311{
	slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x", message, from, to - 1, data);
	memset(from, data, to - from);
1314}
1315
1316#ifdef CONFIG_KMSAN
1317#define pad_check_attributes noinline __no_kmsan_checks
1318#else
1319#define pad_check_attributes
1320#endif
1321
1322static pad_check_attributes int
1323check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
1324 u8 *object, const char *what, u8 *start, unsigned int value,
1325 unsigned int bytes, bool slab_obj_print)
1326{
1327 u8 *fault;
1328 u8 *end;
1329 u8 *addr = slab_address(slab);
1330
1331 metadata_access_enable();
1332 fault = memchr_inv(s: kasan_reset_tag(addr: start), c: value, n: bytes);
1333 metadata_access_disable();
1334 if (!fault)
1335 return 1;
1336
1337 end = start + bytes;
1338 while (end > fault && end[-1] == value)
1339 end--;
1340
1341 if (slab_add_kunit_errors())
1342 goto skip_bug_print;
1343
1344 pr_err("[%s overwritten] 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
1345 what, fault, end - 1, fault - addr, fault[0], value);
1346
1347 if (slab_obj_print)
1348 object_err(s, slab, object, reason: "Object corrupt");
1349
1350skip_bug_print:
1351 restore_bytes(s, message: what, data: value, from: fault, to: end);
1352 return 0;
1353}
1354
1355/*
1356 * Object layout:
1357 *
1358 * object address
1359 * Bytes of the object to be managed.
1360 * If the freepointer may overlay the object then the free
1361 * pointer is at the middle of the object.
1362 *
1363 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
1364 * 0xa5 (POISON_END)
1365 *
1366 * object + s->object_size
1367 * Padding to reach word boundary. This is also used for Redzoning.
1368 * Padding is extended by another word if Redzoning is enabled and
1369 * object_size == inuse.
1370 *
1371 * We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
1372 * 0xcc (SLUB_RED_ACTIVE) for objects in use.
1373 *
1374 * object + s->inuse
1375 * Meta data starts here.
1376 *
1377 * A. Free pointer (if we cannot overwrite object on free)
1378 * B. Tracking data for SLAB_STORE_USER
1379 * C. Original request size for kmalloc object (SLAB_STORE_USER enabled)
1380 * D. Padding to reach required alignment boundary or at minimum
1381 * one word if debugging is on to be able to detect writes
1382 * before the word boundary.
1383 *
1384 * Padding is done using 0x5a (POISON_INUSE)
1385 *
1386 * object + s->size
1387 * Nothing is used beyond s->size.
1388 *
1389 * If slabcaches are merged then the object_size and inuse boundaries are mostly
1390 * ignored. And therefore no slab options that rely on these boundaries
1391 * may be used with merged slabcaches.
1392 */
1393
1394static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
1395{
1396 unsigned long off = get_info_end(s); /* The end of info */
1397
1398 if (s->flags & SLAB_STORE_USER) {
1399 /* We also have user information there */
1400 off += 2 * sizeof(struct track);
1401
1402 if (s->flags & SLAB_KMALLOC)
1403 off += sizeof(unsigned int);
1404 }
1405
1406 off += kasan_metadata_size(cache: s, in_object: false);
1407
1408 if (size_from_object(s) == off)
1409 return 1;
1410
1411 return check_bytes_and_report(s, slab, object: p, what: "Object padding",
1412 start: p + off, POISON_INUSE, bytes: size_from_object(s) - off, slab_obj_print: true);
1413}
1414
1415/* Check the pad bytes at the end of a slab page */
1416static pad_check_attributes void
1417slab_pad_check(struct kmem_cache *s, struct slab *slab)
1418{
1419 u8 *start;
1420 u8 *fault;
1421 u8 *end;
1422 u8 *pad;
1423 int length;
1424 int remainder;
1425
1426 if (!(s->flags & SLAB_POISON))
1427 return;
1428
1429 start = slab_address(slab);
1430 length = slab_size(slab);
1431 end = start + length;
1432 remainder = length % s->size;
1433 if (!remainder)
1434 return;
1435
1436 pad = end - remainder;
1437 metadata_access_enable();
1438 fault = memchr_inv(s: kasan_reset_tag(addr: pad), POISON_INUSE, n: remainder);
1439 metadata_access_disable();
1440 if (!fault)
1441 return;
1442 while (end > fault && end[-1] == POISON_INUSE)
1443 end--;
1444
1445 slab_bug(s, fmt: "Padding overwritten. 0x%p-0x%p @offset=%tu",
1446 fault, end - 1, fault - start);
1447 print_section(KERN_ERR, text: "Padding ", addr: pad, length: remainder);
1448 __slab_err(slab);
1449
1450 restore_bytes(s, message: "slab padding", POISON_INUSE, from: fault, to: end);
1451}
1452
1453static int check_object(struct kmem_cache *s, struct slab *slab,
1454 void *object, u8 val)
1455{
1456 u8 *p = object;
1457 u8 *endobject = object + s->object_size;
1458 unsigned int orig_size, kasan_meta_size;
1459 int ret = 1;
1460
1461 if (s->flags & SLAB_RED_ZONE) {
1462 if (!check_bytes_and_report(s, slab, object, what: "Left Redzone",
1463 start: object - s->red_left_pad, value: val, bytes: s->red_left_pad, slab_obj_print: ret))
1464 ret = 0;
1465
1466 if (!check_bytes_and_report(s, slab, object, what: "Right Redzone",
1467 start: endobject, value: val, bytes: s->inuse - s->object_size, slab_obj_print: ret))
1468 ret = 0;
1469
1470 if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
1471 orig_size = get_orig_size(s, object);
1472
1473 if (s->object_size > orig_size &&
1474 !check_bytes_and_report(s, slab, object,
1475 what: "kmalloc Redzone", start: p + orig_size,
1476 value: val, bytes: s->object_size - orig_size, slab_obj_print: ret)) {
1477 ret = 0;
1478 }
1479 }
1480 } else {
1481 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
1482 if (!check_bytes_and_report(s, slab, object: p, what: "Alignment padding",
1483 start: endobject, POISON_INUSE,
1484 bytes: s->inuse - s->object_size, slab_obj_print: ret))
1485 ret = 0;
1486 }
1487 }
1488
1489 if (s->flags & SLAB_POISON) {
1490 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON)) {
1491 /*
1492 * KASAN can save its free meta data inside of the
1493 * object at offset 0. Thus, skip checking the part of
1494 * the redzone that overlaps with the meta data.
1495 */
1496 kasan_meta_size = kasan_metadata_size(cache: s, in_object: true);
1497 if (kasan_meta_size < s->object_size - 1 &&
1498 !check_bytes_and_report(s, slab, object: p, what: "Poison",
1499 start: p + kasan_meta_size, POISON_FREE,
1500 bytes: s->object_size - kasan_meta_size - 1, slab_obj_print: ret))
1501 ret = 0;
1502 if (kasan_meta_size < s->object_size &&
1503 !check_bytes_and_report(s, slab, object: p, what: "End Poison",
1504 start: p + s->object_size - 1, POISON_END, bytes: 1, slab_obj_print: ret))
1505 ret = 0;
1506 }
1507 /*
1508 * check_pad_bytes cleans up on its own.
1509 */
1510 if (!check_pad_bytes(s, slab, p))
1511 ret = 0;
1512 }
1513
1514 /*
1515 * Cannot check freepointer while object is allocated if
1516 * object and freepointer overlap.
1517 */
1518 if ((freeptr_outside_object(s) || val != SLUB_RED_ACTIVE) &&
1519 !check_valid_pointer(s, slab, object: get_freepointer(s, object: p))) {
1520 object_err(s, slab, object: p, reason: "Freepointer corrupt");
1521 /*
1522 * No choice but to zap it and thus lose the remainder
1523 * of the free objects in this slab. May cause
1524 * another error because the object count is now wrong.
1525 */
1526 set_freepointer(s, object: p, NULL);
1527 ret = 0;
1528 }
1529
1530 return ret;
1531}
1532
1533/*
1534 * Checks if the slab state looks sane. Assumes the struct slab pointer
1535 * was either obtained in a way that ensures it's valid, or validated
1536 * by validate_slab_ptr()
1537 */
1538static int check_slab(struct kmem_cache *s, struct slab *slab)
1539{
1540 int maxobj;
1541
1542 maxobj = order_objects(order: slab_order(slab), size: s->size);
1543 if (slab->objects > maxobj) {
1544 slab_err(s, slab, fmt: "objects %u > max %u",
1545 slab->objects, maxobj);
1546 return 0;
1547 }
1548 if (slab->inuse > slab->objects) {
1549 slab_err(s, slab, fmt: "inuse %u > max %u",
1550 slab->inuse, slab->objects);
1551 return 0;
1552 }
1553 if (slab->frozen) {
1554 slab_err(s, slab, fmt: "Slab disabled since SLUB metadata consistency check failed");
1555 return 0;
1556 }
1557
1558 /* Slab_pad_check fixes things up after itself */
1559 slab_pad_check(s, slab);
1560 return 1;
1561}
1562
1563/*
1564 * Determine if a certain object in a slab is on the freelist. Must hold the
1565 * slab lock to guarantee that the chains are in a consistent state.
1566 */
1567static bool on_freelist(struct kmem_cache *s, struct slab *slab, void *search)
1568{
1569 int nr = 0;
1570 void *fp;
1571 void *object = NULL;
1572 int max_objects;
1573
1574 fp = slab->freelist;
1575 while (fp && nr <= slab->objects) {
1576 if (fp == search)
1577 return true;
1578 if (!check_valid_pointer(s, slab, object: fp)) {
1579 if (object) {
1580 object_err(s, slab, object,
1581 reason: "Freechain corrupt");
1582 set_freepointer(s, object, NULL);
1583 break;
1584 } else {
1585 slab_err(s, slab, fmt: "Freepointer corrupt");
1586 slab->freelist = NULL;
1587 slab->inuse = slab->objects;
1588 slab_fix(s, fmt: "Freelist cleared");
1589 return false;
1590 }
1591 }
1592 object = fp;
1593 fp = get_freepointer(s, object);
1594 nr++;
1595 }
1596
1597 if (nr > slab->objects) {
1598 slab_err(s, slab, fmt: "Freelist cycle detected");
1599 slab->freelist = NULL;
1600 slab->inuse = slab->objects;
1601 slab_fix(s, fmt: "Freelist cleared");
1602 return false;
1603 }
1604
1605 max_objects = order_objects(order: slab_order(slab), size: s->size);
1606 if (max_objects > MAX_OBJS_PER_PAGE)
1607 max_objects = MAX_OBJS_PER_PAGE;
1608
1609 if (slab->objects != max_objects) {
1610 slab_err(s, slab, fmt: "Wrong number of objects. Found %d but should be %d",
1611 slab->objects, max_objects);
1612 slab->objects = max_objects;
1613 slab_fix(s, fmt: "Number of objects adjusted");
1614 }
1615 if (slab->inuse != slab->objects - nr) {
1616 slab_err(s, slab, fmt: "Wrong object count. Counter is %d but counted were %d",
1617 slab->inuse, slab->objects - nr);
1618 slab->inuse = slab->objects - nr;
1619 slab_fix(s, fmt: "Object count adjusted");
1620 }
1621 return search == NULL;
1622}
1623
1624static void trace(struct kmem_cache *s, struct slab *slab, void *object,
1625 int alloc)
1626{
1627 if (s->flags & SLAB_TRACE) {
1628 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1629 s->name,
1630 alloc ? "alloc" : "free",
1631 object, slab->inuse,
1632 slab->freelist);
1633
1634 if (!alloc)
1635 print_section(KERN_INFO, text: "Object ", addr: (void *)object,
1636 length: s->object_size);
1637
1638 dump_stack();
1639 }
1640}
1641
1642/*
1643 * Tracking of fully allocated slabs for debugging purposes.
1644 */
1645static void add_full(struct kmem_cache *s,
1646 struct kmem_cache_node *n, struct slab *slab)
1647{
1648 if (!(s->flags & SLAB_STORE_USER))
1649 return;
1650
1651 lockdep_assert_held(&n->list_lock);
	list_add(&slab->slab_list, &n->full);
1653}
1654
1655static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab)
1656{
1657 if (!(s->flags & SLAB_STORE_USER))
1658 return;
1659
1660 lockdep_assert_held(&n->list_lock);
	list_del(&slab->slab_list);
1662}
1663
1664static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1665{
	return atomic_long_read(&n->nr_slabs);
1667}
1668
1669static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1670{
1671 struct kmem_cache_node *n = get_node(s, node);
1672
	atomic_long_inc(&n->nr_slabs);
	atomic_long_add(objects, &n->total_objects);
1675}
1676static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1677{
1678 struct kmem_cache_node *n = get_node(s, node);
1679
	atomic_long_dec(&n->nr_slabs);
	atomic_long_sub(objects, &n->total_objects);
1682}
1683
1684/* Object debug checks for alloc/free paths */
1685static void setup_object_debug(struct kmem_cache *s, void *object)
1686{
1687 if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
1688 return;
1689
1690 init_object(s, object, SLUB_RED_INACTIVE);
1691 init_tracking(s, object);
1692}
1693
1694static
1695void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr)
1696{
1697 if (!kmem_cache_debug_flags(s, SLAB_POISON))
1698 return;
1699
1700 metadata_access_enable();
	memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab));
1702 metadata_access_disable();
1703}
1704
1705static inline int alloc_consistency_checks(struct kmem_cache *s,
1706 struct slab *slab, void *object)
1707{
1708 if (!check_slab(s, slab))
1709 return 0;
1710
1711 if (!check_valid_pointer(s, slab, object)) {
1712 object_err(s, slab, object, reason: "Freelist Pointer check fails");
1713 return 0;
1714 }
1715
1716 if (!check_object(s, slab, object, SLUB_RED_INACTIVE))
1717 return 0;
1718
1719 return 1;
1720}
1721
1722static noinline bool alloc_debug_processing(struct kmem_cache *s,
1723 struct slab *slab, void *object, int orig_size)
1724{
1725 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1726 if (!alloc_consistency_checks(s, slab, object))
1727 goto bad;
1728 }
1729
1730 /* Success. Perform special debug activities for allocs */
1731 trace(s, slab, object, alloc: 1);
1732 set_orig_size(s, object, orig_size);
1733 init_object(s, object, SLUB_RED_ACTIVE);
1734 return true;
1735
1736bad:
1737 /*
1738 * Let's do the best we can to avoid issues in the future. Marking all
1739 * objects as used avoids touching the remaining objects.
1740 */
1741 slab_fix(s, fmt: "Marking all objects used");
1742 slab->inuse = slab->objects;
1743 slab->freelist = NULL;
1744 slab->frozen = 1; /* mark consistency-failed slab as frozen */
1745
1746 return false;
1747}
1748
1749static inline int free_consistency_checks(struct kmem_cache *s,
1750 struct slab *slab, void *object, unsigned long addr)
1751{
1752 if (!check_valid_pointer(s, slab, object)) {
1753 slab_err(s, slab, fmt: "Invalid object pointer 0x%p", object);
1754 return 0;
1755 }
1756
1757 if (on_freelist(s, slab, search: object)) {
1758 object_err(s, slab, object, reason: "Object already free");
1759 return 0;
1760 }
1761
1762 if (!check_object(s, slab, object, SLUB_RED_ACTIVE))
1763 return 0;
1764
1765 if (unlikely(s != slab->slab_cache)) {
1766 if (!slab->slab_cache) {
1767 slab_err(NULL, slab, fmt: "No slab cache for object 0x%p",
1768 object);
1769 } else {
1770 object_err(s, slab, object,
1771 reason: "page slab pointer corrupt.");
1772 }
1773 return 0;
1774 }
1775 return 1;
1776}
1777
1778/*
1779 * Parse a block of slab_debug options. Blocks are delimited by ';'
1780 *
1781 * @str: start of block
1782 * @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
1783 * @slabs: return start of list of slabs, or NULL when there's no list
1784 * @init: assume this is initial parsing and not per-kmem-create parsing
1785 *
1786 * returns the start of next block if there's any, or NULL
1787 */
1788static char *
1789parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
1790{
1791 bool higher_order_disable = false;
1792
1793 /* Skip any completely empty blocks */
1794 while (*str && *str == ';')
1795 str++;
1796
1797 if (*str == ',') {
1798 /*
1799 * No options but restriction on slabs. This means full
1800 * debugging for slabs matching a pattern.
1801 */
1802 *flags = DEBUG_DEFAULT_FLAGS;
1803 goto check_slabs;
1804 }
1805 *flags = 0;
1806
1807 /* Determine which debug features should be switched on */
1808 for (; *str && *str != ',' && *str != ';'; str++) {
1809 switch (tolower(*str)) {
1810 case '-':
1811 *flags = 0;
1812 break;
1813 case 'f':
1814 *flags |= SLAB_CONSISTENCY_CHECKS;
1815 break;
1816 case 'z':
1817 *flags |= SLAB_RED_ZONE;
1818 break;
1819 case 'p':
1820 *flags |= SLAB_POISON;
1821 break;
1822 case 'u':
1823 *flags |= SLAB_STORE_USER;
1824 break;
1825 case 't':
1826 *flags |= SLAB_TRACE;
1827 break;
1828 case 'a':
1829 *flags |= SLAB_FAILSLAB;
1830 break;
1831 case 'o':
1832 /*
1833 * Avoid enabling debugging on caches if its minimum
1834 * order would increase as a result.
1835 */
1836 higher_order_disable = true;
1837 break;
1838 default:
1839 if (init)
1840 pr_err("slab_debug option '%c' unknown. skipped\n", *str);
1841 }
1842 }
1843check_slabs:
1844 if (*str == ',')
1845 *slabs = ++str;
1846 else
1847 *slabs = NULL;
1848
1849 /* Skip over the slab list */
1850 while (*str && *str != ';')
1851 str++;
1852
1853 /* Skip any completely empty blocks */
1854 while (*str && *str == ';')
1855 str++;
1856
1857 if (init && higher_order_disable)
1858 disable_higher_order_debug = 1;
1859
1860 if (*str)
1861 return str;
1862 else
1863 return NULL;
1864}
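/*
 * For illustration, a hedged example of how the parser above splits a
 * command line (the cache names are arbitrary examples):
 *
 *   slab_debug=FZ;P,kmalloc-64,dentry
 *
 * is handled as two blocks:
 *   "FZ"                  - no slab list, so SLAB_CONSISTENCY_CHECKS and
 *                           SLAB_RED_ZONE become the new global flags
 *   "P,kmalloc-64,dentry" - SLAB_POISON is applied only to the listed
 *                           caches, via kmem_cache_flags() below
 *
 * A lone '-' (slab_debug=-) yields *flags == 0 and thus switches debugging
 * off globally.
 */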
1865
1866static int __init setup_slub_debug(char *str)
1867{
1868 slab_flags_t flags;
1869 slab_flags_t global_flags;
1870 char *saved_str;
1871 char *slab_list;
1872 bool global_slub_debug_changed = false;
1873 bool slab_list_specified = false;
1874
1875 global_flags = DEBUG_DEFAULT_FLAGS;
1876 if (*str++ != '=' || !*str)
1877 /*
1878 * No options specified. Switch on full debugging.
1879 */
1880 goto out;
1881
1882 saved_str = str;
1883 while (str) {
1884		str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1885
1886 if (!slab_list) {
1887 global_flags = flags;
1888 global_slub_debug_changed = true;
1889 } else {
1890 slab_list_specified = true;
1891 if (flags & SLAB_STORE_USER)
1892 stack_depot_request_early_init();
1893 }
1894 }
1895
1896 /*
1897 * For backwards compatibility, a single list of flags with list of
1898 * slabs means debugging is only changed for those slabs, so the global
1899 * slab_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
1900	 * on CONFIG_SLUB_DEBUG_ON). We can extend that to multiple lists as
1901 * long as there is no option specifying flags without a slab list.
1902 */
1903 if (slab_list_specified) {
1904 if (!global_slub_debug_changed)
1905 global_flags = slub_debug;
1906 slub_debug_string = saved_str;
1907 }
1908out:
1909 slub_debug = global_flags;
1910 if (slub_debug & SLAB_STORE_USER)
1911 stack_depot_request_early_init();
1912 if (slub_debug != 0 || slub_debug_string)
1913 static_branch_enable(&slub_debug_enabled);
1914 else
1915 static_branch_disable(&slub_debug_enabled);
1916 if ((static_branch_unlikely(&init_on_alloc) ||
1917 static_branch_unlikely(&init_on_free)) &&
1918 (slub_debug & SLAB_POISON))
1919 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1920 return 1;
1921}
1922
1923__setup("slab_debug", setup_slub_debug);
1924__setup_param("slub_debug", slub_debug, setup_slub_debug, 0);
1925
1926/*
1927 * kmem_cache_flags - apply debugging options to the cache
1928 * @flags: flags to set
1929 * @name: name of the cache
1930 *
1931 * Debug option(s) are applied to @flags. In addition to the debug
1932 * option(s), if a slab name (or multiple) is specified i.e.
1933 * slab_debug=<Debug-Options>,<slab name1>,<slab name2> ...
1934 * then only the select slabs will receive the debug option(s).
1935 */
1936slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
1937{
1938 char *iter;
1939 size_t len;
1940 char *next_block;
1941 slab_flags_t block_flags;
1942 slab_flags_t slub_debug_local = slub_debug;
1943
1944 if (flags & SLAB_NO_USER_FLAGS)
1945 return flags;
1946
1947 /*
1948 * If the slab cache is for debugging (e.g. kmemleak) then
1949 * don't store user (stack trace) information by default,
1950 * but let the user enable it via the command line below.
1951 */
1952 if (flags & SLAB_NOLEAKTRACE)
1953 slub_debug_local &= ~SLAB_STORE_USER;
1954
1955 len = strlen(name);
1956 next_block = slub_debug_string;
1957 /* Go through all blocks of debug options, see if any matches our slab's name */
1958 while (next_block) {
1959		next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1960 if (!iter)
1961 continue;
1962 /* Found a block that has a slab list, search it */
1963 while (*iter) {
1964 char *end, *glob;
1965 size_t cmplen;
1966
1967 end = strchrnul(iter, ',');
1968 if (next_block && next_block < end)
1969 end = next_block - 1;
1970
1971 glob = strnchr(iter, end - iter, '*');
1972 if (glob)
1973 cmplen = glob - iter;
1974 else
1975 cmplen = max_t(size_t, len, (end - iter));
1976
1977 if (!strncmp(name, iter, cmplen)) {
1978 flags |= block_flags;
1979 return flags;
1980 }
1981
1982 if (!*end || *end == ';')
1983 break;
1984 iter = end + 1;
1985 }
1986 }
1987
1988 return flags | slub_debug_local;
1989}
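/*
 * A hedged example of the matching above: with slab_debug=U,kmalloc-* the
 * block's slab list is "kmalloc-*"; the '*' limits the comparison to the
 * "kmalloc-" prefix, so a cache named "kmalloc-64" gets SLAB_STORE_USER
 * while e.g. "dentry" does not. Without a '*' the comparison length is the
 * longer of the two names, i.e. an exact match is required.
 */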
1990#else /* !CONFIG_SLUB_DEBUG */
1991static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
1992static inline
1993void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
1994
1995static inline bool alloc_debug_processing(struct kmem_cache *s,
1996 struct slab *slab, void *object, int orig_size) { return true; }
1997
1998static inline bool free_debug_processing(struct kmem_cache *s,
1999 struct slab *slab, void *head, void *tail, int *bulk_cnt,
2000 unsigned long addr, depot_stack_handle_t handle) { return true; }
2001
2002static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
2003static inline int check_object(struct kmem_cache *s, struct slab *slab,
2004 void *object, u8 val) { return 1; }
2005static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { return 0; }
2006static inline void set_track(struct kmem_cache *s, void *object,
2007 enum track_item alloc, unsigned long addr, gfp_t gfp_flags) {}
2008static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
2009 struct slab *slab) {}
2010static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
2011 struct slab *slab) {}
2012slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
2013{
2014 return flags;
2015}
2016#define slub_debug 0
2017
2018#define disable_higher_order_debug 0
2019
2020static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
2021 { return 0; }
2022static inline void inc_slabs_node(struct kmem_cache *s, int node,
2023 int objects) {}
2024static inline void dec_slabs_node(struct kmem_cache *s, int node,
2025 int objects) {}
2026#ifndef CONFIG_SLUB_TINY
2027static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
2028 void **freelist, void *nextfree)
2029{
2030 return false;
2031}
2032#endif
2033#endif /* CONFIG_SLUB_DEBUG */
2034
2035#ifdef CONFIG_SLAB_OBJ_EXT
2036
2037#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
2038
2039static inline void mark_objexts_empty(struct slabobj_ext *obj_exts)
2040{
2041 struct slabobj_ext *slab_exts;
2042 struct slab *obj_exts_slab;
2043
2044 obj_exts_slab = virt_to_slab(obj_exts);
2045 slab_exts = slab_obj_exts(obj_exts_slab);
2046 if (slab_exts) {
2047 unsigned int offs = obj_to_index(obj_exts_slab->slab_cache,
2048 obj_exts_slab, obj_exts);
2049 /* codetag should be NULL */
2050 WARN_ON(slab_exts[offs].ref.ct);
2051 set_codetag_empty(&slab_exts[offs].ref);
2052 }
2053}
2054
2055static inline void mark_failed_objexts_alloc(struct slab *slab)
2056{
2057 slab->obj_exts = OBJEXTS_ALLOC_FAIL;
2058}
2059
2060static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
2061 struct slabobj_ext *vec, unsigned int objects)
2062{
2063 /*
2064 * If vector previously failed to allocate then we have live
2065 * objects with no tag reference. Mark all references in this
2066 * vector as empty to avoid warnings later on.
2067 */
2068 if (obj_exts == OBJEXTS_ALLOC_FAIL) {
2069 unsigned int i;
2070
2071 for (i = 0; i < objects; i++)
2072 set_codetag_empty(&vec[i].ref);
2073 }
2074}
2075
2076#else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
2077
2078static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {}
2079static inline void mark_failed_objexts_alloc(struct slab *slab) {}
2080static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
2081 struct slabobj_ext *vec, unsigned int objects) {}
2082
2083#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
2084
2085/*
2086 * The allocated objcg pointers array is not accounted directly.
2087 * Moreover, it should not come from a DMA buffer and is not readily
2088 * reclaimable. So those GFP bits should be masked off.
2089 */
2090#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
2091 __GFP_ACCOUNT | __GFP_NOFAIL)
2092
2093static inline void init_slab_obj_exts(struct slab *slab)
2094{
2095 slab->obj_exts = 0;
2096}
2097
2098int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
2099 gfp_t gfp, bool new_slab)
2100{
2101 bool allow_spin = gfpflags_allow_spinning(gfp);
2102 unsigned int objects = objs_per_slab(s, slab);
2103 unsigned long new_exts;
2104 unsigned long old_exts;
2105 struct slabobj_ext *vec;
2106
2107 gfp &= ~OBJCGS_CLEAR_MASK;
2108 /* Prevent recursive extension vector allocation */
2109 gfp |= __GFP_NO_OBJ_EXT;
2110
2111 /*
2112 * Note that allow_spin may be false during early boot and its
2113 * restricted GFP_BOOT_MASK. Due to kmalloc_nolock() only supporting
2114 * architectures with cmpxchg16b, early obj_exts will be missing for
2115 * very early allocations on those.
2116 */
2117 if (unlikely(!allow_spin)) {
2118 size_t sz = objects * sizeof(struct slabobj_ext);
2119
2120 vec = kmalloc_nolock(sz, __GFP_ZERO | __GFP_NO_OBJ_EXT,
2121 slab_nid(slab));
2122 } else {
2123 vec = kcalloc_node(objects, sizeof(struct slabobj_ext), gfp,
2124 slab_nid(slab));
2125 }
2126 if (!vec) {
2127 /* Mark vectors which failed to allocate */
2128 mark_failed_objexts_alloc(slab);
2129
2130 return -ENOMEM;
2131 }
2132
2133 new_exts = (unsigned long)vec;
2134 if (unlikely(!allow_spin))
2135 new_exts |= OBJEXTS_NOSPIN_ALLOC;
2136#ifdef CONFIG_MEMCG
2137 new_exts |= MEMCG_DATA_OBJEXTS;
2138#endif
2139 old_exts = READ_ONCE(slab->obj_exts);
2140 handle_failed_objexts_alloc(old_exts, vec, objects);
2141 if (new_slab) {
2142 /*
2143 * If the slab is brand new and nobody can yet access its
2144 * obj_exts, no synchronization is required and obj_exts can
2145 * be simply assigned.
2146 */
2147 slab->obj_exts = new_exts;
2148 } else if ((old_exts & ~OBJEXTS_FLAGS_MASK) ||
2149 cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) {
2150 /*
2151 * If the slab is already in use, somebody can allocate and
2152 * assign slabobj_exts in parallel. In this case the existing
2153 * objcg vector should be reused.
2154 */
2155 mark_objexts_empty(vec);
2156 if (unlikely(!allow_spin))
2157 kfree_nolock(vec);
2158 else
2159 kfree(vec);
2160 return 0;
2161 }
2162
2163 if (allow_spin)
2164 kmemleak_not_leak(vec);
2165 return 0;
2166}
2167
2168static inline void free_slab_obj_exts(struct slab *slab)
2169{
2170 struct slabobj_ext *obj_exts;
2171
2172 obj_exts = slab_obj_exts(slab);
2173 if (!obj_exts)
2174 return;
2175
2176 /*
2177 * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
2178 * corresponding extension will be NULL. alloc_tag_sub() will throw a
2179 * warning if slab has extensions but the extension of an object is
2180 * NULL, therefore replace NULL with CODETAG_EMPTY to indicate that
2181 * the extension for obj_exts is expected to be NULL.
2182 */
2183 mark_objexts_empty(obj_exts);
2184 if (unlikely(READ_ONCE(slab->obj_exts) & OBJEXTS_NOSPIN_ALLOC))
2185 kfree_nolock(obj_exts);
2186 else
2187 kfree(obj_exts);
2188 slab->obj_exts = 0;
2189}
2190
2191#else /* CONFIG_SLAB_OBJ_EXT */
2192
2193static inline void init_slab_obj_exts(struct slab *slab)
2194{
2195}
2196
2197static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
2198 gfp_t gfp, bool new_slab)
2199{
2200 return 0;
2201}
2202
2203static inline void free_slab_obj_exts(struct slab *slab)
2204{
2205}
2206
2207#endif /* CONFIG_SLAB_OBJ_EXT */
2208
2209#ifdef CONFIG_MEM_ALLOC_PROFILING
2210
2211static inline struct slabobj_ext *
2212prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p)
2213{
2214 struct slab *slab;
2215
2216 slab = virt_to_slab(p);
2217 if (!slab_obj_exts(slab) &&
2218 alloc_slab_obj_exts(slab, s, flags, false)) {
2219 pr_warn_once("%s, %s: Failed to create slab extension vector!\n",
2220 __func__, s->name);
2221 return NULL;
2222 }
2223
2224 return slab_obj_exts(slab) + obj_to_index(s, slab, p);
2225}
2226
2227/* Should be called only if mem_alloc_profiling_enabled() */
2228static noinline void
2229__alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
2230{
2231 struct slabobj_ext *obj_exts;
2232
2233 if (!object)
2234 return;
2235
2236 if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE))
2237 return;
2238
2239 if (flags & __GFP_NO_OBJ_EXT)
2240 return;
2241
2242 obj_exts = prepare_slab_obj_exts_hook(s, flags, object);
2243 /*
2244 * Currently obj_exts is used only for allocation profiling.
2245 * If other users appear then mem_alloc_profiling_enabled()
2246 * check should be added before alloc_tag_add().
2247 */
2248 if (likely(obj_exts))
2249 alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
2250 else
2251 alloc_tag_set_inaccurate(current->alloc_tag);
2252}
2253
2254static inline void
2255alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
2256{
2257 if (mem_alloc_profiling_enabled())
2258 __alloc_tagging_slab_alloc_hook(s, object, flags);
2259}
2260
2261/* Should be called only if mem_alloc_profiling_enabled() */
2262static noinline void
2263__alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
2264 int objects)
2265{
2266 struct slabobj_ext *obj_exts;
2267 int i;
2268
2269 /* slab->obj_exts might not be NULL if it was created for MEMCG accounting. */
2270 if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE))
2271 return;
2272
2273 obj_exts = slab_obj_exts(slab);
2274 if (!obj_exts)
2275 return;
2276
2277 for (i = 0; i < objects; i++) {
2278 unsigned int off = obj_to_index(s, slab, p[i]);
2279
2280 alloc_tag_sub(&obj_exts[off].ref, s->size);
2281 }
2282}
2283
2284static inline void
2285alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
2286 int objects)
2287{
2288 if (mem_alloc_profiling_enabled())
2289 __alloc_tagging_slab_free_hook(s, slab, p, objects);
2290}
2291
2292#else /* CONFIG_MEM_ALLOC_PROFILING */
2293
2294static inline void
2295alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
2296{
2297}
2298
2299static inline void
2300alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
2301 int objects)
2302{
2303}
2304
2305#endif /* CONFIG_MEM_ALLOC_PROFILING */
2306
2307
2308#ifdef CONFIG_MEMCG
2309
2310static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
2311
2312static __fastpath_inline
2313bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
2314 gfp_t flags, size_t size, void **p)
2315{
2316 if (likely(!memcg_kmem_online()))
2317 return true;
2318
2319 if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
2320 return true;
2321
2322 if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p)))
2323 return true;
2324
2325 if (likely(size == 1)) {
2326 memcg_alloc_abort_single(s, *p);
2327 *p = NULL;
2328 } else {
2329 kmem_cache_free_bulk(s, size, p);
2330 }
2331
2332 return false;
2333}
2334
2335static __fastpath_inline
2336void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
2337 int objects)
2338{
2339 struct slabobj_ext *obj_exts;
2340
2341 if (!memcg_kmem_online())
2342 return;
2343
2344 obj_exts = slab_obj_exts(slab);
2345 if (likely(!obj_exts))
2346 return;
2347
2348 __memcg_slab_free_hook(s, slab, p, objects, obj_exts);
2349}
2350
2351static __fastpath_inline
2352bool memcg_slab_post_charge(void *p, gfp_t flags)
2353{
2354 struct slabobj_ext *slab_exts;
2355 struct kmem_cache *s;
2356 struct folio *folio;
2357 struct slab *slab;
2358 unsigned long off;
2359
2360 folio = virt_to_folio(p);
2361 if (!folio_test_slab(folio)) {
2362 int size;
2363
2364 if (folio_memcg_kmem(folio))
2365 return true;
2366
2367 if (__memcg_kmem_charge_page(folio_page(folio, 0), flags,
2368 folio_order(folio)))
2369 return false;
2370
2371 /*
2372 * This folio has already been accounted in the global stats but
2373 * not in the memcg stats. So, subtract from the global and use
2374 * the interface which adds to both global and memcg stats.
2375 */
2376 size = folio_size(folio);
2377 node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size);
2378 lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size);
2379 return true;
2380 }
2381
2382 slab = folio_slab(folio);
2383 s = slab->slab_cache;
2384
2385 /*
2386 * Ignore KMALLOC_NORMAL cache to avoid possible circular dependency
2387 * of slab_obj_exts being allocated from the same slab and thus the slab
2388 * becoming effectively unfreeable.
2389 */
2390 if (is_kmalloc_normal(s))
2391 return true;
2392
2393 /* Ignore already charged objects. */
2394 slab_exts = slab_obj_exts(slab);
2395 if (slab_exts) {
2396 off = obj_to_index(s, slab, p);
2397 if (unlikely(slab_exts[off].objcg))
2398 return true;
2399 }
2400
2401 return __memcg_slab_post_alloc_hook(s, NULL, flags, 1, &p);
2402}
2403
2404#else /* CONFIG_MEMCG */
2405static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
2406 struct list_lru *lru,
2407 gfp_t flags, size_t size,
2408 void **p)
2409{
2410 return true;
2411}
2412
2413static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
2414 void **p, int objects)
2415{
2416}
2417
2418static inline bool memcg_slab_post_charge(void *p, gfp_t flags)
2419{
2420 return true;
2421}
2422#endif /* CONFIG_MEMCG */
2423
2424#ifdef CONFIG_SLUB_RCU_DEBUG
2425static void slab_free_after_rcu_debug(struct rcu_head *rcu_head);
2426
2427struct rcu_delayed_free {
2428 struct rcu_head head;
2429 void *object;
2430};
2431#endif
2432
2433/*
2434 * Hooks for other subsystems that check memory allocations. In a typical
2435 * production configuration these hooks all should produce no code at all.
2436 *
2437 * Returns true if freeing of the object can proceed, false if its reuse
2438 * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
2439 * to KFENCE.
2440 */
2441static __always_inline
2442bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
2443 bool after_rcu_delay)
2444{
2445 /* Are the object contents still accessible? */
2446 bool still_accessible = (s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay;
2447
2448	kmemleak_free_recursive(x, s->flags);
2449	kmsan_slab_free(s, x);
2450
2451	debug_check_no_locks_freed(x, s->object_size);
2452
2453	if (!(s->flags & SLAB_DEBUG_OBJECTS))
2454		debug_check_no_obj_freed(x, s->object_size);
2455
2456 /* Use KCSAN to help debug racy use-after-free. */
2457 if (!still_accessible)
2458		__kcsan_check_access(x, s->object_size,
2459 KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
2460
2461	if (kfence_free(x))
2462 return false;
2463
2464 /*
2465 * Give KASAN a chance to notice an invalid free operation before we
2466 * modify the object.
2467 */
2468	if (kasan_slab_pre_free(s, x))
2469 return false;
2470
2471#ifdef CONFIG_SLUB_RCU_DEBUG
2472 if (still_accessible) {
2473 struct rcu_delayed_free *delayed_free;
2474
2475 delayed_free = kmalloc(sizeof(*delayed_free), GFP_NOWAIT);
2476 if (delayed_free) {
2477 /*
2478 * Let KASAN track our call stack as a "related work
2479 * creation", just like if the object had been freed
2480 * normally via kfree_rcu().
2481 * We have to do this manually because the rcu_head is
2482 * not located inside the object.
2483 */
2484 kasan_record_aux_stack(x);
2485
2486 delayed_free->object = x;
2487 call_rcu(&delayed_free->head, slab_free_after_rcu_debug);
2488 return false;
2489 }
2490 }
2491#endif /* CONFIG_SLUB_RCU_DEBUG */
2492
2493 /*
2494 * As memory initialization might be integrated into KASAN,
2495 * kasan_slab_free and initialization memset's must be
2496 * kept together to avoid discrepancies in behavior.
2497 *
2498 * The initialization memset's clear the object and the metadata,
2499 * but don't touch the SLAB redzone.
2500 *
2501 * The object's freepointer is also avoided if stored outside the
2502 * object.
2503 */
2504 if (unlikely(init)) {
2505 int rsize;
2506 unsigned int inuse, orig_size;
2507
2508 inuse = get_info_end(s);
2509		orig_size = get_orig_size(s, x);
2510		if (!kasan_has_integrated_init())
2511			memset(kasan_reset_tag(x), 0, orig_size);
2512		rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
2513		memset((char *)kasan_reset_tag(x) + inuse, 0,
2514		       s->size - inuse - rsize);
2515		/*
2516		 * Restore orig_size, otherwise a kmalloc redzone overwrite
2517		 * would be reported.
2518		 */
2519		set_orig_size(s, x, orig_size);
2520
2521 }
2522 /* KASAN might put x into memory quarantine, delaying its reuse. */
2523	return !kasan_slab_free(s, x, init, still_accessible, false);
2524}
2525
2526static __fastpath_inline
2527bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
2528 int *cnt)
2529{
2530
2531 void *object;
2532 void *next = *head;
2533 void *old_tail = *tail;
2534 bool init;
2535
2536	if (is_kfence_address(next)) {
2537		slab_free_hook(s, next, false, false);
2538 return false;
2539 }
2540
2541 /* Head and tail of the reconstructed freelist */
2542 *head = NULL;
2543 *tail = NULL;
2544
2545	init = slab_want_init_on_free(s);
2546
2547 do {
2548 object = next;
2549 next = get_freepointer(s, object);
2550
2551 /* If object's reuse doesn't have to be delayed */
2552 if (likely(slab_free_hook(s, object, init, false))) {
2553 /* Move object to the new freelist */
2554			set_freepointer(s, object, *head);
2555 *head = object;
2556 if (!*tail)
2557 *tail = object;
2558 } else {
2559 /*
2560 * Adjust the reconstructed freelist depth
2561 * accordingly if object's reuse is delayed.
2562 */
2563 --(*cnt);
2564 }
2565 } while (object != old_tail);
2566
2567 return *head != NULL;
2568}
2569
2570static void *setup_object(struct kmem_cache *s, void *object)
2571{
2572 setup_object_debug(s, object);
2573	object = kasan_init_slab_obj(s, object);
2574	if (unlikely(s->ctor)) {
2575		kasan_unpoison_new_object(s, object);
2576		s->ctor(object);
2577		kasan_poison_new_object(s, object);
2578 }
2579 return object;
2580}
2581
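/*
 * Percpu sheaves. A sheaf is a plain array of object pointers with a fixed
 * capacity (s->sheaf_capacity), allocated in one go via
 * kzalloc(struct_size(...)). The helpers below only manage the sheaf
 * containers; filling and draining them goes through the bulk alloc/free
 * paths.
 */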
2582static struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp)
2583{
2584 struct slab_sheaf *sheaf = kzalloc(struct_size(sheaf, objects,
2585 s->sheaf_capacity), gfp);
2586
2587 if (unlikely(!sheaf))
2588 return NULL;
2589
2590 sheaf->cache = s;
2591
2592	stat(s, SHEAF_ALLOC);
2593
2594 return sheaf;
2595}
2596
2597static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf)
2598{
2599	kfree(sheaf);
2600
2601	stat(s, SHEAF_FREE);
2602}
2603
2604static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
2605 size_t size, void **p);
2606
2607
2608static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf,
2609 gfp_t gfp)
2610{
2611 int to_fill = s->sheaf_capacity - sheaf->size;
2612 int filled;
2613
2614 if (!to_fill)
2615 return 0;
2616
2617	filled = __kmem_cache_alloc_bulk(s, gfp, to_fill,
2618					 &sheaf->objects[sheaf->size]);
2619
2620	sheaf->size += filled;
2621
2622	stat_add(s, SHEAF_REFILL, filled);
2623
2624 if (filled < to_fill)
2625 return -ENOMEM;
2626
2627 return 0;
2628}
2629
2630
2631static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp)
2632{
2633 struct slab_sheaf *sheaf = alloc_empty_sheaf(s, gfp);
2634
2635 if (!sheaf)
2636 return NULL;
2637
2638 if (refill_sheaf(s, sheaf, gfp)) {
2639 free_empty_sheaf(s, sheaf);
2640 return NULL;
2641 }
2642
2643 return sheaf;
2644}
2645
2646/*
2647 * Maximum number of objects freed during a single flush of main pcs sheaf.
2648 * Translates directly to an on-stack array size.
2649 */
2650#define PCS_BATCH_MAX 32U
2651
2652static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);
2653
2654/*
2655 * Free all objects from the main sheaf. In order to perform
2656 * __kmem_cache_free_bulk() outside of cpu_sheaves->lock, work in batches where
2657 * object pointers are moved to an on-stack array under the lock. To bound the
2658 * stack usage, limit each batch to PCS_BATCH_MAX.
2659 *
2660 * returns true if at least partially flushed
2661 */
2662static bool sheaf_flush_main(struct kmem_cache *s)
2663{
2664 struct slub_percpu_sheaves *pcs;
2665 unsigned int batch, remaining;
2666 void *objects[PCS_BATCH_MAX];
2667 struct slab_sheaf *sheaf;
2668 bool ret = false;
2669
2670next_batch:
2671 if (!local_trylock(&s->cpu_sheaves->lock))
2672 return ret;
2673
2674 pcs = this_cpu_ptr(s->cpu_sheaves);
2675 sheaf = pcs->main;
2676
2677 batch = min(PCS_BATCH_MAX, sheaf->size);
2678
2679 sheaf->size -= batch;
2680	memcpy(objects, sheaf->objects + sheaf->size, batch * sizeof(void *));
2681
2682 remaining = sheaf->size;
2683
2684 local_unlock(&s->cpu_sheaves->lock);
2685
2686	__kmem_cache_free_bulk(s, batch, &objects[0]);
2687
2688	stat_add(s, SHEAF_FLUSH, batch);
2689
2690 ret = true;
2691
2692 if (remaining)
2693 goto next_batch;
2694
2695 return ret;
2696}
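/*
 * A worked example of the batching above, assuming a main sheaf holding 70
 * objects and PCS_BATCH_MAX == 32: the loop takes the local lock three
 * times and frees batches of 32, 32 and 6 objects, each time copying the
 * tail of sheaf->objects[] to the on-stack array before unlocking.
 */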
2697
2698/*
2699 * Free all objects from a sheaf that's unused, i.e. not linked to any
2700 * cpu_sheaves, so no locking or batching is needed. Locking is also
2701 * unnecessary when flushing a cpu's sheaves (both spare and main) during cpu
2702 * hotremove, as that cpu is not executing anymore.
2703 */
2704static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf)
2705{
2706 if (!sheaf->size)
2707 return;
2708
2709	stat_add(s, SHEAF_FLUSH, sheaf->size);
2710
2711	__kmem_cache_free_bulk(s, sheaf->size, &sheaf->objects[0]);
2712
2713 sheaf->size = 0;
2714}
2715
2716static void __rcu_free_sheaf_prepare(struct kmem_cache *s,
2717 struct slab_sheaf *sheaf)
2718{
2719	bool init = slab_want_init_on_free(s);
2720 void **p = &sheaf->objects[0];
2721 unsigned int i = 0;
2722
2723 while (i < sheaf->size) {
2724		struct slab *slab = virt_to_slab(p[i]);
2725
2726		memcg_slab_free_hook(s, slab, p + i, 1);
2727		alloc_tagging_slab_free_hook(s, slab, p + i, 1);
2728
2729 if (unlikely(!slab_free_hook(s, p[i], init, true))) {
2730 p[i] = p[--sheaf->size];
2731 continue;
2732 }
2733
2734 i++;
2735 }
2736}
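/*
 * Note that the loop above compacts the sheaf in place: objects whose reuse
 * is still delayed (slab_free_hook() returned false, e.g. due to KFENCE or
 * KASAN quarantine) are dropped by overwriting their slot with the current
 * last entry, so only the remaining sheaf->size objects are bulk-freed by
 * the caller.
 */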
2737
2738static void rcu_free_sheaf_nobarn(struct rcu_head *head)
2739{
2740 struct slab_sheaf *sheaf;
2741 struct kmem_cache *s;
2742
2743 sheaf = container_of(head, struct slab_sheaf, rcu_head);
2744 s = sheaf->cache;
2745
2746 __rcu_free_sheaf_prepare(s, sheaf);
2747
2748 sheaf_flush_unused(s, sheaf);
2749
2750 free_empty_sheaf(s, sheaf);
2751}
2752
2753/*
2754 * The caller must ensure migration is disabled in order to fully flush a
2755 * single cpu's sheaves.
2756 *
2757 * Must not be called from an irq context.
2758 *
2759 * Flushing operations are rare, so keep it simple and flush to slabs
2760 * directly, skipping the barn.
2761 */
2762static void pcs_flush_all(struct kmem_cache *s)
2763{
2764 struct slub_percpu_sheaves *pcs;
2765 struct slab_sheaf *spare, *rcu_free;
2766
2767 local_lock(&s->cpu_sheaves->lock);
2768 pcs = this_cpu_ptr(s->cpu_sheaves);
2769
2770 spare = pcs->spare;
2771 pcs->spare = NULL;
2772
2773 rcu_free = pcs->rcu_free;
2774 pcs->rcu_free = NULL;
2775
2776 local_unlock(&s->cpu_sheaves->lock);
2777
2778 if (spare) {
2779		sheaf_flush_unused(s, spare);
2780		free_empty_sheaf(s, spare);
2781	}
2782
2783	if (rcu_free)
2784		call_rcu(&rcu_free->rcu_head, rcu_free_sheaf_nobarn);
2785
2786 sheaf_flush_main(s);
2787}
2788
2789static void __pcs_flush_all_cpu(struct kmem_cache *s, unsigned int cpu)
2790{
2791 struct slub_percpu_sheaves *pcs;
2792
2793 pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
2794
2795 /* The cpu is not executing anymore so we don't need pcs->lock */
2796	sheaf_flush_unused(s, pcs->main);
2797	if (pcs->spare) {
2798		sheaf_flush_unused(s, pcs->spare);
2799		free_empty_sheaf(s, pcs->spare);
2800 pcs->spare = NULL;
2801 }
2802
2803 if (pcs->rcu_free) {
2804		call_rcu(&pcs->rcu_free->rcu_head, rcu_free_sheaf_nobarn);
2805 pcs->rcu_free = NULL;
2806 }
2807}
2808
2809static void pcs_destroy(struct kmem_cache *s)
2810{
2811 int cpu;
2812
2813 for_each_possible_cpu(cpu) {
2814 struct slub_percpu_sheaves *pcs;
2815
2816 pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
2817
2818 /* can happen when unwinding failed create */
2819 if (!pcs->main)
2820 continue;
2821
2822 /*
2823 * We have already passed __kmem_cache_shutdown() so everything
2824 * was flushed and there should be no objects allocated from
2825 * slabs, otherwise kmem_cache_destroy() would have aborted.
2826 * Therefore something would have to be really wrong if the
2827 * warnings here trigger, and we should rather leave objects and
2828 * sheaves to leak in that case.
2829 */
2830
2831 WARN_ON(pcs->spare);
2832 WARN_ON(pcs->rcu_free);
2833
2834 if (!WARN_ON(pcs->main->size)) {
2835			free_empty_sheaf(s, pcs->main);
2836 pcs->main = NULL;
2837 }
2838 }
2839
2840	free_percpu(s->cpu_sheaves);
2841 s->cpu_sheaves = NULL;
2842}
2843
2844static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn)
2845{
2846 struct slab_sheaf *empty = NULL;
2847 unsigned long flags;
2848
2849 if (!data_race(barn->nr_empty))
2850 return NULL;
2851
2852 spin_lock_irqsave(&barn->lock, flags);
2853
2854 if (likely(barn->nr_empty)) {
2855 empty = list_first_entry(&barn->sheaves_empty,
2856 struct slab_sheaf, barn_list);
2857		list_del(&empty->barn_list);
2858		barn->nr_empty--;
2859	}
2860
2861	spin_unlock_irqrestore(&barn->lock, flags);
2862
2863 return empty;
2864}
2865
2866/*
2867 * The following two functions are used mainly in cases where we have to undo an
2868 * intended action due to a race or cpu migration. Thus they do not check the
2869 * empty or full sheaf limits for simplicity.
2870 */
2871
2872static void barn_put_empty_sheaf(struct node_barn *barn, struct slab_sheaf *sheaf)
2873{
2874 unsigned long flags;
2875
2876 spin_lock_irqsave(&barn->lock, flags);
2877
2878	list_add(&sheaf->barn_list, &barn->sheaves_empty);
2879	barn->nr_empty++;
2880
2881	spin_unlock_irqrestore(&barn->lock, flags);
2882}
2883
2884static void barn_put_full_sheaf(struct node_barn *barn, struct slab_sheaf *sheaf)
2885{
2886 unsigned long flags;
2887
2888 spin_lock_irqsave(&barn->lock, flags);
2889
2890	list_add(&sheaf->barn_list, &barn->sheaves_full);
2891	barn->nr_full++;
2892
2893	spin_unlock_irqrestore(&barn->lock, flags);
2894}
2895
2896static struct slab_sheaf *barn_get_full_or_empty_sheaf(struct node_barn *barn)
2897{
2898 struct slab_sheaf *sheaf = NULL;
2899 unsigned long flags;
2900
2901 if (!data_race(barn->nr_full) && !data_race(barn->nr_empty))
2902 return NULL;
2903
2904 spin_lock_irqsave(&barn->lock, flags);
2905
2906 if (barn->nr_full) {
2907 sheaf = list_first_entry(&barn->sheaves_full, struct slab_sheaf,
2908 barn_list);
2909		list_del(&sheaf->barn_list);
2910 barn->nr_full--;
2911 } else if (barn->nr_empty) {
2912 sheaf = list_first_entry(&barn->sheaves_empty,
2913 struct slab_sheaf, barn_list);
2914		list_del(&sheaf->barn_list);
2915 barn->nr_empty--;
2916 }
2917
2918	spin_unlock_irqrestore(&barn->lock, flags);
2919
2920 return sheaf;
2921}
2922
2923/*
2924 * If a full sheaf is available, return it and put the supplied empty one to
2925 * the barn. We ignore the limit on empty sheaves as the number of sheaves
2926 * doesn't change.
2927 */
2928static struct slab_sheaf *
2929barn_replace_empty_sheaf(struct node_barn *barn, struct slab_sheaf *empty)
2930{
2931 struct slab_sheaf *full = NULL;
2932 unsigned long flags;
2933
2934 if (!data_race(barn->nr_full))
2935 return NULL;
2936
2937 spin_lock_irqsave(&barn->lock, flags);
2938
2939 if (likely(barn->nr_full)) {
2940 full = list_first_entry(&barn->sheaves_full, struct slab_sheaf,
2941 barn_list);
2942		list_del(&full->barn_list);
2943		list_add(&empty->barn_list, &barn->sheaves_empty);
2944 barn->nr_full--;
2945 barn->nr_empty++;
2946 }
2947
2948	spin_unlock_irqrestore(&barn->lock, flags);
2949
2950 return full;
2951}
2952
2953/*
2954 * If an empty sheaf is available, return it and put the supplied full one to
2955 * the barn. But if there are too many full sheaves, reject this with -E2BIG.
2956 */
2957static struct slab_sheaf *
2958barn_replace_full_sheaf(struct node_barn *barn, struct slab_sheaf *full)
2959{
2960 struct slab_sheaf *empty;
2961 unsigned long flags;
2962
2963 /* we don't repeat this check under barn->lock as it's not critical */
2964 if (data_race(barn->nr_full) >= MAX_FULL_SHEAVES)
2965		return ERR_PTR(-E2BIG);
2966	if (!data_race(barn->nr_empty))
2967		return ERR_PTR(-ENOMEM);
2968
2969 spin_lock_irqsave(&barn->lock, flags);
2970
2971 if (likely(barn->nr_empty)) {
2972 empty = list_first_entry(&barn->sheaves_empty, struct slab_sheaf,
2973 barn_list);
2974		list_del(&empty->barn_list);
2975		list_add(&full->barn_list, &barn->sheaves_full);
2976 barn->nr_empty--;
2977 barn->nr_full++;
2978 } else {
2979		empty = ERR_PTR(-ENOMEM);
2980 }
2981
2982	spin_unlock_irqrestore(&barn->lock, flags);
2983
2984 return empty;
2985}
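/*
 * A rough summary of how the barn exchange helpers above fit together (a
 * sketch based on the functions in this file, not a formal API contract):
 *
 *   allocation slow path: barn_replace_empty_sheaf() - hand in the exhausted
 *       main sheaf, get a full one back
 *   free slow path:       barn_replace_full_sheaf()  - hand in the full main
 *       sheaf, get an empty one back, bounded by MAX_FULL_SHEAVES (-E2BIG)
 *       or by the supply of empty sheaves (-ENOMEM)
 *   undo paths:           barn_put_empty_sheaf() / barn_put_full_sheaf(),
 *       unconditional and without limit checks (see the comment above them)
 */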
2986
2987static void barn_init(struct node_barn *barn)
2988{
2989 spin_lock_init(&barn->lock);
2990	INIT_LIST_HEAD(&barn->sheaves_full);
2991	INIT_LIST_HEAD(&barn->sheaves_empty);
2992 barn->nr_full = 0;
2993 barn->nr_empty = 0;
2994}
2995
2996static void barn_shrink(struct kmem_cache *s, struct node_barn *barn)
2997{
2998 struct list_head empty_list;
2999 struct list_head full_list;
3000 struct slab_sheaf *sheaf, *sheaf2;
3001 unsigned long flags;
3002
3003	INIT_LIST_HEAD(&empty_list);
3004	INIT_LIST_HEAD(&full_list);
3005
3006 spin_lock_irqsave(&barn->lock, flags);
3007
3008	list_splice_init(&barn->sheaves_full, &full_list);
3009	barn->nr_full = 0;
3010	list_splice_init(&barn->sheaves_empty, &empty_list);
3011 barn->nr_empty = 0;
3012
3013	spin_unlock_irqrestore(&barn->lock, flags);
3014
3015 list_for_each_entry_safe(sheaf, sheaf2, &full_list, barn_list) {
3016 sheaf_flush_unused(s, sheaf);
3017 free_empty_sheaf(s, sheaf);
3018 }
3019
3020 list_for_each_entry_safe(sheaf, sheaf2, &empty_list, barn_list)
3021 free_empty_sheaf(s, sheaf);
3022}
3023
3024/*
3025 * Slab allocation and freeing
3026 */
3027static inline struct slab *alloc_slab_page(gfp_t flags, int node,
3028 struct kmem_cache_order_objects oo,
3029 bool allow_spin)
3030{
3031 struct folio *folio;
3032 struct slab *slab;
3033	unsigned int order = oo_order(oo);
3034
3035 if (unlikely(!allow_spin))
3036 folio = (struct folio *)alloc_frozen_pages_nolock(0/* __GFP_COMP is implied */,
3037 node, order);
3038 else if (node == NUMA_NO_NODE)
3039 folio = (struct folio *)alloc_frozen_pages(flags, order);
3040 else
3041 folio = (struct folio *)__alloc_frozen_pages(flags, order, node, NULL);
3042
3043 if (!folio)
3044 return NULL;
3045
3046 slab = folio_slab(folio);
3047 __folio_set_slab(folio);
3048 if (folio_is_pfmemalloc(folio))
3049 slab_set_pfmemalloc(slab);
3050
3051 return slab;
3052}
3053
3054#ifdef CONFIG_SLAB_FREELIST_RANDOM
3055/* Pre-initialize the random sequence cache */
3056static int init_cache_random_seq(struct kmem_cache *s)
3057{
3058 unsigned int count = oo_objects(s->oo);
3059 int err;
3060
3061 /* Bailout if already initialised */
3062 if (s->random_seq)
3063 return 0;
3064
3065 err = cache_random_seq_create(s, count, GFP_KERNEL);
3066 if (err) {
3067 pr_err("SLUB: Unable to initialize free list for %s\n",
3068 s->name);
3069 return err;
3070 }
3071
3072 /* Transform to an offset on the set of pages */
3073 if (s->random_seq) {
3074 unsigned int i;
3075
3076 for (i = 0; i < count; i++)
3077 s->random_seq[i] *= s->size;
3078 }
3079 return 0;
3080}
3081
3082/* Initialize each random sequence freelist per cache */
3083static void __init init_freelist_randomization(void)
3084{
3085 struct kmem_cache *s;
3086
3087 mutex_lock(&slab_mutex);
3088
3089 list_for_each_entry(s, &slab_caches, list)
3090 init_cache_random_seq(s);
3091
3092 mutex_unlock(&slab_mutex);
3093}
3094
3095/* Get the next entry on the pre-computed freelist randomized */
3096static void *next_freelist_entry(struct kmem_cache *s,
3097 unsigned long *pos, void *start,
3098 unsigned long page_limit,
3099 unsigned long freelist_count)
3100{
3101 unsigned int idx;
3102
3103 /*
3104 * If the target page allocation failed, the number of objects on the
3105 * page might be smaller than the usual size defined by the cache.
3106 */
3107 do {
3108 idx = s->random_seq[*pos];
3109 *pos += 1;
3110 if (*pos >= freelist_count)
3111 *pos = 0;
3112 } while (unlikely(idx >= page_limit));
3113
3114 return (char *)start + idx;
3115}
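/*
 * A hedged example of the lookup above: init_cache_random_seq() has already
 * multiplied every random_seq[] entry by s->size, so idx is a byte offset
 * and "start + idx" points straight at an object. E.g. with s->size == 256
 * and a sequence { 3, 0, 2, 1 } pre-scaled to { 768, 0, 512, 256 }, the
 * freelist visits the objects at those byte offsets, starting from a random
 * position in the sequence and skipping entries beyond page_limit on
 * smaller-than-usual slabs.
 */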
3116
3117/* Shuffle the single linked freelist based on a random pre-computed sequence */
3118static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
3119{
3120 void *start;
3121 void *cur;
3122 void *next;
3123 unsigned long idx, pos, page_limit, freelist_count;
3124
3125 if (slab->objects < 2 || !s->random_seq)
3126 return false;
3127
3128 freelist_count = oo_objects(s->oo);
3129 pos = get_random_u32_below(freelist_count);
3130
3131 page_limit = slab->objects * s->size;
3132 start = fixup_red_left(s, slab_address(slab));
3133
3134 /* First entry is used as the base of the freelist */
3135 cur = next_freelist_entry(s, &pos, start, page_limit, freelist_count);
3136 cur = setup_object(s, cur);
3137 slab->freelist = cur;
3138
3139 for (idx = 1; idx < slab->objects; idx++) {
3140 next = next_freelist_entry(s, &pos, start, page_limit,
3141 freelist_count);
3142 next = setup_object(s, next);
3143 set_freepointer(s, cur, next);
3144 cur = next;
3145 }
3146 set_freepointer(s, cur, NULL);
3147
3148 return true;
3149}
3150#else
3151static inline int init_cache_random_seq(struct kmem_cache *s)
3152{
3153 return 0;
3154}
3155static inline void init_freelist_randomization(void) { }
3156static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
3157{
3158 return false;
3159}
3160#endif /* CONFIG_SLAB_FREELIST_RANDOM */
3161
3162static __always_inline void account_slab(struct slab *slab, int order,
3163 struct kmem_cache *s, gfp_t gfp)
3164{
3165 if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
3166		alloc_slab_obj_exts(slab, s, gfp, true);
3167
3168 mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
3169 PAGE_SIZE << order);
3170}
3171
3172static __always_inline void unaccount_slab(struct slab *slab, int order,
3173 struct kmem_cache *s)
3174{
3175 /*
3176	 * The slab object extensions should now be freed regardless of whether
3177	 * mem_alloc_profiling_enabled() is true, because profiling might have
3178	 * been disabled after slab->obj_exts was allocated.
3179 */
3180 free_slab_obj_exts(slab);
3181
3182 mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
3183 -(PAGE_SIZE << order));
3184}
3185
3186static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
3187{
3188	bool allow_spin = gfpflags_allow_spinning(flags);
3189 struct slab *slab;
3190 struct kmem_cache_order_objects oo = s->oo;
3191 gfp_t alloc_gfp;
3192 void *start, *p, *next;
3193 int idx;
3194 bool shuffle;
3195
3196 flags &= gfp_allowed_mask;
3197
3198 flags |= s->allocflags;
3199
3200 /*
3201 * Let the initial higher-order allocation fail under memory pressure
3202	 * so we fall back to the minimum order allocation.
3203	 */
3204	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
3205	if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
3206		alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_RECLAIM;
3207
3208 /*
3209 * __GFP_RECLAIM could be cleared on the first allocation attempt,
3210 * so pass allow_spin flag directly.
3211 */
3212	slab = alloc_slab_page(alloc_gfp, node, oo, allow_spin);
3213 if (unlikely(!slab)) {
3214 oo = s->min;
3215 alloc_gfp = flags;
3216 /*
3217 * Allocation may have failed due to fragmentation.
3218 * Try a lower order alloc if possible
3219 */
3220		slab = alloc_slab_page(alloc_gfp, node, oo, allow_spin);
3221		if (unlikely(!slab))
3222			return NULL;
3223		stat(s, ORDER_FALLBACK);
3224 }
3225
3226	slab->objects = oo_objects(oo);
3227 slab->inuse = 0;
3228 slab->frozen = 0;
3229 init_slab_obj_exts(slab);
3230
3231	account_slab(slab, oo_order(oo), s, flags);
3232
3233 slab->slab_cache = s;
3234
3235 kasan_poison_slab(slab);
3236
3237 start = slab_address(slab);
3238
3239	setup_slab_debug(s, slab, start);
3240
3241 shuffle = shuffle_freelist(s, slab);
3242
3243 if (!shuffle) {
3244		start = fixup_red_left(s, start);
3245		start = setup_object(s, start);
3246		slab->freelist = start;
3247		for (idx = 0, p = start; idx < slab->objects - 1; idx++) {
3248			next = p + s->size;
3249			next = setup_object(s, next);
3250			set_freepointer(s, p, next);
3251			p = next;
3252		}
3253		set_freepointer(s, p, NULL);
3254 }
3255
3256 return slab;
3257}
3258
3259static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
3260{
3261 if (unlikely(flags & GFP_SLAB_BUG_MASK))
3262 flags = kmalloc_fix_flags(flags);
3263
3264 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
3265
3266	return allocate_slab(s,
3267		flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
3268}
3269
3270static void __free_slab(struct kmem_cache *s, struct slab *slab)
3271{
3272 struct folio *folio = slab_folio(slab);
3273 int order = folio_order(folio);
3274 int pages = 1 << order;
3275
3276 __slab_clear_pfmemalloc(slab);
3277 folio->mapping = NULL;
3278 __folio_clear_slab(folio);
3279 mm_account_reclaimed_pages(pages);
3280 unaccount_slab(slab, order, s);
3281	free_frozen_pages(&folio->page, order);
3282}
3283
3284static void rcu_free_slab(struct rcu_head *h)
3285{
3286 struct slab *slab = container_of(h, struct slab, rcu_head);
3287
3288	__free_slab(slab->slab_cache, slab);
3289}
3290
3291static void free_slab(struct kmem_cache *s, struct slab *slab)
3292{
3293 if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
3294 void *p;
3295
3296 slab_pad_check(s, slab);
3297 for_each_object(p, s, slab_address(slab), slab->objects)
3298			check_object(s, slab, p, SLUB_RED_INACTIVE);
3299 }
3300
3301 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU))
3302		call_rcu(&slab->rcu_head, rcu_free_slab);
3303 else
3304 __free_slab(s, slab);
3305}
3306
3307static void discard_slab(struct kmem_cache *s, struct slab *slab)
3308{
3309	dec_slabs_node(s, slab_nid(slab), slab->objects);
3310 free_slab(s, slab);
3311}
3312
3313static inline bool slab_test_node_partial(const struct slab *slab)
3314{
3315 return test_bit(SL_partial, &slab->flags.f);
3316}
3317
3318static inline void slab_set_node_partial(struct slab *slab)
3319{
3320	set_bit(SL_partial, &slab->flags.f);
3321}
3322
3323static inline void slab_clear_node_partial(struct slab *slab)
3324{
3325	clear_bit(SL_partial, &slab->flags.f);
3326}
3327
3328/*
3329 * Management of partially allocated slabs.
3330 */
3331static inline void
3332__add_partial(struct kmem_cache_node *n, struct slab *slab, int tail)
3333{
3334 n->nr_partial++;
3335 if (tail == DEACTIVATE_TO_TAIL)
3336		list_add_tail(&slab->slab_list, &n->partial);
3337	else
3338		list_add(&slab->slab_list, &n->partial);
3339 slab_set_node_partial(slab);
3340}
3341
3342static inline void add_partial(struct kmem_cache_node *n,
3343 struct slab *slab, int tail)
3344{
3345 lockdep_assert_held(&n->list_lock);
3346 __add_partial(n, slab, tail);
3347}
3348
3349static inline void remove_partial(struct kmem_cache_node *n,
3350 struct slab *slab)
3351{
3352 lockdep_assert_held(&n->list_lock);
3353	list_del(&slab->slab_list);
3354 slab_clear_node_partial(slab);
3355 n->nr_partial--;
3356}
3357
3358/*
3359 * Called only for kmem_cache_debug() caches instead of remove_partial(), with a
3360 * slab from the n->partial list. Remove only a single object from the slab, do
3361 * the alloc_debug_processing() checks and leave the slab on the list, or move
3362 * it to full list if it was the last free object.
3363 */
3364static void *alloc_single_from_partial(struct kmem_cache *s,
3365 struct kmem_cache_node *n, struct slab *slab, int orig_size)
3366{
3367 void *object;
3368
3369 lockdep_assert_held(&n->list_lock);
3370
3371#ifdef CONFIG_SLUB_DEBUG
3372 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
3373 if (!validate_slab_ptr(slab)) {
3374			slab_err(s, slab, "Not a valid slab page");
3375 return NULL;
3376 }
3377 }
3378#endif
3379
3380 object = slab->freelist;
3381 slab->freelist = get_freepointer(s, object);
3382 slab->inuse++;
3383
3384 if (!alloc_debug_processing(s, slab, object, orig_size)) {
3385 remove_partial(n, slab);
3386 return NULL;
3387 }
3388
3389 if (slab->inuse == slab->objects) {
3390 remove_partial(n, slab);
3391 add_full(s, n, slab);
3392 }
3393
3394 return object;
3395}
3396
3397static void defer_deactivate_slab(struct slab *slab, void *flush_freelist);
3398
3399/*
3400 * Called only for kmem_cache_debug() caches to allocate from a freshly
3401 * allocated slab. Allocate a single object instead of whole freelist
3402 * and put the slab to the partial (or full) list.
3403 */
3404static void *alloc_single_from_new_slab(struct kmem_cache *s, struct slab *slab,
3405 int orig_size, gfp_t gfpflags)
3406{
3407	bool allow_spin = gfpflags_allow_spinning(gfpflags);
3408	int nid = slab_nid(slab);
3409	struct kmem_cache_node *n = get_node(s, nid);
3410 unsigned long flags;
3411 void *object;
3412
3413 if (!allow_spin && !spin_trylock_irqsave(&n->list_lock, flags)) {
3414 /* Unlucky, discard newly allocated slab */
3415 slab->frozen = 1;
3416 defer_deactivate_slab(slab, NULL);
3417 return NULL;
3418 }
3419
3420 object = slab->freelist;
3421 slab->freelist = get_freepointer(s, object);
3422 slab->inuse = 1;
3423
3424 if (!alloc_debug_processing(s, slab, object, orig_size)) {
3425 /*
3426 * It's not really expected that this would fail on a
3427 * freshly allocated slab, but a concurrent memory
3428 * corruption in theory could cause that.
3429 * Leak memory of allocated slab.
3430 */
3431 if (!allow_spin)
3432			spin_unlock_irqrestore(&n->list_lock, flags);
3433 return NULL;
3434 }
3435
3436 if (allow_spin)
3437 spin_lock_irqsave(&n->list_lock, flags);
3438
3439 if (slab->inuse == slab->objects)
3440 add_full(s, n, slab);
3441 else
3442		add_partial(n, slab, DEACTIVATE_TO_HEAD);
3443
3444	inc_slabs_node(s, nid, slab->objects);
3445	spin_unlock_irqrestore(&n->list_lock, flags);
3446
3447 return object;
3448}
3449
3450#ifdef CONFIG_SLUB_CPU_PARTIAL
3451static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain);
3452#else
3453static inline void put_cpu_partial(struct kmem_cache *s, struct slab *slab,
3454 int drain) { }
3455#endif
3456static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
3457
3458/*
3459 * Try to allocate a partial slab from a specific node.
3460 */
3461static struct slab *get_partial_node(struct kmem_cache *s,
3462 struct kmem_cache_node *n,
3463 struct partial_context *pc)
3464{
3465 struct slab *slab, *slab2, *partial = NULL;
3466 unsigned long flags;
3467 unsigned int partial_slabs = 0;
3468
3469 /*
3470 * Racy check. If we mistakenly see no partial slabs then we
3471 * just allocate an empty slab. If we mistakenly try to get a
3472 * partial slab and there is none available then get_partial()
3473 * will return NULL.
3474 */
3475 if (!n || !n->nr_partial)
3476 return NULL;
3477
3478	if (gfpflags_allow_spinning(pc->flags))
3479 spin_lock_irqsave(&n->list_lock, flags);
3480 else if (!spin_trylock_irqsave(&n->list_lock, flags))
3481 return NULL;
3482 list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
3483		if (!pfmemalloc_match(slab, pc->flags))
3484 continue;
3485
3486 if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
3487 void *object = alloc_single_from_partial(s, n, slab,
3488							pc->orig_size);
3489 if (object) {
3490 partial = slab;
3491 pc->object = object;
3492 break;
3493 }
3494 continue;
3495 }
3496
3497 remove_partial(n, slab);
3498
3499 if (!partial) {
3500 partial = slab;
3501			stat(s, ALLOC_FROM_PARTIAL);
3502
3503 if ((slub_get_cpu_partial(s) == 0)) {
3504 break;
3505 }
3506 } else {
3507			put_cpu_partial(s, slab, 0);
3508			stat(s, CPU_PARTIAL_NODE);
3509
3510 if (++partial_slabs > slub_get_cpu_partial(s) / 2) {
3511 break;
3512 }
3513 }
3514 }
3515	spin_unlock_irqrestore(&n->list_lock, flags);
3516 return partial;
3517}
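/*
 * Note on the return convention above: for normal caches the first suitable
 * slab is returned to become the new cpu slab and up to roughly half of
 * slub_get_cpu_partial() additional slabs are stashed on the cpu partial
 * list; for debug or SLUB_TINY caches a single object is taken instead,
 * returned via pc->object, and the slab stays on the node's lists.
 */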
3518
3519/*
3520 * Get a slab from somewhere. Search in increasing NUMA distances.
3521 */
3522static struct slab *get_any_partial(struct kmem_cache *s,
3523 struct partial_context *pc)
3524{
3525#ifdef CONFIG_NUMA
3526 struct zonelist *zonelist;
3527 struct zoneref *z;
3528 struct zone *zone;
3529	enum zone_type highest_zoneidx = gfp_zone(pc->flags);
3530 struct slab *slab;
3531 unsigned int cpuset_mems_cookie;
3532
3533 /*
3534 * The defrag ratio allows a configuration of the tradeoffs between
3535 * inter node defragmentation and node local allocations. A lower
3536 * defrag_ratio increases the tendency to do local allocations
3537 * instead of attempting to obtain partial slabs from other nodes.
3538 *
3539 * If the defrag_ratio is set to 0 then kmalloc() always
3540 * returns node local objects. If the ratio is higher then kmalloc()
3541 * may return off node objects because partial slabs are obtained
3542 * from other nodes and filled up.
3543 *
3544 * If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
3545 * (which makes defrag_ratio = 1000) then every (well almost)
3546 * allocation will first attempt to defrag slab caches on other nodes.
3547 * This means scanning over all nodes to look for partial slabs which
3548 * may be expensive if we do it every time we are trying to find a slab
3549 * with available objects.
3550 */
3551 if (!s->remote_node_defrag_ratio ||
3552 get_cycles() % 1024 > s->remote_node_defrag_ratio)
3553 return NULL;
3554
3555 do {
3556 cpuset_mems_cookie = read_mems_allowed_begin();
3557		zonelist = node_zonelist(mempolicy_slab_node(), pc->flags);
3558 for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
3559 struct kmem_cache_node *n;
3560
3561			n = get_node(s, zone_to_nid(zone));
3562
3563			if (n && cpuset_zone_allowed(zone, pc->flags) &&
3564 n->nr_partial > s->min_partial) {
3565 slab = get_partial_node(s, n, pc);
3566 if (slab) {
3567 /*
3568 * Don't check read_mems_allowed_retry()
3569 * here - if mems_allowed was updated in
3570 * parallel, that was a harmless race
3571 * between allocation and the cpuset
3572 * update
3573 */
3574 return slab;
3575 }
3576 }
3577 }
3578	} while (read_mems_allowed_retry(cpuset_mems_cookie));
3579#endif /* CONFIG_NUMA */
3580 return NULL;
3581}
3582
3583/*
3584 * Get a partial slab, lock it and return it.
3585 */
3586static struct slab *get_partial(struct kmem_cache *s, int node,
3587 struct partial_context *pc)
3588{
3589 struct slab *slab;
3590 int searchnode = node;
3591
3592 if (node == NUMA_NO_NODE)
3593 searchnode = numa_mem_id();
3594
3595	slab = get_partial_node(s, get_node(s, searchnode), pc);
3596 if (slab || (node != NUMA_NO_NODE && (pc->flags & __GFP_THISNODE)))
3597 return slab;
3598
3599 return get_any_partial(s, pc);
3600}
3601
3602#ifndef CONFIG_SLUB_TINY
3603
3604#ifdef CONFIG_PREEMPTION
3605/*
3606 * Calculate the next globally unique transaction for disambiguation
3607 * during cmpxchg. The transactions start with the cpu number and are then
3608 * incremented by CONFIG_NR_CPUS.
3609 */
3610#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
3611#else
3612/*
3613 * No preemption supported therefore also no need to check for
3614 * different cpus.
3615 */
3616#define TID_STEP 1
3617#endif /* CONFIG_PREEMPTION */
3618
3619static inline unsigned long next_tid(unsigned long tid)
3620{
3621 return tid + TID_STEP;
3622}
3623
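/*
 * A hedged example of the tid encoding with preemption enabled: TID_STEP is
 * the possible cpu count rounded up to a power of two, so with
 * CONFIG_NR_CPUS == 6 we get TID_STEP == 8 and cpu 3 issues tids 3, 11,
 * 19, ... The low bits thus identify the cpu (tid % TID_STEP) and the high
 * bits count events on that cpu (tid / TID_STEP), which is what the debug
 * helpers below decode.
 */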
3624#ifdef SLUB_DEBUG_CMPXCHG
3625static inline unsigned int tid_to_cpu(unsigned long tid)
3626{
3627 return tid % TID_STEP;
3628}
3629
3630static inline unsigned long tid_to_event(unsigned long tid)
3631{
3632 return tid / TID_STEP;
3633}
3634#endif
3635
3636static inline unsigned int init_tid(int cpu)
3637{
3638 return cpu;
3639}
3640
3641static inline void note_cmpxchg_failure(const char *n,
3642 const struct kmem_cache *s, unsigned long tid)
3643{
3644#ifdef SLUB_DEBUG_CMPXCHG
3645 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
3646
3647 pr_info("%s %s: cmpxchg redo ", n, s->name);
3648
3649 if (IS_ENABLED(CONFIG_PREEMPTION) &&
3650 tid_to_cpu(tid) != tid_to_cpu(actual_tid)) {
3651 pr_warn("due to cpu change %d -> %d\n",
3652 tid_to_cpu(tid), tid_to_cpu(actual_tid));
3653 } else if (tid_to_event(tid) != tid_to_event(actual_tid)) {
3654 pr_warn("due to cpu running other code. Event %ld->%ld\n",
3655 tid_to_event(tid), tid_to_event(actual_tid));
3656 } else {
3657 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
3658 actual_tid, tid, next_tid(tid));
3659 }
3660#endif
3661	stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
3662}
3663
3664static void init_kmem_cache_cpus(struct kmem_cache *s)
3665{
3666#ifdef CONFIG_PREEMPT_RT
3667 /*
3668 * Register lockdep key for non-boot kmem caches to avoid
3669 * WARN_ON_ONCE(static_obj(key))) in lockdep_register_key()
3670 */
3671 bool finegrain_lockdep = !init_section_contains(s, 1);
3672#else
3673 /*
3674 * Don't bother with different lockdep classes for each
3675 * kmem_cache, since we only use local_trylock_irqsave().
3676 */
3677 bool finegrain_lockdep = false;
3678#endif
3679 int cpu;
3680 struct kmem_cache_cpu *c;
3681
3682 if (finegrain_lockdep)
3683		lockdep_register_key(&s->lock_key);
3684 for_each_possible_cpu(cpu) {
3685 c = per_cpu_ptr(s->cpu_slab, cpu);
3686 local_trylock_init(&c->lock);
3687 if (finegrain_lockdep)
3688 lockdep_set_class(&c->lock, &s->lock_key);
3689 c->tid = init_tid(cpu);
3690 }
3691}
3692
3693/*
3694 * Finishes removing the cpu slab. Merges cpu's freelist with slab's freelist,
3695 * unfreezes the slab and puts it on the proper list.
3696 * Assumes the slab has been already safely taken away from kmem_cache_cpu
3697 * by the caller.
3698 */
3699static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
3700 void *freelist)
3701{
3702	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
3703 int free_delta = 0;
3704 void *nextfree, *freelist_iter, *freelist_tail;
3705 int tail = DEACTIVATE_TO_HEAD;
3706 unsigned long flags = 0;
3707 struct slab new;
3708 struct slab old;
3709
3710 if (READ_ONCE(slab->freelist)) {
3711		stat(s, DEACTIVATE_REMOTE_FREES);
3712 tail = DEACTIVATE_TO_TAIL;
3713 }
3714
3715 /*
3716 * Stage one: Count the objects on cpu's freelist as free_delta and
3717 * remember the last object in freelist_tail for later splicing.
3718 */
3719 freelist_tail = NULL;
3720 freelist_iter = freelist;
3721 while (freelist_iter) {
3722		nextfree = get_freepointer(s, freelist_iter);
3723
3724 /*
3725 * If 'nextfree' is invalid, it is possible that the object at
3726 * 'freelist_iter' is already corrupted. So isolate all objects
3727 * starting at 'freelist_iter' by skipping them.
3728 */
3729		if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
3730 break;
3731
3732 freelist_tail = freelist_iter;
3733 free_delta++;
3734
3735 freelist_iter = nextfree;
3736 }
3737
3738 /*
3739 * Stage two: Unfreeze the slab while splicing the per-cpu
3740 * freelist to the head of slab's freelist.
3741 */
3742 do {
3743 old.freelist = READ_ONCE(slab->freelist);
3744 old.counters = READ_ONCE(slab->counters);
3745 VM_BUG_ON(!old.frozen);
3746
3747 /* Determine target state of the slab */
3748 new.counters = old.counters;
3749 new.frozen = 0;
3750 if (freelist_tail) {
3751 new.inuse -= free_delta;
3752			set_freepointer(s, freelist_tail, old.freelist);
3753 new.freelist = freelist;
3754 } else {
3755 new.freelist = old.freelist;
3756 }
3757	} while (!slab_update_freelist(s, slab,
3758				old.freelist, old.counters,
3759				new.freelist, new.counters,
3760				"unfreezing slab"));
3761
3762 /*
3763 * Stage three: Manipulate the slab list based on the updated state.
3764 */
3765 if (!new.inuse && n->nr_partial >= s->min_partial) {
3766		stat(s, DEACTIVATE_EMPTY);
3767		discard_slab(s, slab);
3768		stat(s, FREE_SLAB);
3769 } else if (new.freelist) {
3770 spin_lock_irqsave(&n->list_lock, flags);
3771 add_partial(n, slab, tail);
3772		spin_unlock_irqrestore(&n->list_lock, flags);
3773		stat(s, tail);
3774 } else {
3775		stat(s, DEACTIVATE_FULL);
3776 }
3777}
3778
3779/*
3780 * ___slab_alloc()'s caller is supposed to check if kmem_cache::kmem_cache_cpu::lock
3781 * can be acquired without a deadlock before invoking the function.
3782 *
3783 * Without LOCKDEP we trust the code to be correct. kmalloc_nolock() is
3784 * using local_lock_is_locked() properly before calling local_lock_cpu_slab(),
3785 * and kmalloc() is not used in an unsupported context.
3786 *
3787 * With LOCKDEP, on PREEMPT_RT lockdep does its checking in local_lock_irqsave().
3788 * On !PREEMPT_RT we use trylock to avoid false positives in NMI, but
3789 * lockdep_assert() will catch a bug in case:
3790 * #1
3791 * kmalloc() -> ___slab_alloc() -> irqsave -> NMI -> bpf -> kmalloc_nolock()
3792 * or
3793 * #2
3794 * kmalloc() -> ___slab_alloc() -> irqsave -> tracepoint/kprobe -> bpf -> kmalloc_nolock()
3795 *
3796 * On PREEMPT_RT an invocation is not possible from IRQ-off or preempt
3797 * disabled context. The lock will always be acquired and, if needed, it
3798 * will block and sleep until the lock is available.
3799 * #1 is possible in !PREEMPT_RT only.
3800 * #2 is possible in both with a twist that irqsave is replaced with rt_spinlock:
3801 * kmalloc() -> ___slab_alloc() -> rt_spin_lock(kmem_cache_A) ->
3802 * tracepoint/kprobe -> bpf -> kmalloc_nolock() -> rt_spin_lock(kmem_cache_B)
3803 *
3804 * local_lock_is_locked() prevents the case kmem_cache_A == kmem_cache_B
3805 */
3806#if defined(CONFIG_PREEMPT_RT) || !defined(CONFIG_LOCKDEP)
3807#define local_lock_cpu_slab(s, flags) \
3808 local_lock_irqsave(&(s)->cpu_slab->lock, flags)
3809#else
3810#define local_lock_cpu_slab(s, flags) \
3811 do { \
3812 bool __l = local_trylock_irqsave(&(s)->cpu_slab->lock, flags); \
3813 lockdep_assert(__l); \
3814 } while (0)
3815#endif
3816
3817#define local_unlock_cpu_slab(s, flags) \
3818 local_unlock_irqrestore(&(s)->cpu_slab->lock, flags)
3819
3820#ifdef CONFIG_SLUB_CPU_PARTIAL
3821static void __put_partials(struct kmem_cache *s, struct slab *partial_slab)
3822{
3823 struct kmem_cache_node *n = NULL, *n2 = NULL;
3824 struct slab *slab, *slab_to_discard = NULL;
3825 unsigned long flags = 0;
3826
3827 while (partial_slab) {
3828 slab = partial_slab;
3829 partial_slab = slab->next;
3830
3831 n2 = get_node(s, node: slab_nid(slab));
3832 if (n != n2) {
3833 if (n)
3834 spin_unlock_irqrestore(lock: &n->list_lock, flags);
3835
3836 n = n2;
3837 spin_lock_irqsave(&n->list_lock, flags);
3838 }
3839
3840 if (unlikely(!slab->inuse && n->nr_partial >= s->min_partial)) {
3841 slab->next = slab_to_discard;
3842 slab_to_discard = slab;
3843 } else {
3844 add_partial(n, slab, tail: DEACTIVATE_TO_TAIL);
3845 stat(s, si: FREE_ADD_PARTIAL);
3846 }
3847 }
3848
3849 if (n)
3850 spin_unlock_irqrestore(lock: &n->list_lock, flags);
3851
3852 while (slab_to_discard) {
3853 slab = slab_to_discard;
3854 slab_to_discard = slab_to_discard->next;
3855
3856 stat(s, si: DEACTIVATE_EMPTY);
3857 discard_slab(s, slab);
3858 stat(s, si: FREE_SLAB);
3859 }
3860}
3861
3862/*
3863 * Put all the cpu partial slabs to the node partial list.
3864 */
3865static void put_partials(struct kmem_cache *s)
3866{
3867 struct slab *partial_slab;
3868 unsigned long flags;
3869
3870 local_lock_irqsave(&s->cpu_slab->lock, flags);
3871 partial_slab = this_cpu_read(s->cpu_slab->partial);
3872 this_cpu_write(s->cpu_slab->partial, NULL);
3873 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3874
3875 if (partial_slab)
3876 __put_partials(s, partial_slab);
3877}
3878
3879static void put_partials_cpu(struct kmem_cache *s,
3880 struct kmem_cache_cpu *c)
3881{
3882 struct slab *partial_slab;
3883
3884 partial_slab = slub_percpu_partial(c);
3885 c->partial = NULL;
3886
3887 if (partial_slab)
3888 __put_partials(s, partial_slab);
3889}
3890
3891/*
3892 * Put a slab into a partial slab slot if available.
3893 *
3894 * If we did not find a slot then simply move all the partials to the
3895 * per node partial list.
3896 */
3897static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
3898{
3899 struct slab *oldslab;
3900 struct slab *slab_to_put = NULL;
3901 unsigned long flags;
3902 int slabs = 0;
3903
3904 local_lock_cpu_slab(s, flags);
3905
3906 oldslab = this_cpu_read(s->cpu_slab->partial);
3907
3908 if (oldslab) {
3909 if (drain && oldslab->slabs >= s->cpu_partial_slabs) {
3910 /*
3911 * Partial array is full. Move the existing set to the
3912 * per node partial list. Postpone the actual unfreezing
3913 * outside of the critical section.
3914 */
3915 slab_to_put = oldslab;
3916 oldslab = NULL;
3917 } else {
3918 slabs = oldslab->slabs;
3919 }
3920 }
3921
3922 slabs++;
3923
3924 slab->slabs = slabs;
3925 slab->next = oldslab;
3926
3927 this_cpu_write(s->cpu_slab->partial, slab);
3928
3929 local_unlock_cpu_slab(s, flags);
3930
3931 if (slab_to_put) {
3932 __put_partials(s, partial_slab: slab_to_put);
3933 stat(s, si: CPU_PARTIAL_DRAIN);
3934 }
3935}
3936
3937#else /* CONFIG_SLUB_CPU_PARTIAL */
3938
3939static inline void put_partials(struct kmem_cache *s) { }
3940static inline void put_partials_cpu(struct kmem_cache *s,
3941 struct kmem_cache_cpu *c) { }
3942
3943#endif /* CONFIG_SLUB_CPU_PARTIAL */
3944
3945static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
3946{
3947 unsigned long flags;
3948 struct slab *slab;
3949 void *freelist;
3950
3951 local_lock_irqsave(&s->cpu_slab->lock, flags);
3952
3953 slab = c->slab;
3954 freelist = c->freelist;
3955
3956 c->slab = NULL;
3957 c->freelist = NULL;
3958 c->tid = next_tid(tid: c->tid);
3959
3960 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3961
3962 if (slab) {
3963 deactivate_slab(s, slab, freelist);
3964 stat(s, si: CPUSLAB_FLUSH);
3965 }
3966}
3967
3968static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
3969{
3970 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
3971 void *freelist = c->freelist;
3972 struct slab *slab = c->slab;
3973
3974 c->slab = NULL;
3975 c->freelist = NULL;
3976 c->tid = next_tid(tid: c->tid);
3977
3978 if (slab) {
3979 deactivate_slab(s, slab, freelist);
3980 stat(s, si: CPUSLAB_FLUSH);
3981 }
3982
3983 put_partials_cpu(s, c);
3984}
3985
3986static inline void flush_this_cpu_slab(struct kmem_cache *s)
3987{
3988 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
3989
3990 if (c->slab)
3991 flush_slab(s, c);
3992
3993 put_partials(s);
3994}
3995
3996static bool has_cpu_slab(int cpu, struct kmem_cache *s)
3997{
3998 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
3999
4000 return c->slab || slub_percpu_partial(c);
4001}
4002
4003#else /* CONFIG_SLUB_TINY */
4004static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { }
4005static inline bool has_cpu_slab(int cpu, struct kmem_cache *s) { return false; }
4006static inline void flush_this_cpu_slab(struct kmem_cache *s) { }
4007#endif /* CONFIG_SLUB_TINY */
4008
4009static bool has_pcs_used(int cpu, struct kmem_cache *s)
4010{
4011 struct slub_percpu_sheaves *pcs;
4012
4013 if (!s->cpu_sheaves)
4014 return false;
4015
4016 pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
4017
4018 return (pcs->spare || pcs->rcu_free || pcs->main->size);
4019}
4020
4021/*
4022 * Flush cpu slab.
4023 *
4024 * Called from CPU work handler with migration disabled.
4025 */
4026static void flush_cpu_slab(struct work_struct *w)
4027{
4028 struct kmem_cache *s;
4029 struct slub_flush_work *sfw;
4030
4031 sfw = container_of(w, struct slub_flush_work, work);
4032
4033 s = sfw->s;
4034
4035 if (s->cpu_sheaves)
4036 pcs_flush_all(s);
4037
4038 flush_this_cpu_slab(s);
4039}
4040
4041static void flush_all_cpus_locked(struct kmem_cache *s)
4042{
4043 struct slub_flush_work *sfw;
4044 unsigned int cpu;
4045
4046 lockdep_assert_cpus_held();
4047 mutex_lock(lock: &flush_lock);
4048
4049 for_each_online_cpu(cpu) {
4050 sfw = &per_cpu(slub_flush, cpu);
4051 if (!has_cpu_slab(cpu, s) && !has_pcs_used(cpu, s)) {
4052 sfw->skip = true;
4053 continue;
4054 }
4055 INIT_WORK(&sfw->work, flush_cpu_slab);
4056 sfw->skip = false;
4057 sfw->s = s;
4058 queue_work_on(cpu, wq: flushwq, work: &sfw->work);
4059 }
4060
4061 for_each_online_cpu(cpu) {
4062 sfw = &per_cpu(slub_flush, cpu);
4063 if (sfw->skip)
4064 continue;
4065 flush_work(work: &sfw->work);
4066 }
4067
4068 mutex_unlock(lock: &flush_lock);
4069}
4070
4071static void flush_all(struct kmem_cache *s)
4072{
4073 cpus_read_lock();
4074 flush_all_cpus_locked(s);
4075 cpus_read_unlock();
4076}
4077
4078static void flush_rcu_sheaf(struct work_struct *w)
4079{
4080 struct slub_percpu_sheaves *pcs;
4081 struct slab_sheaf *rcu_free;
4082 struct slub_flush_work *sfw;
4083 struct kmem_cache *s;
4084
4085 sfw = container_of(w, struct slub_flush_work, work);
4086 s = sfw->s;
4087
4088 local_lock(&s->cpu_sheaves->lock);
4089 pcs = this_cpu_ptr(s->cpu_sheaves);
4090
4091 rcu_free = pcs->rcu_free;
4092 pcs->rcu_free = NULL;
4093
4094 local_unlock(&s->cpu_sheaves->lock);
4095
4096 if (rcu_free)
4097 call_rcu(head: &rcu_free->rcu_head, func: rcu_free_sheaf_nobarn);
4098}
4099
4101/* needed for kvfree_rcu_barrier() */
4102void flush_all_rcu_sheaves(void)
4103{
4104 struct slub_flush_work *sfw;
4105 struct kmem_cache *s;
4106 unsigned int cpu;
4107
4108 cpus_read_lock();
4109 mutex_lock(lock: &slab_mutex);
4110
4111 list_for_each_entry(s, &slab_caches, list) {
4112 if (!s->cpu_sheaves)
4113 continue;
4114
4115 mutex_lock(lock: &flush_lock);
4116
4117 for_each_online_cpu(cpu) {
4118 sfw = &per_cpu(slub_flush, cpu);
4119
4120 /*
4121			 * We don't check if an rcu_free sheaf exists - a racing
4122			 * __kfree_rcu_sheaf() might have just removed it.
4123			 * By executing flush_rcu_sheaf() on the cpu we make
4124			 * sure the __kfree_rcu_sheaf() has finished its call_rcu().
4125 */
4126
4127 INIT_WORK(&sfw->work, flush_rcu_sheaf);
4128 sfw->s = s;
4129 queue_work_on(cpu, wq: flushwq, work: &sfw->work);
4130 }
4131
4132 for_each_online_cpu(cpu) {
4133 sfw = &per_cpu(slub_flush, cpu);
4134 flush_work(work: &sfw->work);
4135 }
4136
4137 mutex_unlock(lock: &flush_lock);
4138 }
4139
4140 mutex_unlock(lock: &slab_mutex);
4141 cpus_read_unlock();
4142
4143 rcu_barrier();
4144}
4145
4146/*
4147 * Use the cpu notifier to ensure that the cpu slabs are flushed when
4148 * necessary.
4149 */
4150static int slub_cpu_dead(unsigned int cpu)
4151{
4152 struct kmem_cache *s;
4153
4154 mutex_lock(lock: &slab_mutex);
4155 list_for_each_entry(s, &slab_caches, list) {
4156 __flush_cpu_slab(s, cpu);
4157 if (s->cpu_sheaves)
4158 __pcs_flush_all_cpu(s, cpu);
4159 }
4160 mutex_unlock(lock: &slab_mutex);
4161 return 0;
4162}
4163
4164/*
4165 * Check if the objects in a per cpu structure fit numa
4166 * locality expectations.
4167 */
4168static inline int node_match(struct slab *slab, int node)
4169{
4170#ifdef CONFIG_NUMA
4171 if (node != NUMA_NO_NODE && slab_nid(slab) != node)
4172 return 0;
4173#endif
4174 return 1;
4175}
4176
4177#ifdef CONFIG_SLUB_DEBUG
4178static int count_free(struct slab *slab)
4179{
4180 return slab->objects - slab->inuse;
4181}
4182
4183static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
4184{
4185 return atomic_long_read(v: &n->total_objects);
4186}
4187
4188/* Supports checking bulk free of a constructed freelist */
4189static inline bool free_debug_processing(struct kmem_cache *s,
4190 struct slab *slab, void *head, void *tail, int *bulk_cnt,
4191 unsigned long addr, depot_stack_handle_t handle)
4192{
4193 bool checks_ok = false;
4194 void *object = head;
4195 int cnt = 0;
4196
4197 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
4198 if (!check_slab(s, slab))
4199 goto out;
4200 }
4201
4202 if (slab->inuse < *bulk_cnt) {
4203 slab_err(s, slab, fmt: "Slab has %d allocated objects but %d are to be freed\n",
4204 slab->inuse, *bulk_cnt);
4205 goto out;
4206 }
4207
4208next_object:
4209
4210 if (++cnt > *bulk_cnt)
4211 goto out_cnt;
4212
4213 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
4214 if (!free_consistency_checks(s, slab, object, addr))
4215 goto out;
4216 }
4217
4218 if (s->flags & SLAB_STORE_USER)
4219 set_track_update(s, object, alloc: TRACK_FREE, addr, handle);
4220 trace(s, slab, object, alloc: 0);
4221 /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
4222 init_object(s, object, SLUB_RED_INACTIVE);
4223
4224 /* Reached end of constructed freelist yet? */
4225 if (object != tail) {
4226 object = get_freepointer(s, object);
4227 goto next_object;
4228 }
4229 checks_ok = true;
4230
4231out_cnt:
4232 if (cnt != *bulk_cnt) {
4233 slab_err(s, slab, fmt: "Bulk free expected %d objects but found %d\n",
4234 *bulk_cnt, cnt);
4235 *bulk_cnt = cnt;
4236 }
4237
4238out:
4239
4240 if (!checks_ok)
4241 slab_fix(s, fmt: "Object at 0x%p not freed", object);
4242
4243 return checks_ok;
4244}
4245#endif /* CONFIG_SLUB_DEBUG */
4246
4247#if defined(CONFIG_SLUB_DEBUG) || defined(SLAB_SUPPORTS_SYSFS)
4248static unsigned long count_partial(struct kmem_cache_node *n,
4249 int (*get_count)(struct slab *))
4250{
4251 unsigned long flags;
4252 unsigned long x = 0;
4253 struct slab *slab;
4254
4255 spin_lock_irqsave(&n->list_lock, flags);
4256 list_for_each_entry(slab, &n->partial, slab_list)
4257 x += get_count(slab);
4258 spin_unlock_irqrestore(lock: &n->list_lock, flags);
4259 return x;
4260}
4261#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
4262
4263#ifdef CONFIG_SLUB_DEBUG
4264#define MAX_PARTIAL_TO_SCAN 10000
4265
4266static unsigned long count_partial_free_approx(struct kmem_cache_node *n)
4267{
4268 unsigned long flags;
4269 unsigned long x = 0;
4270 struct slab *slab;
4271
4272 spin_lock_irqsave(&n->list_lock, flags);
4273 if (n->nr_partial <= MAX_PARTIAL_TO_SCAN) {
4274 list_for_each_entry(slab, &n->partial, slab_list)
4275 x += slab->objects - slab->inuse;
4276 } else {
4277 /*
4278 * For a long list, approximate the total count of objects in
4279 * it to meet the limit on the number of slabs to scan.
4280 * Scan from both the list's head and tail for better accuracy.
4281 */
4282 unsigned long scanned = 0;
4283
4284 list_for_each_entry(slab, &n->partial, slab_list) {
4285 x += slab->objects - slab->inuse;
4286 if (++scanned == MAX_PARTIAL_TO_SCAN / 2)
4287 break;
4288 }
4289 list_for_each_entry_reverse(slab, &n->partial, slab_list) {
4290 x += slab->objects - slab->inuse;
4291 if (++scanned == MAX_PARTIAL_TO_SCAN)
4292 break;
4293 }
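		/*
		 * Extrapolate from the scanned sample: if 'scanned' slabs held
		 * 'x' free objects, the whole list is estimated to hold
		 * x * nr_partial / scanned free objects (e.g. 4000 free objects
		 * found in 10000 scanned slabs out of 50000 partial slabs
		 * extrapolates to ~20000), clamped below to the node's total
		 * object count.
		 */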
4294 x = mult_frac(x, n->nr_partial, scanned);
4295 x = min(x, node_nr_objs(n));
4296 }
4297 spin_unlock_irqrestore(lock: &n->list_lock, flags);
4298 return x;
4299}
4300
4301static noinline void
4302slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
4303{
4304 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
4305 DEFAULT_RATELIMIT_BURST);
4306 int cpu = raw_smp_processor_id();
4307 int node;
4308 struct kmem_cache_node *n;
4309
4310 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
4311 return;
4312
4313 pr_warn("SLUB: Unable to allocate memory on CPU %u (of node %d) on node %d, gfp=%#x(%pGg)\n",
4314 cpu, cpu_to_node(cpu), nid, gfpflags, &gfpflags);
4315 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
4316 s->name, s->object_size, s->size, oo_order(s->oo),
4317 oo_order(s->min));
4318
4319 if (oo_order(x: s->min) > get_order(size: s->object_size))
4320 pr_warn(" %s debugging increased min order, use slab_debug=O to disable.\n",
4321 s->name);
4322
4323 for_each_kmem_cache_node(s, node, n) {
4324 unsigned long nr_slabs;
4325 unsigned long nr_objs;
4326 unsigned long nr_free;
4327
4328 nr_free = count_partial_free_approx(n);
4329 nr_slabs = node_nr_slabs(n);
4330 nr_objs = node_nr_objs(n);
4331
4332 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
4333 node, nr_slabs, nr_objs, nr_free);
4334 }
4335}
4336#else /* CONFIG_SLUB_DEBUG */
4337static inline void
4338slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { }
4339#endif
4340
4341static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
4342{
4343 if (unlikely(slab_test_pfmemalloc(slab)))
4344 return gfp_pfmemalloc_allowed(gfp_mask: gfpflags);
4345
4346 return true;
4347}
4348
4349#ifndef CONFIG_SLUB_TINY
4350static inline bool
4351__update_cpu_freelist_fast(struct kmem_cache *s,
4352 void *freelist_old, void *freelist_new,
4353 unsigned long tid)
4354{
4355 freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
4356 freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };
4357
4358 return this_cpu_try_cmpxchg_freelist(s->cpu_slab->freelist_tid.full,
4359 &old.full, new.full);
4360}
4361
4362/*
4363 * Check the slab->freelist and either transfer the freelist to the
4364 * per cpu freelist or deactivate the slab.
4365 *
4366 * The slab is still frozen if the return value is not NULL.
4367 *
4368 * If this function returns NULL then the slab has been unfrozen.
4369 */
4370static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
4371{
4372 struct slab new;
4373 unsigned long counters;
4374 void *freelist;
4375
4376 lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
4377
4378 do {
4379 freelist = slab->freelist;
4380 counters = slab->counters;
4381
4382 new.counters = counters;
4383
4384 new.inuse = slab->objects;
4385 new.frozen = freelist != NULL;
4386
4387 } while (!__slab_update_freelist(s, slab,
4388 freelist_old: freelist, counters_old: counters,
4389 NULL, counters_new: new.counters,
4390 n: "get_freelist"));
4391
4392 return freelist;
4393}
4394
4395/*
4396 * Freeze the partial slab and return the pointer to the freelist.
4397 */
4398static inline void *freeze_slab(struct kmem_cache *s, struct slab *slab)
4399{
4400 struct slab new;
4401 unsigned long counters;
4402 void *freelist;
4403
4404 do {
4405 freelist = slab->freelist;
4406 counters = slab->counters;
4407
4408 new.counters = counters;
4409 VM_BUG_ON(new.frozen);
4410
4411 new.inuse = slab->objects;
4412 new.frozen = 1;
4413
4414 } while (!slab_update_freelist(s, slab,
4415 freelist_old: freelist, counters_old: counters,
4416 NULL, counters_new: new.counters,
4417 n: "freeze_slab"));
4418
4419 return freelist;
4420}
4421
4422/*
4423 * Slow path. The lockless freelist is empty or we need to perform
4424 * debugging duties.
4425 *
4426 * Processing is still very fast if new objects have been freed to the
4427 * regular freelist. In that case we simply take over the regular freelist
4428 * as the lockless freelist and zap the regular freelist.
4429 *
4430 * If that is not working then we fall back to the partial lists. We take the
4431 * first element of the freelist as the object to allocate now and move the
4432 * rest of the freelist to the lockless freelist.
4433 *
4434 * And if we were unable to get a new slab from the partial slab lists then
4435 * we need to allocate a new slab. This is the slowest path since it involves
4436 * a call to the page allocator and the setup of a new slab.
4437 *
4438 * Version of __slab_alloc to use when we know that preemption is
4439 * already disabled (which is the case for bulk allocation).
4440 */
4441static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
4442 unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
4443{
4444 bool allow_spin = gfpflags_allow_spinning(gfp_flags: gfpflags);
4445 void *freelist;
4446 struct slab *slab;
4447 unsigned long flags;
4448 struct partial_context pc;
4449 bool try_thisnode = true;
4450
4451 stat(s, si: ALLOC_SLOWPATH);
4452
4453reread_slab:
4454
4455 slab = READ_ONCE(c->slab);
4456 if (!slab) {
4457 /*
4458 * if the node is not online or has no normal memory, just
4459 * ignore the node constraint
4460 */
4461 if (unlikely(node != NUMA_NO_NODE &&
4462 !node_isset(node, slab_nodes)))
4463 node = NUMA_NO_NODE;
4464 goto new_slab;
4465 }
4466
4467 if (unlikely(!node_match(slab, node))) {
4468 /*
4469 * same as above but node_match() being false already
4470 * implies node != NUMA_NO_NODE.
4471 *
4472 * We don't strictly honor pfmemalloc and NUMA preferences
4473 * when !allow_spin because:
4474 *
4475 * 1. Most kmalloc() users allocate objects on the local node,
4476 * so kmalloc_nolock() tries not to interfere with them by
4477 * deactivating the cpu slab.
4478 *
4479 * 2. Deactivating due to NUMA or pfmemalloc mismatch may cause
4480 * unnecessary slab allocations even when n->partial list
4481 * is not empty.
4482 */
4483 if (!node_isset(node, slab_nodes) ||
4484 !allow_spin) {
4485 node = NUMA_NO_NODE;
4486 } else {
4487 stat(s, si: ALLOC_NODE_MISMATCH);
4488 goto deactivate_slab;
4489 }
4490 }
4491
4492 /*
4493 * By rights, we should be searching for a slab page that was
4494 * PFMEMALLOC but right now, we are losing the pfmemalloc
4495 * information when the page leaves the per-cpu allocator
4496 */
4497 if (unlikely(!pfmemalloc_match(slab, gfpflags) && allow_spin))
4498 goto deactivate_slab;
4499
4500 /* must check again c->slab in case we got preempted and it changed */
4501 local_lock_cpu_slab(s, flags);
4502
4503 if (unlikely(slab != c->slab)) {
4504 local_unlock_cpu_slab(s, flags);
4505 goto reread_slab;
4506 }
4507 freelist = c->freelist;
4508 if (freelist)
4509 goto load_freelist;
4510
4511 freelist = get_freelist(s, slab);
4512
4513 if (!freelist) {
4514 c->slab = NULL;
4515 c->tid = next_tid(tid: c->tid);
4516 local_unlock_cpu_slab(s, flags);
4517 stat(s, si: DEACTIVATE_BYPASS);
4518 goto new_slab;
4519 }
4520
4521 stat(s, si: ALLOC_REFILL);
4522
4523load_freelist:
4524
4525 lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
4526
4527 /*
4528 * freelist is pointing to the list of objects to be used.
4529 * slab is pointing to the slab from which the objects are obtained.
4530 * That slab must be frozen for per cpu allocations to work.
4531 */
4532 VM_BUG_ON(!c->slab->frozen);
4533 c->freelist = get_freepointer(s, object: freelist);
4534 c->tid = next_tid(tid: c->tid);
4535 local_unlock_cpu_slab(s, flags);
4536 return freelist;
4537
4538deactivate_slab:
4539
4540 local_lock_cpu_slab(s, flags);
4541 if (slab != c->slab) {
4542 local_unlock_cpu_slab(s, flags);
4543 goto reread_slab;
4544 }
4545 freelist = c->freelist;
4546 c->slab = NULL;
4547 c->freelist = NULL;
4548 c->tid = next_tid(tid: c->tid);
4549 local_unlock_cpu_slab(s, flags);
4550 deactivate_slab(s, slab, freelist);
4551
4552new_slab:
4553
4554#ifdef CONFIG_SLUB_CPU_PARTIAL
4555 while (slub_percpu_partial(c)) {
4556 local_lock_cpu_slab(s, flags);
4557 if (unlikely(c->slab)) {
4558 local_unlock_cpu_slab(s, flags);
4559 goto reread_slab;
4560 }
4561 if (unlikely(!slub_percpu_partial(c))) {
4562 local_unlock_cpu_slab(s, flags);
4563 /* we were preempted and partial list got empty */
4564 goto new_objects;
4565 }
4566
4567 slab = slub_percpu_partial(c);
4568 slub_set_percpu_partial(c, slab);
4569
4570 if (likely(node_match(slab, node) &&
4571 pfmemalloc_match(slab, gfpflags)) ||
4572 !allow_spin) {
4573 c->slab = slab;
4574 freelist = get_freelist(s, slab);
4575 VM_BUG_ON(!freelist);
4576 stat(s, si: CPU_PARTIAL_ALLOC);
4577 goto load_freelist;
4578 }
4579
4580 local_unlock_cpu_slab(s, flags);
4581
4582 slab->next = NULL;
4583 __put_partials(s, partial_slab: slab);
4584 }
4585#endif
4586
4587new_objects:
4588
4589 pc.flags = gfpflags;
4590 /*
4591 * When a preferred node is indicated but no __GFP_THISNODE
4592 *
4593 * 1) try to get a partial slab from target node only by having
4594 * __GFP_THISNODE in pc.flags for get_partial()
4595 * 2) if 1) failed, try to allocate a new slab from the target node with
4596 * GFP_NOWAIT | __GFP_THISNODE opportunistically
4597 * 3) if 2) failed, retry with the original gfpflags, which will allow
4598 * get_partial() to try partial lists of other nodes before potentially
4599 * allocating a new page from other nodes
4600 */
4601 if (unlikely(node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE)
4602 && try_thisnode)) {
4603 if (unlikely(!allow_spin))
4604 /* Do not upgrade gfp to NOWAIT from more restrictive mode */
4605 pc.flags = gfpflags | __GFP_THISNODE;
4606 else
4607 pc.flags = GFP_NOWAIT | __GFP_THISNODE;
4608 }
4609
4610 pc.orig_size = orig_size;
4611 slab = get_partial(s, node, pc: &pc);
4612 if (slab) {
4613 if (kmem_cache_debug(s)) {
4614 freelist = pc.object;
4615 /*
4616 * For debug caches here we had to go through
4617 * alloc_single_from_partial() so just store the
4618 * tracking info and return the object.
4619 *
4620 * Due to disabled preemption we need to disallow
4621 * blocking. The flags are further adjusted by
4622 * gfp_nested_mask() in stack_depot itself.
4623 */
4624 if (s->flags & SLAB_STORE_USER)
4625 set_track(s, object: freelist, alloc: TRACK_ALLOC, addr,
4626 gfp_flags: gfpflags & ~(__GFP_DIRECT_RECLAIM));
4627
4628 return freelist;
4629 }
4630
4631 freelist = freeze_slab(s, slab);
4632 goto retry_load_slab;
4633 }
4634
4635 slub_put_cpu_ptr(s->cpu_slab);
4636 slab = new_slab(s, flags: pc.flags, node);
4637 c = slub_get_cpu_ptr(s->cpu_slab);
4638
4639 if (unlikely(!slab)) {
4640 if (node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE)
4641 && try_thisnode) {
4642 try_thisnode = false;
4643 goto new_objects;
4644 }
4645 slab_out_of_memory(s, gfpflags, nid: node);
4646 return NULL;
4647 }
4648
4649 stat(s, si: ALLOC_SLAB);
4650
4651 if (kmem_cache_debug(s)) {
4652 freelist = alloc_single_from_new_slab(s, slab, orig_size, gfpflags);
4653
4654 if (unlikely(!freelist))
4655 goto new_objects;
4656
4657 if (s->flags & SLAB_STORE_USER)
4658 set_track(s, object: freelist, alloc: TRACK_ALLOC, addr,
4659 gfp_flags: gfpflags & ~(__GFP_DIRECT_RECLAIM));
4660
4661 return freelist;
4662 }
4663
4664 /*
4665 * No other reference to the slab yet so we can
4666 * muck around with it freely without cmpxchg
4667 */
4668 freelist = slab->freelist;
4669 slab->freelist = NULL;
4670 slab->inuse = slab->objects;
4671 slab->frozen = 1;
4672
4673 inc_slabs_node(s, node: slab_nid(slab), objects: slab->objects);
4674
4675 if (unlikely(!pfmemalloc_match(slab, gfpflags) && allow_spin)) {
4676 /*
4677 * For !pfmemalloc_match() case we don't load freelist so that
4678 * we don't make further mismatched allocations easier.
4679 */
4680 deactivate_slab(s, slab, freelist: get_freepointer(s, object: freelist));
4681 return freelist;
4682 }
4683
4684retry_load_slab:
4685
4686 local_lock_cpu_slab(s, flags);
4687 if (unlikely(c->slab)) {
4688 void *flush_freelist = c->freelist;
4689 struct slab *flush_slab = c->slab;
4690
4691 c->slab = NULL;
4692 c->freelist = NULL;
4693 c->tid = next_tid(tid: c->tid);
4694
4695 local_unlock_cpu_slab(s, flags);
4696
4697 if (unlikely(!allow_spin)) {
4698 /* Reentrant slub cannot take locks, defer */
4699 defer_deactivate_slab(slab: flush_slab, flush_freelist);
4700 } else {
4701 deactivate_slab(s, slab: flush_slab, freelist: flush_freelist);
4702 }
4703
4704 stat(s, si: CPUSLAB_FLUSH);
4705
4706 goto retry_load_slab;
4707 }
4708 c->slab = slab;
4709
4710 goto load_freelist;
4711}
4712/*
4713 * We disallow kprobes in ___slab_alloc() to prevent reentrance
4714 *
4715 * kmalloc() -> ___slab_alloc() -> local_lock_cpu_slab() protected part of
4716 * ___slab_alloc() manipulating c->freelist -> kprobe -> bpf ->
4717 * kmalloc_nolock() or kfree_nolock() -> __update_cpu_freelist_fast()
4718 * manipulating c->freelist without lock.
4719 *
4720 * This does not prevent kprobe in functions called from ___slab_alloc() such as
4721 * local_lock_irqsave() itself, and that is fine, we only need to protect the
4722 * c->freelist manipulation in ___slab_alloc() itself.
4723 */
4724NOKPROBE_SYMBOL(___slab_alloc);
4725
4726/*
4727 * A wrapper for ___slab_alloc() for contexts where preemption is not yet
4728 * disabled. Compensates for possible cpu changes by refetching the per cpu area
4729 * pointer.
4730 */
4731static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
4732 unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
4733{
4734 void *p;
4735
4736#ifdef CONFIG_PREEMPT_COUNT
4737 /*
4738 * We may have been preempted and rescheduled on a different
4739 * cpu before disabling preemption. Need to reload cpu area
4740 * pointer.
4741 */
4742 c = slub_get_cpu_ptr(s->cpu_slab);
4743#endif
4744 if (unlikely(!gfpflags_allow_spinning(gfpflags))) {
4745 if (local_lock_is_locked(&s->cpu_slab->lock)) {
4746 /*
4747 * EBUSY is an internal signal to kmalloc_nolock() to
4748 * retry a different bucket. It's not propagated
4749 * to the caller.
4750 */
4751 p = ERR_PTR(error: -EBUSY);
4752 goto out;
4753 }
4754 }
4755 p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);
4756out:
4757#ifdef CONFIG_PREEMPT_COUNT
4758 slub_put_cpu_ptr(s->cpu_slab);
4759#endif
4760 return p;
4761}
4762
4763static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
4764 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
4765{
4766 struct kmem_cache_cpu *c;
4767 struct slab *slab;
4768 unsigned long tid;
4769 void *object;
4770
4771redo:
4772 /*
4773 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
4774 * enabled. We may switch back and forth between cpus while
4775 * reading from one cpu area. That does not matter as long
4776 * as we end up on the original cpu again when doing the cmpxchg.
4777 *
4778 * We must guarantee that tid and kmem_cache_cpu are retrieved on the
4779 * same cpu. We read first the kmem_cache_cpu pointer and use it to read
4780 * the tid. If we are preempted and switched to another cpu between the
4781 * two reads, it's OK as the two are still associated with the same cpu
4782 * and cmpxchg later will validate the cpu.
4783 */
4784 c = raw_cpu_ptr(s->cpu_slab);
4785 tid = READ_ONCE(c->tid);
4786
4787 /*
4788 * Irqless object alloc/free algorithm used here depends on sequence
4789 * of fetching cpu_slab's data. tid should be fetched before anything
4790 * on c to guarantee that object and slab associated with previous tid
4791 * won't be used with current tid. If we fetch tid first, object and
4792 * slab could be one associated with next tid and our alloc/free
4793 * request will fail. In this case we will retry, so there is no problem.
4794 */
4795 barrier();
4796
4797 /*
4798 * The transaction ids are globally unique per cpu and per operation on
4799 * a per cpu queue. Thus they guarantee that the cmpxchg_double
4800 * occurs on the right processor and that there was no operation on the
4801 * linked list in between.
4802 */
4803
4804 object = c->freelist;
4805 slab = c->slab;
4806
4807#ifdef CONFIG_NUMA
4808 if (static_branch_unlikely(&strict_numa) &&
4809 node == NUMA_NO_NODE) {
4810
4811 struct mempolicy *mpol = current->mempolicy;
4812
4813 if (mpol) {
4814 /*
4815 * Special BIND rule support. If existing slab
4816 * is in permitted set then do not redirect
4817 * to a particular node.
4818 * Otherwise we apply the memory policy to get
4819 * the node we need to allocate on.
4820 */
4821 if (mpol->mode != MPOL_BIND || !slab ||
4822 !node_isset(slab_nid(slab), mpol->nodes))
4823
4824 node = mempolicy_slab_node();
4825 }
4826 }
4827#endif
4828
4829 if (!USE_LOCKLESS_FAST_PATH() ||
4830 unlikely(!object || !slab || !node_match(slab, node))) {
4831 object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
4832 } else {
4833 void *next_object = get_freepointer_safe(s, object);
4834
4835 /*
4836 * The cmpxchg will only match if there was no additional
4837 * operation and if we are on the right processor.
4838 *
4839 * The cmpxchg does the following atomically (without lock
4840 * semantics!)
4841 * 1. Relocate first pointer to the current per cpu area.
4842 * 2. Verify that tid and freelist have not been changed
4843 * 3. If they were not changed replace tid and freelist
4844 *
4845 * Since this is without lock semantics the protection is only
4846 * against code executing on this cpu *not* from access by
4847 * other cpus.
4848 */
4849 if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) {
4850 note_cmpxchg_failure(n: "slab_alloc", s, tid);
4851 goto redo;
4852 }
4853 prefetch_freepointer(s, object: next_object);
4854 stat(s, si: ALLOC_FASTPATH);
4855 }
4856
4857 return object;
4858}
4859#else /* CONFIG_SLUB_TINY */
4860static void *__slab_alloc_node(struct kmem_cache *s,
4861 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
4862{
4863 struct partial_context pc;
4864 struct slab *slab;
4865 void *object;
4866
4867 pc.flags = gfpflags;
4868 pc.orig_size = orig_size;
4869 slab = get_partial(s, node, &pc);
4870
4871 if (slab)
4872 return pc.object;
4873
4874 slab = new_slab(s, gfpflags, node);
4875 if (unlikely(!slab)) {
4876 slab_out_of_memory(s, gfpflags, node);
4877 return NULL;
4878 }
4879
4880 object = alloc_single_from_new_slab(s, slab, orig_size, gfpflags);
4881
4882 return object;
4883}
4884#endif /* CONFIG_SLUB_TINY */
4885
4886/*
4887 * If the object has been wiped upon free, make sure it's fully initialized by
4888 * zeroing out the freelist pointer.
4889 *
4890 * Note that we also wipe custom freelist pointers.
4891 */
4892static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
4893 void *obj)
4894{
4895 if (unlikely(slab_want_init_on_free(s)) && obj &&
4896 !freeptr_outside_object(s))
4897 memset(s: (void *)((char *)kasan_reset_tag(addr: obj) + s->offset),
4898 c: 0, n: sizeof(void *));
4899}
4900
4901static __fastpath_inline
4902struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
4903{
4904 flags &= gfp_allowed_mask;
4905
4906 might_alloc(gfp_mask: flags);
4907
4908 if (unlikely(should_failslab(s, flags)))
4909 return NULL;
4910
4911 return s;
4912}
4913
4914static __fastpath_inline
4915bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
4916 gfp_t flags, size_t size, void **p, bool init,
4917 unsigned int orig_size)
4918{
4919 unsigned int zero_size = s->object_size;
4920 bool kasan_init = init;
4921 size_t i;
4922 gfp_t init_flags = flags & gfp_allowed_mask;
4923
4924 /*
4925 * For a kmalloc object, the allocated memory size (object_size) is likely
4926 * larger than the requested size (orig_size). If redzone check is
4927 * enabled for the extra space, don't zero it, as it will be redzoned
4928 * soon. The redzone operation for this extra space could be seen as a
4929 * replacement of current poisoning under certain debug option, and
4930 * won't break other sanity checks.
4931 */
4932 if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
4933 (s->flags & SLAB_KMALLOC))
4934 zero_size = orig_size;
4935
4936 /*
4937 * When slab_debug is enabled, avoid memory initialization integrated
4938 * into KASAN and instead zero out the memory via the memset below with
4939 * the proper size. Otherwise, KASAN might overwrite SLUB redzones and
4940 * cause false-positive reports. This does not lead to a performance
4941 * penalty on production builds, as slab_debug is not intended to be
4942 * enabled there.
4943 */
4944 if (__slub_debug_enabled())
4945 kasan_init = false;
4946
4947 /*
4948 * As memory initialization might be integrated into KASAN,
4949 * kasan_slab_alloc and initialization memset must be
4950 * kept together to avoid discrepancies in behavior.
4951 *
4952 * As p[i] might get tagged, memset and kmemleak hook come after KASAN.
4953 */
4954 for (i = 0; i < size; i++) {
4955 p[i] = kasan_slab_alloc(s, object: p[i], flags: init_flags, init: kasan_init);
4956 if (p[i] && init && (!kasan_init ||
4957 !kasan_has_integrated_init()))
4958 memset(s: p[i], c: 0, n: zero_size);
4959 if (gfpflags_allow_spinning(gfp_flags: flags))
4960 kmemleak_alloc_recursive(ptr: p[i], size: s->object_size, min_count: 1,
4961 flags: s->flags, gfp: init_flags);
4962 kmsan_slab_alloc(s, object: p[i], flags: init_flags);
4963 alloc_tagging_slab_alloc_hook(s, object: p[i], flags);
4964 }
4965
4966 return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
4967}
4968
4969/*
4970 * Replace the empty main sheaf with a (at least partially) full sheaf.
4971 *
4972 * Must be called with the cpu_sheaves local lock locked. If successful, returns
4973 * the pcs pointer with the local lock locked (possibly on a different cpu than
4974 * the one it was initially called on). If not successful, returns NULL with the
4975 * local lock unlocked.
4976 */
4977static struct slub_percpu_sheaves *
4978__pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, gfp_t gfp)
4979{
4980 struct slab_sheaf *empty = NULL;
4981 struct slab_sheaf *full;
4982 struct node_barn *barn;
4983 bool can_alloc;
4984
4985 lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
4986
4987 if (pcs->spare && pcs->spare->size > 0) {
4988 swap(pcs->main, pcs->spare);
4989 return pcs;
4990 }
4991
4992 barn = get_barn(s);
4993 if (!barn) {
4994 local_unlock(&s->cpu_sheaves->lock);
4995 return NULL;
4996 }
4997
4998 full = barn_replace_empty_sheaf(barn, empty: pcs->main);
4999
5000 if (full) {
5001 stat(s, si: BARN_GET);
5002 pcs->main = full;
5003 return pcs;
5004 }
5005
5006 stat(s, si: BARN_GET_FAIL);
5007
5008 can_alloc = gfpflags_allow_blocking(gfp_flags: gfp);
5009
5010 if (can_alloc) {
5011 if (pcs->spare) {
5012 empty = pcs->spare;
5013 pcs->spare = NULL;
5014 } else {
5015 empty = barn_get_empty_sheaf(barn);
5016 }
5017 }
5018
5019 local_unlock(&s->cpu_sheaves->lock);
5020
5021 if (!can_alloc)
5022 return NULL;
5023
5024 if (empty) {
5025 if (!refill_sheaf(s, sheaf: empty, gfp)) {
5026 full = empty;
5027 } else {
5028 /*
5029 * we must be very low on memory so don't bother
5030 * with the barn
5031 */
5032 free_empty_sheaf(s, sheaf: empty);
5033 }
5034 } else {
5035 full = alloc_full_sheaf(s, gfp);
5036 }
5037
5038 if (!full)
5039 return NULL;
5040
5041 /*
5042	 * We can only reach here when gfpflags_allow_blocking() is true,
5043	 * so this must not be irq context.
5044 */
5045 local_lock(&s->cpu_sheaves->lock);
5046 pcs = this_cpu_ptr(s->cpu_sheaves);
5047
5048 /*
5049	 * If we are returning an empty sheaf, we either got it from the
5050 * barn or had to allocate one. If we are returning a full
5051 * sheaf, it's due to racing or being migrated to a different
5052 * cpu. Breaching the barn's sheaf limits should be thus rare
5053 * enough so just ignore them to simplify the recovery.
5054 */
5055
5056 if (pcs->main->size == 0) {
5057 barn_put_empty_sheaf(barn, sheaf: pcs->main);
5058 pcs->main = full;
5059 return pcs;
5060 }
5061
5062 if (!pcs->spare) {
5063 pcs->spare = full;
5064 return pcs;
5065 }
5066
5067 if (pcs->spare->size == 0) {
5068 barn_put_empty_sheaf(barn, sheaf: pcs->spare);
5069 pcs->spare = full;
5070 return pcs;
5071 }
5072
5073 barn_put_full_sheaf(barn, sheaf: full);
5074 stat(s, si: BARN_PUT);
5075
5076 return pcs;
5077}
5078
5079static __fastpath_inline
5080void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node)
5081{
5082 struct slub_percpu_sheaves *pcs;
5083 bool node_requested;
5084 void *object;
5085
5086#ifdef CONFIG_NUMA
5087 if (static_branch_unlikely(&strict_numa) &&
5088 node == NUMA_NO_NODE) {
5089
5090 struct mempolicy *mpol = current->mempolicy;
5091
5092 if (mpol) {
5093 /*
5094 * Special BIND rule support. If the local node
5095 * is in permitted set then do not redirect
5096 * to a particular node.
5097 * Otherwise we apply the memory policy to get
5098 * the node we need to allocate on.
5099 */
5100 if (mpol->mode != MPOL_BIND ||
5101 !node_isset(numa_mem_id(), mpol->nodes))
5102
5103 node = mempolicy_slab_node();
5104 }
5105 }
5106#endif
5107
5108 node_requested = IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE;
5109
5110 /*
5111 * We assume the percpu sheaves contain only local objects although it's
5112 * not completely guaranteed, so we verify later.
5113 */
5114 if (unlikely(node_requested && node != numa_mem_id()))
5115 return NULL;
5116
5117 if (!local_trylock(&s->cpu_sheaves->lock))
5118 return NULL;
5119
5120 pcs = this_cpu_ptr(s->cpu_sheaves);
5121
5122 if (unlikely(pcs->main->size == 0)) {
5123 pcs = __pcs_replace_empty_main(s, pcs, gfp);
5124 if (unlikely(!pcs))
5125 return NULL;
5126 }
5127
5128 object = pcs->main->objects[pcs->main->size - 1];
5129
5130 if (unlikely(node_requested)) {
5131 /*
5132 * Verify that the object was from the node we want. This could
5133 * be false because of cpu migration during an unlocked part of
5134 * the current allocation or previous freeing process.
5135 */
5136 if (folio_nid(folio: virt_to_folio(x: object)) != node) {
5137 local_unlock(&s->cpu_sheaves->lock);
5138 return NULL;
5139 }
5140 }
5141
5142 pcs->main->size--;
5143
5144 local_unlock(&s->cpu_sheaves->lock);
5145
5146 stat(s, si: ALLOC_PCS);
5147
5148 return object;
5149}
5150
5151static __fastpath_inline
5152unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
5153{
5154 struct slub_percpu_sheaves *pcs;
5155 struct slab_sheaf *main;
5156 unsigned int allocated = 0;
5157 unsigned int batch;
5158
5159next_batch:
5160 if (!local_trylock(&s->cpu_sheaves->lock))
5161 return allocated;
5162
5163 pcs = this_cpu_ptr(s->cpu_sheaves);
5164
5165 if (unlikely(pcs->main->size == 0)) {
5166
5167 struct slab_sheaf *full;
5168 struct node_barn *barn;
5169
5170 if (pcs->spare && pcs->spare->size > 0) {
5171 swap(pcs->main, pcs->spare);
5172 goto do_alloc;
5173 }
5174
5175 barn = get_barn(s);
5176 if (!barn) {
5177 local_unlock(&s->cpu_sheaves->lock);
5178 return allocated;
5179 }
5180
5181 full = barn_replace_empty_sheaf(barn, empty: pcs->main);
5182
5183 if (full) {
5184 stat(s, si: BARN_GET);
5185 pcs->main = full;
5186 goto do_alloc;
5187 }
5188
5189 stat(s, si: BARN_GET_FAIL);
5190
5191 local_unlock(&s->cpu_sheaves->lock);
5192
5193 /*
5194	 * Once full sheaves in the barn are depleted, let the bulk
5195 * allocation continue from slab pages, otherwise we would just
5196 * be copying arrays of pointers twice.
5197 */
5198 return allocated;
5199 }
5200
5201do_alloc:
5202
5203 main = pcs->main;
5204 batch = min(size, main->size);
5205
5206 main->size -= batch;
5207 memcpy(to: p, from: main->objects + main->size, len: batch * sizeof(void *));
5208
5209 local_unlock(&s->cpu_sheaves->lock);
5210
5211 stat_add(s, si: ALLOC_PCS, v: batch);
5212
5213 allocated += batch;
5214
5215 if (batch < size) {
5216 p += batch;
5217 size -= batch;
5218 goto next_batch;
5219 }
5220
5221 return allocated;
5222}
5223
5225/*
5226 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
5227 * have the fastpath folded into their functions. So no function call
5228 * overhead for requests that can be satisfied on the fastpath.
5229 *
5230 * The fastpath works by first checking if the lockless freelist can be used.
5231 * If not then __slab_alloc is called for slow processing.
5232 *
5233 * Otherwise we can simply pick the next object from the lockless free list.
5234 */
5235static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
5236 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
5237{
5238 void *object;
5239 bool init = false;
5240
5241 s = slab_pre_alloc_hook(s, flags: gfpflags);
5242 if (unlikely(!s))
5243 return NULL;
5244
5245 object = kfence_alloc(s, size: orig_size, flags: gfpflags);
5246 if (unlikely(object))
5247 goto out;
5248
5249 if (s->cpu_sheaves)
5250 object = alloc_from_pcs(s, gfp: gfpflags, node);
5251
5252 if (!object)
5253 object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
5254
5255 maybe_wipe_obj_freeptr(s, obj: object);
5256 init = slab_want_init_on_alloc(flags: gfpflags, c: s);
5257
5258out:
5259 /*
5260	 * When init equals 'true', like for the kzalloc() family, only
5261	 * @orig_size bytes might be zeroed instead of s->object_size.
5262	 * In case this fails due to memcg_slab_post_alloc_hook(),
5263	 * object is set to NULL.
5264 */
5265 slab_post_alloc_hook(s, lru, flags: gfpflags, size: 1, p: &object, init, orig_size);
5266
5267 return object;
5268}
5269
5270void *kmem_cache_alloc_noprof(struct kmem_cache *s, gfp_t gfpflags)
5271{
5272 void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_,
5273 orig_size: s->object_size);
5274
5275 trace_kmem_cache_alloc(_RET_IP_, ptr: ret, s, gfp_flags: gfpflags, NUMA_NO_NODE);
5276
5277 return ret;
5278}
5279EXPORT_SYMBOL(kmem_cache_alloc_noprof);
5280
5281void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
5282 gfp_t gfpflags)
5283{
5284 void *ret = slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, _RET_IP_,
5285 orig_size: s->object_size);
5286
5287 trace_kmem_cache_alloc(_RET_IP_, ptr: ret, s, gfp_flags: gfpflags, NUMA_NO_NODE);
5288
5289 return ret;
5290}
5291EXPORT_SYMBOL(kmem_cache_alloc_lru_noprof);
5292
5293bool kmem_cache_charge(void *objp, gfp_t gfpflags)
5294{
5295 if (!memcg_kmem_online())
5296 return true;
5297
5298 return memcg_slab_post_charge(p: objp, flags: gfpflags);
5299}
5300EXPORT_SYMBOL(kmem_cache_charge);
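
/*
 * Illustrative sketch (not part of the allocator): kmem_cache_charge() lets a
 * caller that allocated without __GFP_ACCOUNT charge the object to the current
 * memcg afterwards. 'my_cache' is a hypothetical cache:
 *
 *	void *obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
 *
 *	if (obj && !kmem_cache_charge(obj, GFP_KERNEL)) {
 *		kmem_cache_free(my_cache, obj);
 *		obj = NULL;
 *	}
 */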
5301
5302/**
5303 * kmem_cache_alloc_node - Allocate an object on the specified node
5304 * @s: The cache to allocate from.
5305 * @gfpflags: See kmalloc().
5306 * @node: node number of the target node.
5307 *
5308 * Identical to kmem_cache_alloc but it will allocate memory on the given
5309 * node, which can improve the performance for cpu bound structures.
5310 *
5311 * Fallback to other node is possible if __GFP_THISNODE is not set.
5312 *
5313 * Return: pointer to the new object or %NULL in case of error
5314 */
5315void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int node)
5316{
5317 void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, orig_size: s->object_size);
5318
5319 trace_kmem_cache_alloc(_RET_IP_, ptr: ret, s, gfp_flags: gfpflags, node);
5320
5321 return ret;
5322}
5323EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
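
/*
 * Illustrative sketch (not part of the allocator): a caller keeping one
 * control structure per node would typically do the following, where
 * 'foo_cache' and 'struct foo' are hypothetical and kmem_cache_alloc_node()
 * is assumed to be the usual alloc_hooks() wrapper around the _noprof
 * variant above:
 *
 *	struct foo *f = kmem_cache_alloc_node(foo_cache, GFP_KERNEL, nid);
 *
 *	if (!f)
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free(foo_cache, f);
 */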
5324
5325/*
5326 * Returns a sheaf that holds at least the requested number of objects;
5327 * when prefilling is needed, it is done with the given gfp flags.
5328 *
5329 * return NULL if sheaf allocation or prefilling failed
5330 */
5331struct slab_sheaf *
5332kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
5333{
5334 struct slub_percpu_sheaves *pcs;
5335 struct slab_sheaf *sheaf = NULL;
5336 struct node_barn *barn;
5337
5338 if (unlikely(size > s->sheaf_capacity)) {
5339
5340 /*
5341 * slab_debug disables cpu sheaves intentionally so all
5342 * prefilled sheaves become "oversize" and we give up on
5343 * performance for the debugging. Same with SLUB_TINY.
5344 * Creating a cache without sheaves and then requesting a
5345 * prefilled sheaf is however not expected, so warn.
5346 */
5347 WARN_ON_ONCE(s->sheaf_capacity == 0 &&
5348 !IS_ENABLED(CONFIG_SLUB_TINY) &&
5349 !(s->flags & SLAB_DEBUG_FLAGS));
5350
5351 sheaf = kzalloc(struct_size(sheaf, objects, size), gfp);
5352 if (!sheaf)
5353 return NULL;
5354
5355 stat(s, si: SHEAF_PREFILL_OVERSIZE);
5356 sheaf->cache = s;
5357 sheaf->capacity = size;
5358
5359 if (!__kmem_cache_alloc_bulk(s, flags: gfp, size,
5360 p: &sheaf->objects[0])) {
5361 kfree(objp: sheaf);
5362 return NULL;
5363 }
5364
5365 sheaf->size = size;
5366
5367 return sheaf;
5368 }
5369
5370 local_lock(&s->cpu_sheaves->lock);
5371 pcs = this_cpu_ptr(s->cpu_sheaves);
5372
5373 if (pcs->spare) {
5374 sheaf = pcs->spare;
5375 pcs->spare = NULL;
5376 stat(s, si: SHEAF_PREFILL_FAST);
5377 } else {
5378 barn = get_barn(s);
5379
5380 stat(s, si: SHEAF_PREFILL_SLOW);
5381 if (barn)
5382 sheaf = barn_get_full_or_empty_sheaf(barn);
5383 if (sheaf && sheaf->size)
5384 stat(s, si: BARN_GET);
5385 else
5386 stat(s, si: BARN_GET_FAIL);
5387 }
5388
5389 local_unlock(&s->cpu_sheaves->lock);
5390
5392 if (!sheaf)
5393 sheaf = alloc_empty_sheaf(s, gfp);
5394
5395 if (sheaf && sheaf->size < size) {
5396 if (refill_sheaf(s, sheaf, gfp)) {
5397 sheaf_flush_unused(s, sheaf);
5398 free_empty_sheaf(s, sheaf);
5399 sheaf = NULL;
5400 }
5401 }
5402
5403 if (sheaf)
5404 sheaf->capacity = s->sheaf_capacity;
5405
5406 return sheaf;
5407}
5408
5409/*
5410 * Use this to return a sheaf obtained by kmem_cache_prefill_sheaf()
5411 *
5412 * If the sheaf cannot simply become the percpu spare sheaf, but there's space
5413 * for a full sheaf in the barn, we try to refill the sheaf back to the cache's
5414 * sheaf_capacity to avoid handling partially full sheaves.
5415 *
5416 * If the refill fails because gfp is e.g. GFP_NOWAIT, or the barn is full, the
5417 * sheaf is instead flushed and freed.
5418 */
5419void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
5420 struct slab_sheaf *sheaf)
5421{
5422 struct slub_percpu_sheaves *pcs;
5423 struct node_barn *barn;
5424
5425 if (unlikely(sheaf->capacity != s->sheaf_capacity)) {
5426 sheaf_flush_unused(s, sheaf);
5427 kfree(objp: sheaf);
5428 return;
5429 }
5430
5431 local_lock(&s->cpu_sheaves->lock);
5432 pcs = this_cpu_ptr(s->cpu_sheaves);
5433 barn = get_barn(s);
5434
5435 if (!pcs->spare) {
5436 pcs->spare = sheaf;
5437 sheaf = NULL;
5438 stat(s, si: SHEAF_RETURN_FAST);
5439 }
5440
5441 local_unlock(&s->cpu_sheaves->lock);
5442
5443 if (!sheaf)
5444 return;
5445
5446 stat(s, si: SHEAF_RETURN_SLOW);
5447
5448 /*
5449 * If the barn has too many full sheaves or we fail to refill the sheaf,
5450 * simply flush and free it.
5451 */
5452 if (!barn || data_race(barn->nr_full) >= MAX_FULL_SHEAVES ||
5453 refill_sheaf(s, sheaf, gfp)) {
5454 sheaf_flush_unused(s, sheaf);
5455 free_empty_sheaf(s, sheaf);
5456 return;
5457 }
5458
5459 barn_put_full_sheaf(barn, sheaf);
5460 stat(s, si: BARN_PUT);
5461}
5462
5463/*
5464 * refill a sheaf previously returned by kmem_cache_prefill_sheaf to at least
5465 * the given size
5466 *
5467 * the sheaf might be replaced by a new one when requesting more than
5468 * s->sheaf_capacity objects if such replacement is necessary; if the refill
5469 * fails (returning -ENOMEM), the existing sheaf is left intact
5470 *
5471 * In practice we always refill to full sheaf's capacity.
5472 */
5473int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
5474 struct slab_sheaf **sheafp, unsigned int size)
5475{
5476 struct slab_sheaf *sheaf;
5477
5478 /*
5479 * TODO: do we want to support *sheaf == NULL to be equivalent of
5480 * kmem_cache_prefill_sheaf() ?
5481 */
5482 if (!sheafp || !(*sheafp))
5483 return -EINVAL;
5484
5485 sheaf = *sheafp;
5486 if (sheaf->size >= size)
5487 return 0;
5488
5489 if (likely(sheaf->capacity >= size)) {
5490 if (likely(sheaf->capacity == s->sheaf_capacity))
5491 return refill_sheaf(s, sheaf, gfp);
5492
5493 if (!__kmem_cache_alloc_bulk(s, flags: gfp, size: sheaf->capacity - sheaf->size,
5494 p: &sheaf->objects[sheaf->size])) {
5495 return -ENOMEM;
5496 }
5497 sheaf->size = sheaf->capacity;
5498
5499 return 0;
5500 }
5501
5502 /*
5503 * We had a regular sized sheaf and need an oversize one, or we had an
5504 * oversize one already but need a larger one now.
5505 * This should be a very rare path so let's not complicate it.
5506 */
5507 sheaf = kmem_cache_prefill_sheaf(s, gfp, size);
5508 if (!sheaf)
5509 return -ENOMEM;
5510
5511 kmem_cache_return_sheaf(s, gfp, sheaf: *sheafp);
5512 *sheafp = sheaf;
5513 return 0;
5514}
5515
5516/*
5517 * Allocate from a sheaf obtained by kmem_cache_prefill_sheaf()
5518 *
5519 * Guaranteed not to fail for as many allocations as the requested size.
5520 * After the sheaf is emptied, it fails - no fallback to the slab cache itself.
5521 *
5522 * The gfp parameter is meant only to specify __GFP_ZERO or __GFP_ACCOUNT;
5523 * memcg charging is forced over the limit if necessary, to avoid failure.
5524 */
5525void *
5526kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp,
5527 struct slab_sheaf *sheaf)
5528{
5529 void *ret = NULL;
5530 bool init;
5531
5532 if (sheaf->size == 0)
5533 goto out;
5534
5535 ret = sheaf->objects[--sheaf->size];
5536
5537 init = slab_want_init_on_alloc(flags: gfp, c: s);
5538
5539 /* add __GFP_NOFAIL to force successful memcg charging */
5540 slab_post_alloc_hook(s, NULL, flags: gfp | __GFP_NOFAIL, size: 1, p: &ret, init, orig_size: s->object_size);
5541out:
5542 trace_kmem_cache_alloc(_RET_IP_, ptr: ret, s, gfp_flags: gfp, NUMA_NO_NODE);
5543
5544 return ret;
5545}
5546
5547unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf)
5548{
5549 return sheaf->size;
5550}
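
/*
 * Illustrative sketch (not part of the allocator): a typical
 * prefill/alloc/return cycle with the sheaf API above. 'my_cache' and the
 * prefill count are hypothetical, and the names without the _noprof suffix
 * are assumed to be the usual alloc_hooks() wrappers:
 *
 *	struct slab_sheaf *sheaf;
 *	void *obj;
 *
 *	sheaf = kmem_cache_prefill_sheaf(my_cache, GFP_KERNEL, 16);
 *	if (!sheaf)
 *		return -ENOMEM;
 *
 *	obj = kmem_cache_alloc_from_sheaf(my_cache, GFP_KERNEL, sheaf);
 *	(up to 16 such allocations are guaranteed not to fail)
 *	...
 *	kmem_cache_return_sheaf(my_cache, GFP_KERNEL, sheaf);
 *
 * kmem_cache_refill_sheaf() can be used in between to top the sheaf up again.
 */
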
5551/*
5552 * To avoid unnecessary overhead, we pass through large allocation requests
5553 * directly to the page allocator. We use __GFP_COMP, because we will need to
5554 * know the allocation order to free the pages properly in kfree.
5555 */
5556static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
5557{
5558 struct folio *folio;
5559 void *ptr = NULL;
5560 unsigned int order = get_order(size);
5561
5562 if (unlikely(flags & GFP_SLAB_BUG_MASK))
5563 flags = kmalloc_fix_flags(flags);
5564
5565 flags |= __GFP_COMP;
5566
5567 if (node == NUMA_NO_NODE)
5568 folio = (struct folio *)alloc_frozen_pages_noprof(flags, order);
5569 else
5570 folio = (struct folio *)__alloc_frozen_pages_noprof(flags, order, nid: node, NULL);
5571
5572 if (folio) {
5573 ptr = folio_address(folio);
5574 lruvec_stat_mod_folio(folio, idx: NR_SLAB_UNRECLAIMABLE_B,
5575 PAGE_SIZE << order);
5576 __folio_set_large_kmalloc(folio);
5577 }
5578
5579 ptr = kasan_kmalloc_large(ptr, size, flags);
5580 /* As ptr might get tagged, call kmemleak hook after KASAN. */
5581 kmemleak_alloc(ptr, size, min_count: 1, gfp: flags);
5582 kmsan_kmalloc_large(ptr, size, flags);
5583
5584 return ptr;
5585}
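
/*
 * For instance (assuming 4K pages), kmalloc(1 << 20, GFP_KERNEL) exceeds
 * KMALLOC_MAX_CACHE_SIZE and lands here with order = get_order(1 << 20) = 8,
 * i.e. a 256-page __GFP_COMP allocation that kfree() later returns via the
 * large-kmalloc folio path.
 */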
5586
5587void *__kmalloc_large_noprof(size_t size, gfp_t flags)
5588{
5589 void *ret = ___kmalloc_large_node(size, flags, NUMA_NO_NODE);
5590
5591 trace_kmalloc(_RET_IP_, ptr: ret, bytes_req: size, PAGE_SIZE << get_order(size),
5592 gfp_flags: flags, NUMA_NO_NODE);
5593 return ret;
5594}
5595EXPORT_SYMBOL(__kmalloc_large_noprof);
5596
5597void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
5598{
5599 void *ret = ___kmalloc_large_node(size, flags, node);
5600
5601 trace_kmalloc(_RET_IP_, ptr: ret, bytes_req: size, PAGE_SIZE << get_order(size),
5602 gfp_flags: flags, node);
5603 return ret;
5604}
5605EXPORT_SYMBOL(__kmalloc_large_node_noprof);
5606
5607static __always_inline
5608void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node,
5609 unsigned long caller)
5610{
5611 struct kmem_cache *s;
5612 void *ret;
5613
5614 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
5615 ret = __kmalloc_large_node_noprof(size, flags, node);
5616 trace_kmalloc(call_site: caller, ptr: ret, bytes_req: size,
5617 PAGE_SIZE << get_order(size), gfp_flags: flags, node);
5618 return ret;
5619 }
5620
5621 if (unlikely(!size))
5622 return ZERO_SIZE_PTR;
5623
5624 s = kmalloc_slab(size, b, flags, caller);
5625
5626 ret = slab_alloc_node(s, NULL, gfpflags: flags, node, addr: caller, orig_size: size);
5627 ret = kasan_kmalloc(s, object: ret, size, flags);
5628 trace_kmalloc(call_site: caller, ptr: ret, bytes_req: size, bytes_alloc: s->size, gfp_flags: flags, node);
5629 return ret;
5630}
5631void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
5632{
5633 return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_);
5634}
5635EXPORT_SYMBOL(__kmalloc_node_noprof);
5636
5637void *__kmalloc_noprof(size_t size, gfp_t flags)
5638{
5639 return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_);
5640}
5641EXPORT_SYMBOL(__kmalloc_noprof);
5642
5643/**
5644 * kmalloc_nolock - Allocate an object of given size from any context.
5645 * @size: size to allocate
5646 * @gfp_flags: GFP flags. Only __GFP_ACCOUNT, __GFP_ZERO, __GFP_NO_OBJ_EXT
5647 * allowed.
5648 * @node: node number of the target node.
5649 *
5650 * Return: pointer to the new object or NULL in case of error.
5651 * NULL does not mean EBUSY or EAGAIN. It means ENOMEM.
5652 * There is no reason to call it again and expect !NULL.
5653 */
5654void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
5655{
5656 gfp_t alloc_gfp = __GFP_NOWARN | __GFP_NOMEMALLOC | gfp_flags;
5657 struct kmem_cache *s;
5658 bool can_retry = true;
5659 void *ret = ERR_PTR(error: -EBUSY);
5660
5661 VM_WARN_ON_ONCE(gfp_flags & ~(__GFP_ACCOUNT | __GFP_ZERO |
5662 __GFP_NO_OBJ_EXT));
5663
5664 if (unlikely(!size))
5665 return ZERO_SIZE_PTR;
5666
5667 if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq()))
5668 /* kmalloc_nolock() in PREEMPT_RT is not supported from irq */
5669 return NULL;
5670retry:
5671 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
5672 return NULL;
5673 s = kmalloc_slab(size, NULL, flags: alloc_gfp, _RET_IP_);
5674
5675 if (!(s->flags & __CMPXCHG_DOUBLE) && !kmem_cache_debug(s))
5676 /*
5677 * kmalloc_nolock() is not supported on architectures that
5678 * don't implement cmpxchg16b, but debug caches don't use
5679 * per-cpu slab and per-cpu partial slabs. They rely on
5680 * kmem_cache_node->list_lock, so kmalloc_nolock() can
5681 * attempt to allocate from debug caches by
5682 * spin_trylock_irqsave(&n->list_lock, ...)
5683 */
5684 return NULL;
5685
5686 /*
5687 * Do not call slab_alloc_node(), since trylock mode isn't
5688 * compatible with slab_pre_alloc_hook/should_failslab and
5689 * kfence_alloc. Hence call __slab_alloc_node() (at most twice)
5690 * and slab_post_alloc_hook() directly.
5691 *
5692 * In !PREEMPT_RT ___slab_alloc() manipulates (freelist,tid) pair
5693 * in irq saved region. It assumes that the same cpu will not
5694 * __update_cpu_freelist_fast() into the same (freelist,tid) pair.
5695 * Therefore use in_nmi() to check whether particular bucket is in
5696 * irq protected section.
5697 *
5698 * If in_nmi() && local_lock_is_locked(s->cpu_slab) then it means that
5699 * this cpu was interrupted somewhere inside ___slab_alloc() after
5700 * it did local_lock_irqsave(&s->cpu_slab->lock, flags).
5701 * In this case fast path with __update_cpu_freelist_fast() is not safe.
5702 */
5703#ifndef CONFIG_SLUB_TINY
5704 if (!in_nmi() || !local_lock_is_locked(&s->cpu_slab->lock))
5705#endif
5706		ret = __slab_alloc_node(s, alloc_gfp, node, _RET_IP_, size);
5707
5708	if (PTR_ERR(ret) == -EBUSY) {
5709 if (can_retry) {
5710 /* pick the next kmalloc bucket */
5711 size = s->object_size + 1;
5712 /*
5713 * Another alternative is to
5714 * if (memcg) alloc_gfp &= ~__GFP_ACCOUNT;
5715 * else if (!memcg) alloc_gfp |= __GFP_ACCOUNT;
5716 * to retry from bucket of the same size.
5717 */
5718 can_retry = false;
5719 goto retry;
5720 }
5721 ret = NULL;
5722 }
5723
5724	maybe_wipe_obj_freeptr(s, ret);
5725	slab_post_alloc_hook(s, NULL, alloc_gfp, 1, &ret,
5726			     slab_want_init_on_alloc(alloc_gfp, s), size);
5727
5728	ret = kasan_kmalloc(s, ret, size, alloc_gfp);
5729 return ret;
5730}
5731EXPORT_SYMBOL_GPL(kmalloc_nolock_noprof);
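/*
 * Illustrative usage (not from the original source; 'struct my_event' and its
 * field are hypothetical): a caller running in NMI context or under a
 * raw_spinlock_t pairs kmalloc_nolock() with kfree_nolock() and treats NULL
 * as -ENOMEM rather than retrying:
 *
 *	struct my_event *e = kmalloc_nolock(sizeof(*e), __GFP_ZERO, NUMA_NO_NODE);
 *
 *	if (!e)
 *		return -ENOMEM;
 *	e->ts = ktime_get_mono_fast_ns();
 *	...
 *	kfree_nolock(e);
 */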
5732
5733void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags,
5734 int node, unsigned long caller)
5735{
5736 return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller);
5737
5738}
5739EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof);
5740
5741void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
5742{
5743	void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
5744				    _RET_IP_, size);
5745
5746	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
5747
5748	ret = kasan_kmalloc(s, ret, size, gfpflags);
5749 return ret;
5750}
5751EXPORT_SYMBOL(__kmalloc_cache_noprof);
5752
5753void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
5754 int node, size_t size)
5755{
5756	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
5757
5758	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
5759
5760	ret = kasan_kmalloc(s, ret, size, gfpflags);
5761 return ret;
5762}
5763EXPORT_SYMBOL(__kmalloc_cache_node_noprof);
5764
5765static noinline void free_to_partial_list(
5766 struct kmem_cache *s, struct slab *slab,
5767 void *head, void *tail, int bulk_cnt,
5768 unsigned long addr)
5769{
5770	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
5771 struct slab *slab_free = NULL;
5772 int cnt = bulk_cnt;
5773 unsigned long flags;
5774 depot_stack_handle_t handle = 0;
5775
5776 /*
5777 * We cannot use GFP_NOWAIT as there are callsites where waking up
5778 * kswapd could deadlock
5779 */
5780 if (s->flags & SLAB_STORE_USER)
5781 handle = set_track_prepare(__GFP_NOWARN);
5782
5783 spin_lock_irqsave(&n->list_lock, flags);
5784
5785	if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) {
5786 void *prior = slab->freelist;
5787
5788 /* Perform the actual freeing while we still hold the locks */
5789 slab->inuse -= cnt;
5790		set_freepointer(s, tail, prior);
5791 slab->freelist = head;
5792
5793 /*
5794 * If the slab is empty, and node's partial list is full,
5795		 * it should be discarded anyway, no matter whether it's on
5796		 * the full or partial list.
5797 */
5798 if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
5799 slab_free = slab;
5800
5801 if (!prior) {
5802 /* was on full list */
5803 remove_full(s, n, slab);
5804 if (!slab_free) {
5805				add_partial(n, slab, DEACTIVATE_TO_TAIL);
5806				stat(s, FREE_ADD_PARTIAL);
5807 }
5808 } else if (slab_free) {
5809 remove_partial(n, slab);
5810			stat(s, FREE_REMOVE_PARTIAL);
5811 }
5812 }
5813
5814 if (slab_free) {
5815 /*
5816 * Update the counters while still holding n->list_lock to
5817 * prevent spurious validation warnings
5818 */
5819		dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
5820 }
5821
5822	spin_unlock_irqrestore(&n->list_lock, flags);
5823
5824 if (slab_free) {
5825		stat(s, FREE_SLAB);
5826		free_slab(s, slab_free);
5827 }
5828}
5829
5830/*
5831 * Slow path handling. This may still be called frequently since objects
5832 * have a longer lifetime than the cpu slabs in most processing loads.
5833 *
5834 * So we still attempt to reduce cache line usage. Just take the slab
5835 * lock and free the item. If there is no additional partial slab
5836 * handling required then we can return immediately.
5837 */
5838static void __slab_free(struct kmem_cache *s, struct slab *slab,
5839 void *head, void *tail, int cnt,
5840 unsigned long addr)
5841
5842{
5843 void *prior;
5844 int was_frozen;
5845 struct slab new;
5846 unsigned long counters;
5847 struct kmem_cache_node *n = NULL;
5848 unsigned long flags;
5849 bool on_node_partial;
5850
5851	stat(s, FREE_SLOWPATH);
5852
5853 if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
5854		free_to_partial_list(s, slab, head, tail, cnt, addr);
5855 return;
5856 }
5857
5858 do {
5859 if (unlikely(n)) {
5860			spin_unlock_irqrestore(&n->list_lock, flags);
5861 n = NULL;
5862 }
5863 prior = slab->freelist;
5864 counters = slab->counters;
5865		set_freepointer(s, tail, prior);
5866 new.counters = counters;
5867 was_frozen = new.frozen;
5868 new.inuse -= cnt;
5869 if ((!new.inuse || !prior) && !was_frozen) {
5870 /* Needs to be taken off a list */
5871 if (!kmem_cache_has_cpu_partial(s) || prior) {
5872
5873				n = get_node(s, slab_nid(slab));
5874 /*
5875 * Speculatively acquire the list_lock.
5876 * If the cmpxchg does not succeed then we may
5877 * drop the list_lock without any processing.
5878 *
5879 * Otherwise the list_lock will synchronize with
5880 * other processors updating the list of slabs.
5881 */
5882 spin_lock_irqsave(&n->list_lock, flags);
5883
5884 on_node_partial = slab_test_node_partial(slab);
5885 }
5886 }
5887
5888	} while (!slab_update_freelist(s, slab,
5889		prior, counters,
5890		head, new.counters,
5891		"__slab_free"));
5892
5893 if (likely(!n)) {
5894
5895 if (likely(was_frozen)) {
5896 /*
5897 * The list lock was not taken therefore no list
5898 * activity can be necessary.
5899 */
5900			stat(s, FREE_FROZEN);
5901 } else if (kmem_cache_has_cpu_partial(s) && !prior) {
5902 /*
5903 * If we started with a full slab then put it onto the
5904 * per cpu partial list.
5905 */
5906			put_cpu_partial(s, slab, 1);
5907			stat(s, CPU_PARTIAL_FREE);
5908 }
5909
5910 return;
5911 }
5912
5913 /*
5914 * This slab was partially empty but not on the per-node partial list,
5915 * in which case we shouldn't manipulate its list, just return.
5916 */
5917 if (prior && !on_node_partial) {
5918		spin_unlock_irqrestore(&n->list_lock, flags);
5919 return;
5920 }
5921
5922 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
5923 goto slab_empty;
5924
5925 /*
5926 * Objects left in the slab. If it was not on the partial list before
5927 * then add it.
5928 */
5929 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
5930		add_partial(n, slab, DEACTIVATE_TO_TAIL);
5931		stat(s, FREE_ADD_PARTIAL);
5932 }
5933	spin_unlock_irqrestore(&n->list_lock, flags);
5934 return;
5935
5936slab_empty:
5937 if (prior) {
5938 /*
5939 * Slab on the partial list.
5940 */
5941 remove_partial(n, slab);
5942		stat(s, FREE_REMOVE_PARTIAL);
5943 }
5944
5945	spin_unlock_irqrestore(&n->list_lock, flags);
5946	stat(s, FREE_SLAB);
5947 discard_slab(s, slab);
5948}
5949
5950/*
5951 * pcs is locked. We should have gotten rid of the spare sheaf and obtained an
5952 * empty sheaf, while the main sheaf is full. We want to install the empty sheaf
5953 * as a main sheaf, and make the current main sheaf a spare sheaf.
5954 *
5955 * However due to having relinquished the cpu_sheaves lock when obtaining
5956 * the empty sheaf, we need to handle some unlikely but possible cases.
5957 *
5958 * If we put any sheaf to barn here, it's because we were interrupted or have
5959 * been migrated to a different cpu, which should be rare enough so just ignore
5960 * the barn's limits to simplify the handling.
5961 *
5962 * An alternative scenario that gets us here is when we fail
5963 * barn_replace_full_sheaf(), because there's no empty sheaf available in the
5964 * barn, so we had to allocate it by alloc_empty_sheaf(). But because we saw the
5965 * limit on full sheaves was not exceeded, we assume it didn't change and just
5966 * put the full sheaf there.
5967 */
5968static void __pcs_install_empty_sheaf(struct kmem_cache *s,
5969 struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty,
5970 struct node_barn *barn)
5971{
5972 lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
5973
5974 /* This is what we expect to find if nobody interrupted us. */
5975 if (likely(!pcs->spare)) {
5976 pcs->spare = pcs->main;
5977 pcs->main = empty;
5978 return;
5979 }
5980
5981 /*
5982 * Unlikely because if the main sheaf had space, we would have just
5983 * freed to it. Get rid of our empty sheaf.
5984 */
5985 if (pcs->main->size < s->sheaf_capacity) {
5986		barn_put_empty_sheaf(barn, empty);
5987 return;
5988 }
5989
5990 /* Also unlikely for the same reason */
5991 if (pcs->spare->size < s->sheaf_capacity) {
5992 swap(pcs->main, pcs->spare);
5993		barn_put_empty_sheaf(barn, empty);
5994 return;
5995 }
5996
5997 /*
5998 * We probably failed barn_replace_full_sheaf() due to no empty sheaf
5999 * available there, but we allocated one, so finish the job.
6000 */
6001	barn_put_full_sheaf(barn, pcs->main);
6002	stat(s, BARN_PUT);
6003 pcs->main = empty;
6004}
6005
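/*
 * Orientation note (a summary of the code below, not new behaviour): each CPU
 * keeps up to three sheaf slots. pcs->main is what the freeing fast path
 * fills, pcs->spare is swapped in when main becomes full, and pcs->rcu_free
 * gathers objects queued by __kfree_rcu_sheaf() until it reaches
 * sheaf_capacity and is handed to call_rcu(). Sheaves that cannot be kept
 * here are exchanged with the per-node barn.
 */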
6006/*
6007 * Replace the full main sheaf with a (at least partially) empty sheaf.
6008 *
6009 * Must be called with the cpu_sheaves local lock locked. If successful, returns
6010 * the pcs pointer and the local lock locked (possibly on a different cpu than
6011 * initially called). If not successful, returns NULL and the local lock
6012 * unlocked.
6013 */
6014static struct slub_percpu_sheaves *
6015__pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs)
6016{
6017 struct slab_sheaf *empty;
6018 struct node_barn *barn;
6019 bool put_fail;
6020
6021restart:
6022 lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
6023
6024 barn = get_barn(s);
6025 if (!barn) {
6026 local_unlock(&s->cpu_sheaves->lock);
6027 return NULL;
6028 }
6029
6030 put_fail = false;
6031
6032 if (!pcs->spare) {
6033 empty = barn_get_empty_sheaf(barn);
6034 if (empty) {
6035 pcs->spare = pcs->main;
6036 pcs->main = empty;
6037 return pcs;
6038 }
6039 goto alloc_empty;
6040 }
6041
6042 if (pcs->spare->size < s->sheaf_capacity) {
6043 swap(pcs->main, pcs->spare);
6044 return pcs;
6045 }
6046
6047	empty = barn_replace_full_sheaf(barn, pcs->main);
6048
6049	if (!IS_ERR(empty)) {
6050		stat(s, BARN_PUT);
6051 pcs->main = empty;
6052 return pcs;
6053 }
6054
6055	if (PTR_ERR(empty) == -E2BIG) {
6056 /* Since we got here, spare exists and is full */
6057 struct slab_sheaf *to_flush = pcs->spare;
6058
6059		stat(s, BARN_PUT_FAIL);
6060
6061 pcs->spare = NULL;
6062 local_unlock(&s->cpu_sheaves->lock);
6063
6064		sheaf_flush_unused(s, to_flush);
6065 empty = to_flush;
6066 goto got_empty;
6067 }
6068
6069 /*
6070 * We could not replace full sheaf because barn had no empty
6071 * sheaves. We can still allocate it and put the full sheaf in
6072 * __pcs_install_empty_sheaf(), but if we fail to allocate it,
6073 * make sure to count the fail.
6074 */
6075 put_fail = true;
6076
6077alloc_empty:
6078 local_unlock(&s->cpu_sheaves->lock);
6079
6080 empty = alloc_empty_sheaf(s, GFP_NOWAIT);
6081 if (empty)
6082 goto got_empty;
6083
6084 if (put_fail)
6085		stat(s, BARN_PUT_FAIL);
6086
6087 if (!sheaf_flush_main(s))
6088 return NULL;
6089
6090 if (!local_trylock(&s->cpu_sheaves->lock))
6091 return NULL;
6092
6093 pcs = this_cpu_ptr(s->cpu_sheaves);
6094
6095 /*
6096 * we flushed the main sheaf so it should be empty now,
6097 * but in case we got preempted or migrated, we need to
6098 * check again
6099 */
6100 if (pcs->main->size == s->sheaf_capacity)
6101 goto restart;
6102
6103 return pcs;
6104
6105got_empty:
6106 if (!local_trylock(&s->cpu_sheaves->lock)) {
6107		barn_put_empty_sheaf(barn, empty);
6108 return NULL;
6109 }
6110
6111 pcs = this_cpu_ptr(s->cpu_sheaves);
6112 __pcs_install_empty_sheaf(s, pcs, empty, barn);
6113
6114 return pcs;
6115}
6116
6117/*
6118 * Free an object to the percpu sheaves.
6119 * The object is expected to have passed slab_free_hook() already.
6120 */
6121static __fastpath_inline
6122bool free_to_pcs(struct kmem_cache *s, void *object)
6123{
6124 struct slub_percpu_sheaves *pcs;
6125
6126 if (!local_trylock(&s->cpu_sheaves->lock))
6127 return false;
6128
6129 pcs = this_cpu_ptr(s->cpu_sheaves);
6130
6131 if (unlikely(pcs->main->size == s->sheaf_capacity)) {
6132
6133 pcs = __pcs_replace_full_main(s, pcs);
6134 if (unlikely(!pcs))
6135 return false;
6136 }
6137
6138 pcs->main->objects[pcs->main->size++] = object;
6139
6140 local_unlock(&s->cpu_sheaves->lock);
6141
6142	stat(s, FREE_PCS);
6143
6144 return true;
6145}
6146
6147static void rcu_free_sheaf(struct rcu_head *head)
6148{
6149 struct kmem_cache_node *n;
6150 struct slab_sheaf *sheaf;
6151 struct node_barn *barn = NULL;
6152 struct kmem_cache *s;
6153
6154 sheaf = container_of(head, struct slab_sheaf, rcu_head);
6155
6156 s = sheaf->cache;
6157
6158 /*
6159 * This may remove some objects due to slab_free_hook() returning false,
6160 * so that the sheaf might no longer be completely full. But it's easier
6161 * to handle it as full (unless it became completely empty), as the code
6162 * handles it fine. The only downside is that sheaf will serve fewer
6163 * allocations when reused. It only happens due to debugging, which is a
6164 * performance hit anyway.
6165 */
6166 __rcu_free_sheaf_prepare(s, sheaf);
6167
6168	n = get_node(s, sheaf->node);
6169 if (!n)
6170 goto flush;
6171
6172 barn = n->barn;
6173
6174 /* due to slab_free_hook() */
6175 if (unlikely(sheaf->size == 0))
6176 goto empty;
6177
6178 /*
6179 * Checking nr_full/nr_empty outside lock avoids contention in case the
6180 * barn is at the respective limit. Due to the race we might go over the
6181 * limit but that should be rare and harmless.
6182 */
6183
6184 if (data_race(barn->nr_full) < MAX_FULL_SHEAVES) {
6185		stat(s, BARN_PUT);
6186 barn_put_full_sheaf(barn, sheaf);
6187 return;
6188 }
6189
6190flush:
6191	stat(s, BARN_PUT_FAIL);
6192 sheaf_flush_unused(s, sheaf);
6193
6194empty:
6195 if (barn && data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) {
6196 barn_put_empty_sheaf(barn, sheaf);
6197 return;
6198 }
6199
6200 free_empty_sheaf(s, sheaf);
6201}
6202
6203bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
6204{
6205 struct slub_percpu_sheaves *pcs;
6206 struct slab_sheaf *rcu_sheaf;
6207
6208 if (!local_trylock(&s->cpu_sheaves->lock))
6209 goto fail;
6210
6211 pcs = this_cpu_ptr(s->cpu_sheaves);
6212
6213 if (unlikely(!pcs->rcu_free)) {
6214
6215 struct slab_sheaf *empty;
6216 struct node_barn *barn;
6217
6218 if (pcs->spare && pcs->spare->size == 0) {
6219 pcs->rcu_free = pcs->spare;
6220 pcs->spare = NULL;
6221 goto do_free;
6222 }
6223
6224 barn = get_barn(s);
6225 if (!barn) {
6226 local_unlock(&s->cpu_sheaves->lock);
6227 goto fail;
6228 }
6229
6230 empty = barn_get_empty_sheaf(barn);
6231
6232 if (empty) {
6233 pcs->rcu_free = empty;
6234 goto do_free;
6235 }
6236
6237 local_unlock(&s->cpu_sheaves->lock);
6238
6239 empty = alloc_empty_sheaf(s, GFP_NOWAIT);
6240
6241 if (!empty)
6242 goto fail;
6243
6244 if (!local_trylock(&s->cpu_sheaves->lock)) {
6245			barn_put_empty_sheaf(barn, empty);
6246 goto fail;
6247 }
6248
6249 pcs = this_cpu_ptr(s->cpu_sheaves);
6250
6251 if (unlikely(pcs->rcu_free))
6252			barn_put_empty_sheaf(barn, empty);
6253 else
6254 pcs->rcu_free = empty;
6255 }
6256
6257do_free:
6258
6259 rcu_sheaf = pcs->rcu_free;
6260
6261 /*
6262 * Since we flush immediately when size reaches capacity, we never reach
6263 * this with size already at capacity, so no OOB write is possible.
6264 */
6265 rcu_sheaf->objects[rcu_sheaf->size++] = obj;
6266
6267 if (likely(rcu_sheaf->size < s->sheaf_capacity)) {
6268 rcu_sheaf = NULL;
6269 } else {
6270 pcs->rcu_free = NULL;
6271 rcu_sheaf->node = numa_mem_id();
6272 }
6273
6274 /*
6275 * we flush before local_unlock to make sure a racing
6276 * flush_all_rcu_sheaves() doesn't miss this sheaf
6277 */
6278 if (rcu_sheaf)
6279		call_rcu(&rcu_sheaf->rcu_head, rcu_free_sheaf);
6280
6281 local_unlock(&s->cpu_sheaves->lock);
6282
6283	stat(s, FREE_RCU_SHEAF);
6284 return true;
6285
6286fail:
6287	stat(s, FREE_RCU_SHEAF_FAIL);
6288 return false;
6289}
6290
6291/*
6292 * Bulk free objects to the percpu sheaves.
6293 * Unlike free_to_pcs() this includes the calls to all necessary hooks
6294 * and the fallback to freeing to slab pages.
6295 */
6296static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
6297{
6298 struct slub_percpu_sheaves *pcs;
6299 struct slab_sheaf *main, *empty;
6300	bool init = slab_want_init_on_free(s);
6301 unsigned int batch, i = 0;
6302 struct node_barn *barn;
6303 void *remote_objects[PCS_BATCH_MAX];
6304 unsigned int remote_nr = 0;
6305 int node = numa_mem_id();
6306
6307next_remote_batch:
6308 while (i < size) {
6309		struct slab *slab = virt_to_slab(p[i]);
6310
6311		memcg_slab_free_hook(s, slab, p + i, 1);
6312		alloc_tagging_slab_free_hook(s, slab, p + i, 1);
6313
6314 if (unlikely(!slab_free_hook(s, p[i], init, false))) {
6315 p[i] = p[--size];
6316 if (!size)
6317 goto flush_remote;
6318 continue;
6319 }
6320
6321 if (unlikely(IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)) {
6322 remote_objects[remote_nr] = p[i];
6323 p[i] = p[--size];
6324 if (++remote_nr >= PCS_BATCH_MAX)
6325 goto flush_remote;
6326 continue;
6327 }
6328
6329 i++;
6330 }
6331
6332next_batch:
6333 if (!local_trylock(&s->cpu_sheaves->lock))
6334 goto fallback;
6335
6336 pcs = this_cpu_ptr(s->cpu_sheaves);
6337
6338 if (likely(pcs->main->size < s->sheaf_capacity))
6339 goto do_free;
6340
6341 barn = get_barn(s);
6342 if (!barn)
6343 goto no_empty;
6344
6345 if (!pcs->spare) {
6346 empty = barn_get_empty_sheaf(barn);
6347 if (!empty)
6348 goto no_empty;
6349
6350 pcs->spare = pcs->main;
6351 pcs->main = empty;
6352 goto do_free;
6353 }
6354
6355 if (pcs->spare->size < s->sheaf_capacity) {
6356 swap(pcs->main, pcs->spare);
6357 goto do_free;
6358 }
6359
6360	empty = barn_replace_full_sheaf(barn, pcs->main);
6361	if (IS_ERR(empty)) {
6362		stat(s, BARN_PUT_FAIL);
6363 goto no_empty;
6364 }
6365
6366	stat(s, BARN_PUT);
6367 pcs->main = empty;
6368
6369do_free:
6370 main = pcs->main;
6371 batch = min(size, s->sheaf_capacity - main->size);
6372
6373	memcpy(main->objects + main->size, p, batch * sizeof(void *));
6374 main->size += batch;
6375
6376 local_unlock(&s->cpu_sheaves->lock);
6377
6378	stat_add(s, FREE_PCS, batch);
6379
6380 if (batch < size) {
6381 p += batch;
6382 size -= batch;
6383 goto next_batch;
6384 }
6385
6386 return;
6387
6388no_empty:
6389 local_unlock(&s->cpu_sheaves->lock);
6390
6391 /*
6392 * if we depleted all empty sheaves in the barn or there are too
6393 * many full sheaves, free the rest to slab pages
6394 */
6395fallback:
6396 __kmem_cache_free_bulk(s, size, p);
6397
6398flush_remote:
6399 if (remote_nr) {
6400		__kmem_cache_free_bulk(s, remote_nr, &remote_objects[0]);
6401 if (i < size) {
6402 remote_nr = 0;
6403 goto next_remote_batch;
6404 }
6405 }
6406}
6407
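/*
 * Per-cpu lists of objects and slabs whose freeing could not take the
 * required locks in the current context (e.g. kfree_nolock() from NMI or
 * with the local lock already held). They are drained later from irq_work.
 */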
6408struct defer_free {
6409 struct llist_head objects;
6410 struct llist_head slabs;
6411 struct irq_work work;
6412};
6413
6414static void free_deferred_objects(struct irq_work *work);
6415
6416static DEFINE_PER_CPU(struct defer_free, defer_free_objects) = {
6417 .objects = LLIST_HEAD_INIT(objects),
6418 .slabs = LLIST_HEAD_INIT(slabs),
6419 .work = IRQ_WORK_INIT(free_deferred_objects),
6420};
6421
6422/*
6423 * In PREEMPT_RT irq_work runs in per-cpu kthread, so it's safe
6424 * to take sleeping spin_locks from __slab_free() and deactivate_slab().
6425 * In !PREEMPT_RT irq_work will run after local_unlock_irqrestore().
6426 */
6427static void free_deferred_objects(struct irq_work *work)
6428{
6429 struct defer_free *df = container_of(work, struct defer_free, work);
6430 struct llist_head *objs = &df->objects;
6431 struct llist_head *slabs = &df->slabs;
6432 struct llist_node *llnode, *pos, *t;
6433
6434	if (llist_empty(objs) && llist_empty(slabs))
6435 return;
6436
6437	llnode = llist_del_all(objs);
6438 llist_for_each_safe(pos, t, llnode) {
6439 struct kmem_cache *s;
6440 struct slab *slab;
6441 void *x = pos;
6442
6443		slab = virt_to_slab(x);
6444 s = slab->slab_cache;
6445
6446 /*
6447 * We used freepointer in 'x' to link 'x' into df->objects.
6448 * Clear it to NULL to avoid false positive detection
6449 * of "Freepointer corruption".
6450 */
6451 *(void **)x = NULL;
6452
6453 /* Point 'x' back to the beginning of allocated object */
6454 x -= s->offset;
6455		__slab_free(s, slab, x, x, 1, _THIS_IP_);
6456 }
6457
6458	llnode = llist_del_all(slabs);
6459 llist_for_each_safe(pos, t, llnode) {
6460 struct slab *slab = container_of(pos, struct slab, llnode);
6461
6462#ifdef CONFIG_SLUB_TINY
6463 discard_slab(slab->slab_cache, slab);
6464#else
6465		deactivate_slab(slab->slab_cache, slab, slab->flush_freelist);
6466#endif
6467 }
6468}
6469
6470static void defer_free(struct kmem_cache *s, void *head)
6471{
6472 struct defer_free *df;
6473
6474 guard(preempt)();
6475
6476 df = this_cpu_ptr(&defer_free_objects);
6477	if (llist_add(head + s->offset, &df->objects))
6478		irq_work_queue(&df->work);
6479}
6480
6481static void defer_deactivate_slab(struct slab *slab, void *flush_freelist)
6482{
6483 struct defer_free *df;
6484
6485 slab->flush_freelist = flush_freelist;
6486
6487 guard(preempt)();
6488
6489 df = this_cpu_ptr(&defer_free_objects);
6490	if (llist_add(&slab->llnode, &df->slabs))
6491		irq_work_queue(&df->work);
6492}
6493
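/*
 * Make sure any irq_work queued by defer_free() or defer_deactivate_slab()
 * has finished on all possible CPUs, so that previously deferred objects and
 * slabs are no longer in flight.
 */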
6494void defer_free_barrier(void)
6495{
6496 int cpu;
6497
6498 for_each_possible_cpu(cpu)
6499		irq_work_sync(&per_cpu_ptr(&defer_free_objects, cpu)->work);
6500}
6501
6502#ifndef CONFIG_SLUB_TINY
6503/*
6504 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
6505 * can perform fastpath freeing without additional function calls.
6506 *
6507 * The fastpath is only possible if we are freeing to the current cpu slab
6508 * of this processor. This is typically the case if we have just allocated
6509 * the item before.
6510 *
6511 * If fastpath is not possible then fall back to __slab_free where we deal
6512 * with all sorts of special processing.
6513 *
6514 * Bulk free of a freelist with several objects (all pointing to the
6515 * same slab) is possible by specifying head and tail ptr, plus objects
6516 * count (cnt). Bulk free is indicated by the tail pointer being set.
6517 */
6518static __always_inline void do_slab_free(struct kmem_cache *s,
6519 struct slab *slab, void *head, void *tail,
6520 int cnt, unsigned long addr)
6521{
6522 /* cnt == 0 signals that it's called from kfree_nolock() */
6523 bool allow_spin = cnt;
6524 struct kmem_cache_cpu *c;
6525 unsigned long tid;
6526 void **freelist;
6527
6528redo:
6529 /*
6530	 * Determine the current cpu's per cpu slab.
6531 * The cpu may change afterward. However that does not matter since
6532 * data is retrieved via this pointer. If we are on the same cpu
6533 * during the cmpxchg then the free will succeed.
6534 */
6535 c = raw_cpu_ptr(s->cpu_slab);
6536 tid = READ_ONCE(c->tid);
6537
6538 /* Same with comment on barrier() in __slab_alloc_node() */
6539 barrier();
6540
6541 if (unlikely(slab != c->slab)) {
6542 if (unlikely(!allow_spin)) {
6543 /*
6544 * __slab_free() can locklessly cmpxchg16 into a slab,
6545 * but then it might need to take spin_lock or local_lock
6546 * in put_cpu_partial() for further processing.
6547 * Avoid the complexity and simply add to a deferred list.
6548 */
6549 defer_free(s, head);
6550 } else {
6551 __slab_free(s, slab, head, tail, cnt, addr);
6552 }
6553 return;
6554 }
6555
6556 if (unlikely(!allow_spin)) {
6557 if ((in_nmi() || !USE_LOCKLESS_FAST_PATH()) &&
6558 local_lock_is_locked(&s->cpu_slab->lock)) {
6559 defer_free(s, head);
6560 return;
6561 }
6562 cnt = 1; /* restore cnt. kfree_nolock() frees one object at a time */
6563 }
6564
6565 if (USE_LOCKLESS_FAST_PATH()) {
6566 freelist = READ_ONCE(c->freelist);
6567
6568		set_freepointer(s, tail, freelist);
6569
6570 if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
6571			note_cmpxchg_failure("slab_free", s, tid);
6572 goto redo;
6573 }
6574 } else {
6575 __maybe_unused unsigned long flags = 0;
6576
6577 /* Update the free list under the local lock */
6578 local_lock_cpu_slab(s, flags);
6579 c = this_cpu_ptr(s->cpu_slab);
6580 if (unlikely(slab != c->slab)) {
6581 local_unlock_cpu_slab(s, flags);
6582 goto redo;
6583 }
6584 tid = c->tid;
6585 freelist = c->freelist;
6586
6587		set_freepointer(s, tail, freelist);
6588 c->freelist = head;
6589 c->tid = next_tid(tid);
6590
6591 local_unlock_cpu_slab(s, flags);
6592 }
6593	stat_add(s, FREE_FASTPATH, cnt);
6594}
6595#else /* CONFIG_SLUB_TINY */
6596static void do_slab_free(struct kmem_cache *s,
6597 struct slab *slab, void *head, void *tail,
6598 int cnt, unsigned long addr)
6599{
6600 __slab_free(s, slab, head, tail, cnt, addr);
6601}
6602#endif /* CONFIG_SLUB_TINY */
6603
6604static __fastpath_inline
6605void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
6606 unsigned long addr)
6607{
6608	memcg_slab_free_hook(s, slab, &object, 1);
6609	alloc_tagging_slab_free_hook(s, slab, &object, 1);
6610
6611 if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
6612 return;
6613
6614 if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
6615 slab_nid(slab) == numa_mem_id())) {
6616 if (likely(free_to_pcs(s, object)))
6617 return;
6618 }
6619
6620	do_slab_free(s, slab, object, object, 1, addr);
6621}
6622
6623#ifdef CONFIG_MEMCG
6624/* Do not inline the rare memcg charging failed path into the allocation path */
6625static noinline
6626void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
6627{
6628 if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
6629 do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
6630}
6631#endif
6632
6633static __fastpath_inline
6634void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
6635 void *tail, void **p, int cnt, unsigned long addr)
6636{
6637	memcg_slab_free_hook(s, slab, p, cnt);
6638	alloc_tagging_slab_free_hook(s, slab, p, cnt);
6639 /*
6640 * With KASAN enabled slab_free_freelist_hook modifies the freelist
6641 * to remove objects, whose reuse must be delayed.
6642 */
6643 if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
6644 do_slab_free(s, slab, head, tail, cnt, addr);
6645}
6646
6647#ifdef CONFIG_SLUB_RCU_DEBUG
6648static void slab_free_after_rcu_debug(struct rcu_head *rcu_head)
6649{
6650 struct rcu_delayed_free *delayed_free =
6651 container_of(rcu_head, struct rcu_delayed_free, head);
6652 void *object = delayed_free->object;
6653 struct slab *slab = virt_to_slab(object);
6654 struct kmem_cache *s;
6655
6656 kfree(delayed_free);
6657
6658 if (WARN_ON(is_kfence_address(object)))
6659 return;
6660
6661 /* find the object and the cache again */
6662 if (WARN_ON(!slab))
6663 return;
6664 s = slab->slab_cache;
6665 if (WARN_ON(!(s->flags & SLAB_TYPESAFE_BY_RCU)))
6666 return;
6667
6668 /* resume freeing */
6669 if (slab_free_hook(s, object, slab_want_init_on_free(s), true))
6670 do_slab_free(s, slab, object, object, 1, _THIS_IP_);
6671}
6672#endif /* CONFIG_SLUB_RCU_DEBUG */
6673
6674#ifdef CONFIG_KASAN_GENERIC
6675void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
6676{
6677 do_slab_free(cache, virt_to_slab(x), x, x, 1, addr);
6678}
6679#endif
6680
6681static inline struct kmem_cache *virt_to_cache(const void *obj)
6682{
6683 struct slab *slab;
6684
6685	slab = virt_to_slab(obj);
6686 if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n", __func__))
6687 return NULL;
6688 return slab->slab_cache;
6689}
6690
6691static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
6692{
6693 struct kmem_cache *cachep;
6694
6695 if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
6696 !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
6697 return s;
6698
6699	cachep = virt_to_cache(x);
6700 if (WARN(cachep && cachep != s,
6701 "%s: Wrong slab cache. %s but object is from %s\n",
6702 __func__, s->name, cachep->name))
6703		print_tracking(cachep, x);
6704 return cachep;
6705}
6706
6707/**
6708 * kmem_cache_free - Deallocate an object
6709 * @s: The cache the allocation was from.
6710 * @x: The previously allocated object.
6711 *
6712 * Free an object which was previously allocated from this
6713 * cache.
6714 */
6715void kmem_cache_free(struct kmem_cache *s, void *x)
6716{
6717 s = cache_from_obj(s, x);
6718 if (!s)
6719 return;
6720	trace_kmem_cache_free(_RET_IP_, x, s);
6721	slab_free(s, virt_to_slab(x), x, _RET_IP_);
6722}
6723EXPORT_SYMBOL(kmem_cache_free);
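/*
 * Illustrative example (the cache and struct are hypothetical): an object
 * must be returned to the same cache it was allocated from:
 *
 *	struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *
 *	if (!f)
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free(foo_cachep, f);
 */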
6724
6725static void free_large_kmalloc(struct folio *folio, void *object)
6726{
6727 unsigned int order = folio_order(folio);
6728
6729 if (WARN_ON_ONCE(!folio_test_large_kmalloc(folio))) {
6730		dump_page(&folio->page, "Not a kmalloc allocation");
6731 return;
6732 }
6733
6734 if (WARN_ON_ONCE(order == 0))
6735 pr_warn_once("object pointer: 0x%p\n", object);
6736
6737	kmemleak_free(object);
6738	kasan_kfree_large(object);
6739	kmsan_kfree_large(object);
6740
6741	lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
6742			      -(PAGE_SIZE << order));
6743 __folio_clear_large_kmalloc(folio);
6744	free_frozen_pages(&folio->page, order);
6745}
6746
6747/*
6748 * Given an rcu_head embedded within an object obtained from kvmalloc at an
6749 * offset < 4k, free the object in question.
6750 */
6751void kvfree_rcu_cb(struct rcu_head *head)
6752{
6753 void *obj = head;
6754 struct folio *folio;
6755 struct slab *slab;
6756 struct kmem_cache *s;
6757 void *slab_addr;
6758
6759	if (is_vmalloc_addr(obj)) {
6760		obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
6761		vfree(obj);
6762 return;
6763 }
6764
6765	folio = virt_to_folio(obj);
6766 if (!folio_test_slab(folio)) {
6767 /*
6768 * rcu_head offset can be only less than page size so no need to
6769 * consider folio order
6770 */
6771 obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
6772		free_large_kmalloc(folio, obj);
6773 return;
6774 }
6775
6776 slab = folio_slab(folio);
6777 s = slab->slab_cache;
6778 slab_addr = folio_address(folio);
6779
6780	if (is_kfence_address(obj)) {
6781		obj = kfence_object_start(obj);
6782	} else {
6783		unsigned int idx = __obj_to_index(s, slab_addr, obj);
6784
6785		obj = slab_addr + s->size * idx;
6786		obj = fixup_red_left(s, obj);
6787 }
6788
6789	slab_free(s, slab, obj, _RET_IP_);
6790}
6791
6792/**
6793 * kfree - free previously allocated memory
6794 * @object: pointer returned by kmalloc() or kmem_cache_alloc()
6795 *
6796 * If @object is NULL, no operation is performed.
6797 */
6798void kfree(const void *object)
6799{
6800 struct folio *folio;
6801 struct slab *slab;
6802 struct kmem_cache *s;
6803 void *x = (void *)object;
6804
6805	trace_kfree(_RET_IP_, object);
6806
6807 if (unlikely(ZERO_OR_NULL_PTR(object)))
6808 return;
6809
6810	folio = virt_to_folio(object);
6811	if (unlikely(!folio_test_slab(folio))) {
6812		free_large_kmalloc(folio, (void *)object);
6813 return;
6814 }
6815
6816 slab = folio_slab(folio);
6817 s = slab->slab_cache;
6818	slab_free(s, slab, x, _RET_IP_);
6819}
6820EXPORT_SYMBOL(kfree);
6821
6822/*
6823 * Can be called while holding raw_spinlock_t or from IRQ and NMI,
6824 * but ONLY for objects allocated by kmalloc_nolock().
6825 * Debug checks (like kmemleak and kfence) were skipped on allocation,
6826 * hence
6827 * obj = kmalloc(); kfree_nolock(obj);
6828 * will miss kmemleak/kfence book keeping and will cause false positives.
6829 * large_kmalloc is not supported either.
6830 */
6831void kfree_nolock(const void *object)
6832{
6833 struct folio *folio;
6834 struct slab *slab;
6835 struct kmem_cache *s;
6836 void *x = (void *)object;
6837
6838 if (unlikely(ZERO_OR_NULL_PTR(object)))
6839 return;
6840
6841	folio = virt_to_folio(object);
6842 if (unlikely(!folio_test_slab(folio))) {
6843 WARN_ONCE(1, "large_kmalloc is not supported by kfree_nolock()");
6844 return;
6845 }
6846
6847 slab = folio_slab(folio);
6848 s = slab->slab_cache;
6849
6850	memcg_slab_free_hook(s, slab, &x, 1);
6851	alloc_tagging_slab_free_hook(s, slab, &x, 1);
6852 /*
6853 * Unlike slab_free() do NOT call the following:
6854 * kmemleak_free_recursive(x, s->flags);
6855 * debug_check_no_locks_freed(x, s->object_size);
6856 * debug_check_no_obj_freed(x, s->object_size);
6857 * __kcsan_check_access(x, s->object_size, ..);
6858 * kfence_free(x);
6859 * since they take spinlocks or not safe from any context.
6860 */
6861	kmsan_slab_free(s, x);
6862 /*
6863 * If KASAN finds a kernel bug it will do kasan_report_invalid_free()
6864 * which will call raw_spin_lock_irqsave() which is technically
6865 * unsafe from NMI, but take chance and report kernel bug.
6866 * The sequence of
6867 * kasan_report_invalid_free() -> raw_spin_lock_irqsave() -> NMI
6868 * -> kfree_nolock() -> kasan_report_invalid_free() on the same CPU
6869 * is double buggy and deserves to deadlock.
6870 */
6871	if (kasan_slab_pre_free(s, x))
6872 return;
6873 /*
6874 * memcg, kasan_slab_pre_free are done for 'x'.
6875 * The only thing left is kasan_poison without quarantine,
6876 * since kasan quarantine takes locks and not supported from NMI.
6877 */
6878	kasan_slab_free(s, x, false, false, /* skip quarantine */true);
6879#ifndef CONFIG_SLUB_TINY
6880	do_slab_free(s, slab, x, x, 0, _RET_IP_);
6881#else
6882 defer_free(s, x);
6883#endif
6884}
6885EXPORT_SYMBOL_GPL(kfree_nolock);
6886
6887static __always_inline __realloc_size(2) void *
6888__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
6889{
6890 void *ret;
6891 size_t ks = 0;
6892 int orig_size = 0;
6893 struct kmem_cache *s = NULL;
6894
6895 if (unlikely(ZERO_OR_NULL_PTR(p)))
6896 goto alloc_new;
6897
6898 /* Check for double-free. */
6899	if (!kasan_check_byte(p))
6900 return NULL;
6901
6902 /*
6903 * If reallocation is not necessary (e. g. the new size is less
6904 * than the current allocated size), the current allocation will be
6905 * preserved unless __GFP_THISNODE is set. In the latter case a new
6906 * allocation on the requested node will be attempted.
6907 */
6908 if (unlikely(flags & __GFP_THISNODE) && nid != NUMA_NO_NODE &&
6909 nid != page_to_nid(virt_to_page(p)))
6910 goto alloc_new;
6911
6912	if (is_kfence_address(p)) {
6913		ks = orig_size = kfence_ksize(p);
6914 } else {
6915 struct folio *folio;
6916
6917		folio = virt_to_folio(p);
6918 if (unlikely(!folio_test_slab(folio))) {
6919 /* Big kmalloc object */
6920 WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE);
6921 WARN_ON(p != folio_address(folio));
6922 ks = folio_size(folio);
6923 } else {
6924 s = folio_slab(folio)->slab_cache;
6925			orig_size = get_orig_size(s, (void *)p);
6926 ks = s->object_size;
6927 }
6928 }
6929
6930 /* If the old object doesn't fit, allocate a bigger one */
6931 if (new_size > ks)
6932 goto alloc_new;
6933
6934 /* If the old object doesn't satisfy the new alignment, allocate a new one */
6935 if (!IS_ALIGNED((unsigned long)p, align))
6936 goto alloc_new;
6937
6938 /* Zero out spare memory. */
6939 if (want_init_on_alloc(flags)) {
6940 kasan_disable_current();
6941 if (orig_size && orig_size < new_size)
6942			memset(kasan_reset_tag(p) + orig_size, 0, new_size - orig_size);
6943		else
6944			memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
6945 kasan_enable_current();
6946 }
6947
6948 /* Setup kmalloc redzone when needed */
6949 if (s && slub_debug_orig_size(s)) {
6950		set_orig_size(s, (void *)p, new_size);
6951		if (s->flags & SLAB_RED_ZONE && new_size < ks)
6952			memset_no_sanitize_memory(kasan_reset_tag(p) + new_size,
6953						  SLUB_RED_ACTIVE, ks - new_size);
6954 }
6955
6956	p = kasan_krealloc(p, new_size, flags);
6957 return (void *)p;
6958
6959alloc_new:
6960 ret = kmalloc_node_track_caller_noprof(new_size, flags, nid, _RET_IP_);
6961 if (ret && p) {
6962 /* Disable KASAN checks as the object's redzone is accessed. */
6963 kasan_disable_current();
6964		memcpy(ret, kasan_reset_tag(p), orig_size ?: ks);
6965 kasan_enable_current();
6966 }
6967
6968 return ret;
6969}
6970
6971/**
6972 * krealloc_node_align - reallocate memory. The contents will remain unchanged.
6973 * @p: object to reallocate memory for.
6974 * @new_size: how many bytes of memory are required.
6975 * @align: desired alignment.
6976 * @flags: the type of memory to allocate.
6977 * @nid: NUMA node or NUMA_NO_NODE
6978 *
6979 * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
6980 * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
6981 *
6982 * Only alignments up to those guaranteed by kmalloc() will be honored. Please see
6983 * Documentation/core-api/memory-allocation.rst for more details.
6984 *
6985 * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
6986 * initial memory allocation, every subsequent call to this API for the same
6987 * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
6988 * __GFP_ZERO is not fully honored by this API.
6989 *
6990 * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
6991 * size of an allocation (but not the exact size it was allocated with) and
6992 * hence implements the following semantics for shrinking and growing buffers
6993 * with __GFP_ZERO::
6994 *
6995 * new bucket
6996 * 0 size size
6997 * |--------|----------------|
6998 * | keep | zero |
6999 *
7000 * Otherwise, the original allocation size 'orig_size' could be used to
7001 * precisely clear the requested size, and the new size will also be stored
7002 * as the new 'orig_size'.
7003 *
7004 * In any case, the contents of the object pointed to are preserved up to the
7005 * lesser of the new and old sizes.
7006 *
7007 * Return: pointer to the allocated memory or %NULL in case of error
7008 */
7009void *krealloc_node_align_noprof(const void *p, size_t new_size, unsigned long align,
7010 gfp_t flags, int nid)
7011{
7012 void *ret;
7013
7014 if (unlikely(!new_size)) {
7015 kfree(p);
7016 return ZERO_SIZE_PTR;
7017 }
7018
7019 ret = __do_krealloc(p, new_size, align, flags, nid);
7020	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
7021 kfree(p);
7022
7023 return ret;
7024}
7025EXPORT_SYMBOL(krealloc_node_align_noprof);
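/*
 * Illustrative example of the __GFP_ZERO rule above (buffer and sizes are
 * hypothetical): every allocation in the lifetime of the buffer passes
 * __GFP_ZERO, and the failure case keeps or frees the old buffer instead of
 * leaking it:
 *
 *	buf = kzalloc(64, GFP_KERNEL);
 *	...
 *	new = krealloc(buf, 128, GFP_KERNEL | __GFP_ZERO);
 *	if (!new) {
 *		kfree(buf);
 *		return -ENOMEM;
 *	}
 *	buf = new;
 */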
7026
7027static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
7028{
7029 /*
7030 * We want to attempt a large physically contiguous block first because
7031 * it is less likely to fragment multiple larger blocks and therefore
7032 * contribute to a long term fragmentation less than vmalloc fallback.
7033	 * However, make sure that larger requests are not too disruptive - i.e.
7034	 * do not direct reclaim unless physically contiguous memory is preferred
7035	 * (__GFP_RETRY_MAYFAIL mode). We still kick in kswapd/kcompactd to
7036	 * start working in the background.
7037 */
7038 if (size > PAGE_SIZE) {
7039 flags |= __GFP_NOWARN;
7040
7041 if (!(flags & __GFP_RETRY_MAYFAIL))
7042 flags &= ~__GFP_DIRECT_RECLAIM;
7043
7044 /* nofail semantic is implemented by the vmalloc fallback */
7045 flags &= ~__GFP_NOFAIL;
7046 }
7047
7048 return flags;
7049}
7050
7051/**
7052 * __kvmalloc_node - attempt to allocate physically contiguous memory, but upon
7053 * failure, fall back to non-contiguous (vmalloc) allocation.
7054 * @size: size of the request.
7055 * @b: which set of kmalloc buckets to allocate from.
7056 * @align: desired alignment.
7057 * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
7058 * @node: numa node to allocate from
7059 *
7060 * Only alignments up to those guaranteed by kmalloc() will be honored. Please see
7061 * Documentation/core-api/memory-allocation.rst for more details.
7062 *
7063 * Uses kmalloc to get the memory but if the allocation fails then falls back
7064 * to the vmalloc allocator. Use kvfree for freeing the memory.
7065 *
7066 * GFP_NOWAIT and GFP_ATOMIC are not supported, neither is the __GFP_NORETRY modifier.
7067 * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
7068 * preferable to the vmalloc fallback, due to visible performance drawbacks.
7069 *
7070 * Return: pointer to the allocated memory or %NULL in case of failure
7071 */
7072void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
7073 gfp_t flags, int node)
7074{
7075 void *ret;
7076
7077 /*
7078 * It doesn't really make sense to fallback to vmalloc for sub page
7079 * requests
7080 */
7081 ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b),
7082				kmalloc_gfp_adjust(flags, size),
7083 node, _RET_IP_);
7084 if (ret || size <= PAGE_SIZE)
7085 return ret;
7086
7087 /* non-sleeping allocations are not supported by vmalloc */
7088	if (!gfpflags_allow_blocking(flags))
7089 return NULL;
7090
7091 /* Don't even allow crazy sizes */
7092 if (unlikely(size > INT_MAX)) {
7093 WARN_ON_ONCE(!(flags & __GFP_NOWARN));
7094 return NULL;
7095 }
7096
7097 /*
7098 * kvmalloc() can always use VM_ALLOW_HUGE_VMAP,
7099 * since the callers already cannot assume anything
7100 * about the resulting pointer, and cannot play
7101 * protection games.
7102 */
7103	return __vmalloc_node_range_noprof(size, align, VMALLOC_START, VMALLOC_END,
7104			flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
7105			node, __builtin_return_address(0));
7106}
7107EXPORT_SYMBOL(__kvmalloc_node_noprof);
7108
7109/**
7110 * kvfree() - Free memory.
7111 * @addr: Pointer to allocated memory.
7112 *
7113 * kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc().
7114 * It is slightly more efficient to use kfree() or vfree() if you are certain
7115 * that you know which one to use.
7116 *
7117 * Context: Either preemptible task context or not-NMI interrupt.
7118 */
7119void kvfree(const void *addr)
7120{
7121	if (is_vmalloc_addr(addr))
7122 vfree(addr);
7123 else
7124 kfree(addr);
7125}
7126EXPORT_SYMBOL(kvfree);
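/*
 * Illustrative example (sizes are hypothetical): a possibly large table is
 * allocated with the kv* pair so the caller does not need to know whether
 * kmalloc or vmalloc ended up backing it:
 *
 *	table = kvmalloc_array(nr_entries, sizeof(*table), GFP_KERNEL | __GFP_ZERO);
 *	if (!table)
 *		return -ENOMEM;
 *	...
 *	kvfree(table);
 */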
7127
7128/**
7129 * kvfree_sensitive - Free a data object containing sensitive information.
7130 * @addr: address of the data object to be freed.
7131 * @len: length of the data object.
7132 *
7133 * Use the special memzero_explicit() function to clear the content of a
7134 * kvmalloc'ed object containing sensitive data to make sure that the
7135 * compiler won't optimize out the data clearing.
7136 */
7137void kvfree_sensitive(const void *addr, size_t len)
7138{
7139 if (likely(!ZERO_OR_NULL_PTR(addr))) {
7140		memzero_explicit((void *)addr, len);
7141 kvfree(addr);
7142 }
7143}
7144EXPORT_SYMBOL(kvfree_sensitive);
7145
7146/**
7147 * kvrealloc_node_align - reallocate memory; contents remain unchanged
7148 * @p: object to reallocate memory for
7149 * @size: the size to reallocate
7150 * @align: desired alignment
7151 * @flags: the flags for the page level allocator
7152 * @nid: NUMA node id
7153 *
7154 * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0
7155 * and @p is not a %NULL pointer, the object pointed to is freed.
7156 *
7157 * Only alignments up to those guaranteed by kmalloc() will be honored. Please see
7158 * Documentation/core-api/memory-allocation.rst for more details.
7159 *
7160 * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
7161 * initial memory allocation, every subsequent call to this API for the same
7162 * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
7163 * __GFP_ZERO is not fully honored by this API.
7164 *
7165 * In any case, the contents of the object pointed to are preserved up to the
7166 * lesser of the new and old sizes.
7167 *
7168 * This function must not be called concurrently with itself or kvfree() for the
7169 * same memory allocation.
7170 *
7171 * Return: pointer to the allocated memory or %NULL in case of error
7172 */
7173void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
7174 gfp_t flags, int nid)
7175{
7176 void *n;
7177
7178	if (is_vmalloc_addr(p))
7179 return vrealloc_node_align_noprof(p, size, align, flags, nid);
7180
7181 n = krealloc_node_align_noprof(p, size, align, kmalloc_gfp_adjust(flags, size), nid);
7182 if (!n) {
7183 /* We failed to krealloc(), fall back to kvmalloc(). */
7184 n = kvmalloc_node_align_noprof(size, align, flags, nid);
7185 if (!n)
7186 return NULL;
7187
7188 if (p) {
7189 /* We already know that `p` is not a vmalloc address. */
7190 kasan_disable_current();
7191			memcpy(n, kasan_reset_tag(p), ksize(p));
7192 kasan_enable_current();
7193
7194 kfree(p);
7195 }
7196 }
7197
7198 return n;
7199}
7200EXPORT_SYMBOL(kvrealloc_node_align_noprof);
7201
7202struct detached_freelist {
7203 struct slab *slab;
7204 void *tail;
7205 void *freelist;
7206 int cnt;
7207 struct kmem_cache *s;
7208};
7209
7210/*
7211 * This function progressively scans the array with free objects (with
7212 * a limited look ahead) and extracts objects belonging to the same
7213 * slab. It builds a detached freelist directly within the given
7214 * slab/objects. This can happen without any need for
7215 * synchronization, because the objects are owned by the running process.
7216 * The freelist is built up as a singly linked list in the objects.
7217 * The idea is that this detached freelist can then be bulk
7218 * transferred to the real freelist(s), but only requiring a single
7219 * synchronization primitive. Look ahead in the array is limited due
7220 * to performance reasons.
7221 */
7222static inline
7223int build_detached_freelist(struct kmem_cache *s, size_t size,
7224 void **p, struct detached_freelist *df)
7225{
7226 int lookahead = 3;
7227 void *object;
7228 struct folio *folio;
7229 size_t same;
7230
7231 object = p[--size];
7232	folio = virt_to_folio(object);
7233	if (!s) {
7234		/* Handle kmalloc'ed objects */
7235 if (unlikely(!folio_test_slab(folio))) {
7236 free_large_kmalloc(folio, object);
7237 df->slab = NULL;
7238 return size;
7239 }
7240 /* Derive kmem_cache from object */
7241 df->slab = folio_slab(folio);
7242 df->s = df->slab->slab_cache;
7243 } else {
7244 df->slab = folio_slab(folio);
7245		df->s = cache_from_obj(s, object); /* Support for memcg */
7246 }
7247
7248 /* Start new detached freelist */
7249 df->tail = object;
7250 df->freelist = object;
7251 df->cnt = 1;
7252
7253	if (is_kfence_address(object))
7254		return size;
7255
7256	set_freepointer(df->s, object, NULL);
7257
7258 same = size;
7259 while (size) {
7260 object = p[--size];
7261 /* df->slab is always set at this point */
7262		if (df->slab == virt_to_slab(object)) {
7263			/* Opportunistically build the freelist */
7264			set_freepointer(df->s, object, df->freelist);
7265 df->freelist = object;
7266 df->cnt++;
7267 same--;
7268 if (size != same)
7269 swap(p[size], p[same]);
7270 continue;
7271 }
7272
7273 /* Limit look ahead search */
7274 if (!--lookahead)
7275 break;
7276 }
7277
7278 return same;
7279}
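/*
 * Worked example (illustrative): for p[] = { A1, B1, A2, A3 }, where An/Bn
 * are objects from slabs A and B, one call starts from A3 and links A3, A2
 * and A1 into a single detached freelist (cnt == 3), moves the unhandled B1
 * to the front of the array and returns 1; the next call then handles B1.
 */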
7280
7281/*
7282 * Internal bulk free of objects that were not initialised by the post alloc
7283 * hooks and thus should not be processed by the free hooks
7284 */
7285static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
7286{
7287 if (!size)
7288 return;
7289
7290 do {
7291 struct detached_freelist df;
7292
7293		size = build_detached_freelist(s, size, p, &df);
7294 if (!df.slab)
7295 continue;
7296
7297		if (kfence_free(df.freelist))
7298 continue;
7299
7300		do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
7301			     _RET_IP_);
7302 } while (likely(size));
7303}
7304
7305/* Note that interrupts must be enabled when calling this function. */
7306void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
7307{
7308 if (!size)
7309 return;
7310
7311	 * freeing to sheaves is incompatible with the detached freelist, so
7312	 * once we go that way, we have to do everything differently
7313 * once we go that way, we have to do everything differently
7314 */
7315 if (s && s->cpu_sheaves) {
7316 free_to_pcs_bulk(s, size, p);
7317 return;
7318 }
7319
7320 do {
7321 struct detached_freelist df;
7322
7323		size = build_detached_freelist(s, size, p, &df);
7324 if (!df.slab)
7325 continue;
7326
7327		slab_free_bulk(df.s, df.slab, df.freelist, df.tail, &p[size],
7328			       df.cnt, _RET_IP_);
7329 } while (likely(size));
7330}
7331EXPORT_SYMBOL(kmem_cache_free_bulk);
7332
7333#ifndef CONFIG_SLUB_TINY
7334static inline
7335int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
7336 void **p)
7337{
7338 struct kmem_cache_cpu *c;
7339 unsigned long irqflags;
7340 int i;
7341
7342 /*
7343 * Drain objects in the per cpu slab, while disabling local
7344	 * IRQs, which protects against PREEMPT and interrupt
7345	 * handlers invoking the normal fastpath.
7346 */
7347 c = slub_get_cpu_ptr(s->cpu_slab);
7348 local_lock_irqsave(&s->cpu_slab->lock, irqflags);
7349
7350 for (i = 0; i < size; i++) {
7351		void *object = kfence_alloc(s, s->object_size, flags);
7352
7353 if (unlikely(object)) {
7354 p[i] = object;
7355 continue;
7356 }
7357
7358 object = c->freelist;
7359 if (unlikely(!object)) {
7360 /*
7361 * We may have removed an object from c->freelist using
7362 * the fastpath in the previous iteration; in that case,
7363 * c->tid has not been bumped yet.
7364 * Since ___slab_alloc() may reenable interrupts while
7365 * allocating memory, we should bump c->tid now.
7366 */
7367			c->tid = next_tid(c->tid);
7368
7369 local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);
7370
7371 /*
7372			 * Invoking the slow path likely has the side-effect
7373			 * of re-populating the per-CPU c->freelist
7374 */
7375			p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
7376					     _RET_IP_, c, s->object_size);
7377 if (unlikely(!p[i]))
7378 goto error;
7379
7380 c = this_cpu_ptr(s->cpu_slab);
7381			maybe_wipe_obj_freeptr(s, p[i]);
7382
7383 local_lock_irqsave(&s->cpu_slab->lock, irqflags);
7384
7385 continue; /* goto for-loop */
7386 }
7387 c->freelist = get_freepointer(s, object);
7388 p[i] = object;
7389		maybe_wipe_obj_freeptr(s, p[i]);
7390		stat(s, ALLOC_FASTPATH);
7391 }
7392	c->tid = next_tid(c->tid);
7393 local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);
7394 slub_put_cpu_ptr(s->cpu_slab);
7395
7396 return i;
7397
7398error:
7399 slub_put_cpu_ptr(s->cpu_slab);
7400	__kmem_cache_free_bulk(s, i, p);
7401 return 0;
7402
7403}
7404#else /* CONFIG_SLUB_TINY */
7405static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
7406 size_t size, void **p)
7407{
7408 int i;
7409
7410 for (i = 0; i < size; i++) {
7411 void *object = kfence_alloc(s, s->object_size, flags);
7412
7413 if (unlikely(object)) {
7414 p[i] = object;
7415 continue;
7416 }
7417
7418 p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE,
7419 _RET_IP_, s->object_size);
7420 if (unlikely(!p[i]))
7421 goto error;
7422
7423 maybe_wipe_obj_freeptr(s, p[i]);
7424 }
7425
7426 return i;
7427
7428error:
7429 __kmem_cache_free_bulk(s, i, p);
7430 return 0;
7431}
7432#endif /* CONFIG_SLUB_TINY */
7433
7434/* Note that interrupts must be enabled when calling this function. */
7435int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
7436 void **p)
7437{
7438 unsigned int i = 0;
7439
7440 if (!size)
7441 return 0;
7442
7443 s = slab_pre_alloc_hook(s, flags);
7444 if (unlikely(!s))
7445 return 0;
7446
7447 if (s->cpu_sheaves)
7448 i = alloc_from_pcs_bulk(s, size, p);
7449
7450 if (i < size) {
7451 /*
7452 * If we ran out of memory, don't bother with freeing back to
7453 * the percpu sheaves, we have bigger problems.
7454 */
7455 if (unlikely(__kmem_cache_alloc_bulk(s, flags, size - i, p + i) == 0)) {
7456 if (i > 0)
7457				__kmem_cache_free_bulk(s, i, p);
7458 return 0;
7459 }
7460 }
7461
7462 /*
7463 * memcg and kmem_cache debug support and memory initialization.
7464 * Done outside of the IRQ disabled fastpath loop.
7465 */
7466 if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
7467 slab_want_init_on_alloc(flags, s), s->object_size))) {
7468 return 0;
7469 }
7470
7471 return size;
7472}
7473EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof);
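/*
 * Illustrative example (cache and array size are hypothetical): the bulk API
 * either fills the whole array or returns 0, so the caller needs no partial
 * failure handling:
 *
 *	void *objs[16];
 *
 *	if (!kmem_cache_alloc_bulk(foo_cachep, GFP_KERNEL, ARRAY_SIZE(objs), objs))
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free_bulk(foo_cachep, ARRAY_SIZE(objs), objs);
 */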
7474
7475/*
7476 * Object placement in a slab is made very easy because we always start at
7477 * offset 0. If we tune the size of the object to the alignment then we can
7478 * get the required alignment by putting one properly sized object after
7479 * another.
7480 *
7481 * Notice that the allocation order determines the sizes of the per cpu
7482 * caches. Each processor has always one slab available for allocations.
7483 * Increasing the allocation order reduces the number of times that slabs
7484 * must be moved on and off the partial lists and is therefore a factor in
7485 * locking overhead.
7486 */
7487
7488/*
7489 * Minimum / Maximum order of slab pages. This influences locking overhead
7490 * and slab fragmentation. A higher order reduces the number of partial slabs
7491 * and increases the number of allocations possible without having to
7492 * take the list_lock.
7493 */
7494static unsigned int slub_min_order;
7495static unsigned int slub_max_order =
7496 IS_ENABLED(CONFIG_SLUB_TINY) ? 1 : PAGE_ALLOC_COSTLY_ORDER;
7497static unsigned int slub_min_objects;
7498
7499/*
7500 * Calculate the order of allocation given an slab object size.
7501 *
7502 * The order of allocation has significant impact on performance and other
7503 * system components. Generally order 0 allocations should be preferred since
7504 * order 0 does not cause fragmentation in the page allocator. Larger objects
7505 * can be problematic to put into order 0 slabs because there may be too much
7506 * unused space left. We go to a higher order if more than 1/16th of the slab
7507 * would be wasted.
7508 *
7509 * In order to reach satisfactory performance we must ensure that a minimum
7510 * number of objects is in one slab. Otherwise we may generate too much
7511 * activity on the partial lists which requires taking the list_lock. This is
7512 * less a concern for large slabs though which are rarely used.
7513 *
7514 * slab_max_order specifies the order where we begin to stop considering the
7515 * number of objects in a slab as critical. If we reach slab_max_order then
7516 * we try to keep the page order as low as possible. So we accept more waste
7517 * of space in favor of a small page order.
7518 *
7519 * Higher order allocations also allow the placement of more objects in a
7520 * slab and thereby reduce object handling overhead. If the user has
7521 * requested a higher minimum order then we start with that one instead of
7522 * the smallest order which will fit the object.
7523 */
7524static inline unsigned int calc_slab_order(unsigned int size,
7525 unsigned int min_order, unsigned int max_order,
7526 unsigned int fract_leftover)
7527{
7528 unsigned int order;
7529
7530 for (order = min_order; order <= max_order; order++) {
7531
7532 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
7533 unsigned int rem;
7534
7535 rem = slab_size % size;
7536
7537 if (rem <= slab_size / fract_leftover)
7538 break;
7539 }
7540
7541 return order;
7542}
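/*
 * Worked example (illustrative numbers): for a 700 byte object with the
 * initial 1/16 waste limit, order 0 leaves 4096 % 700 = 596 unused bytes,
 * more than 4096 / 16 = 256, so it is rejected; order 1 leaves
 * 8192 % 700 = 492 bytes, within 8192 / 16 = 512, so order 1 is returned
 * (assuming min_order <= 1 <= max_order).
 */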
7543
7544static inline int calculate_order(unsigned int size)
7545{
7546 unsigned int order;
7547 unsigned int min_objects;
7548 unsigned int max_objects;
7549 unsigned int min_order;
7550
7551 min_objects = slub_min_objects;
7552 if (!min_objects) {
7553 /*
7554 * Some architectures will only update present cpus when
7555 * onlining them, so don't trust the number if it's just 1. But
7556 * we also don't want to use nr_cpu_ids always, as on some other
7557 * architectures, there can be many possible cpus, but never
7558 * onlined. Here we compromise between trying to avoid too high
7559 * order on systems that appear larger than they are, and too
7560 * low order on systems that appear smaller than they are.
7561 */
7562 unsigned int nr_cpus = num_present_cpus();
7563 if (nr_cpus <= 1)
7564 nr_cpus = nr_cpu_ids;
7565 min_objects = 4 * (fls(nr_cpus) + 1);
7566 }
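 /*
 * Example (editor's illustration): with 16 present CPUs, fls(16) = 5,
 * so min_objects = 4 * (5 + 1) = 24, unless slab_min_objects= was
 * given on the command line.
 */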
7567 /* min_objects can't be 0 because get_order(0) is undefined */
7568 max_objects = max(order_objects(slub_max_order, size), 1U);
7569 min_objects = min(min_objects, max_objects);
7570
7571 min_order = max_t(unsigned int, slub_min_order,
7572 get_order(min_objects * size));
7573 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
7574 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
7575
7576 /*
7577 * Attempt to find best configuration for a slab. This works by first
7578 * attempting to generate a layout with the best possible configuration
7579 * and backing off gradually.
7580 *
7581 * We start with accepting at most 1/16 waste and try to find the
7582 * smallest order from min_objects-derived/slab_min_order up to
7583 * slab_max_order that will satisfy the constraint. Note that increasing
7584 * the order can only result in same or less fractional waste, not more.
7585 *
7586 * If that fails, we increase the acceptable fraction of waste and try
7587 * again. The last iteration with fraction of 1/2 would effectively
7588 * accept any waste and give us the order determined by min_objects, as
7589 * long as at least a single object fits within slab_max_order.
7590 */
7591 for (unsigned int fraction = 16; fraction > 1; fraction /= 2) {
7592 order = calc_slab_order(size, min_order, slub_max_order,
7593 fraction);
7594 if (order <= slub_max_order)
7595 return order;
7596 }
7597
7598 /*
7599 * Doh this slab cannot be placed using slab_max_order.
7600 */
7601 order = get_order(size);
7602 if (order <= MAX_PAGE_ORDER)
7603 return order;
7604 return -ENOSYS;
7605}
7606
7607static void
7608init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn)
7609{
7610 n->nr_partial = 0;
7611 spin_lock_init(&n->list_lock);
7612 INIT_LIST_HEAD(&n->partial);
7613#ifdef CONFIG_SLUB_DEBUG
7614 atomic_long_set(&n->nr_slabs, 0);
7615 atomic_long_set(&n->total_objects, 0);
7616 INIT_LIST_HEAD(&n->full);
7617#endif
7618 n->barn = barn;
7619 if (barn)
7620 barn_init(barn);
7621}
7622
7623#ifndef CONFIG_SLUB_TINY
7624static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
7625{
7626 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
7627 NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
7628 sizeof(struct kmem_cache_cpu));
7629
7630 /*
7631 * Must align to double word boundary for the double cmpxchg
7632 * instructions to work; see __pcpu_double_call_return_bool().
7633 */
7634 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
7635 2 * sizeof(void *));
7636
7637 if (!s->cpu_slab)
7638 return 0;
7639
7640 init_kmem_cache_cpus(s);
7641
7642 return 1;
7643}
7644#else
7645static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
7646{
7647 return 1;
7648}
7649#endif /* CONFIG_SLUB_TINY */
7650
7651static int init_percpu_sheaves(struct kmem_cache *s)
7652{
7653 int cpu;
7654
7655 for_each_possible_cpu(cpu) {
7656 struct slub_percpu_sheaves *pcs;
7657
7658 pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
7659
7660 local_trylock_init(&pcs->lock);
7661
7662 pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
7663
7664 if (!pcs->main)
7665 return -ENOMEM;
7666 }
7667
7668 return 0;
7669}
7670
7671static struct kmem_cache *kmem_cache_node;
7672
7673/*
7674 * No kmalloc_node yet so do it by hand. We know that this is the first
7675 * slab on the node for this slabcache. There are no concurrent accesses
7676 * possible.
7677 *
7678 * Note that this function only works on the kmem_cache_node
7679 * when allocating for the kmem_cache_node. This is used for bootstrapping
7680 * memory on a fresh node that has no slab structures yet.
7681 */
7682static void early_kmem_cache_node_alloc(int node)
7683{
7684 struct slab *slab;
7685 struct kmem_cache_node *n;
7686
7687 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
7688
7689 slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
7690
7691 BUG_ON(!slab);
7692 if (slab_nid(slab) != node) {
7693 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
7694 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
7695 }
7696
7697 n = slab->freelist;
7698 BUG_ON(!n);
7699#ifdef CONFIG_SLUB_DEBUG
7700 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
7701#endif
7702 n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
7703 slab->freelist = get_freepointer(kmem_cache_node, n);
7704 slab->inuse = 1;
7705 kmem_cache_node->node[node] = n;
7706 init_kmem_cache_node(n, NULL);
7707 inc_slabs_node(kmem_cache_node, node, slab->objects);
7708
7709 /*
7710 * No locks need to be taken here as it has just been
7711 * initialized and there is no concurrent access.
7712 */
7713 __add_partial(n, slab, DEACTIVATE_TO_HEAD);
7714}
7715
7716static void free_kmem_cache_nodes(struct kmem_cache *s)
7717{
7718 int node;
7719 struct kmem_cache_node *n;
7720
7721 for_each_kmem_cache_node(s, node, n) {
7722 if (n->barn) {
7723 WARN_ON(n->barn->nr_full);
7724 WARN_ON(n->barn->nr_empty);
7725 kfree(n->barn);
7726 n->barn = NULL;
7727 }
7728
7729 s->node[node] = NULL;
7730 kmem_cache_free(kmem_cache_node, n);
7731 }
7732}
7733
7734void __kmem_cache_release(struct kmem_cache *s)
7735{
7736 cache_random_seq_destroy(s);
7737 if (s->cpu_sheaves)
7738 pcs_destroy(s);
7739#ifndef CONFIG_SLUB_TINY
7740#ifdef CONFIG_PREEMPT_RT
7741 if (s->cpu_slab)
7742 lockdep_unregister_key(&s->lock_key);
7743#endif
7744 free_percpu(s->cpu_slab);
7745#endif
7746 free_kmem_cache_nodes(s);
7747}
7748
7749static int init_kmem_cache_nodes(struct kmem_cache *s)
7750{
7751 int node;
7752
7753 for_each_node_mask(node, slab_nodes) {
7754 struct kmem_cache_node *n;
7755 struct node_barn *barn = NULL;
7756
7757 if (slab_state == DOWN) {
7758 early_kmem_cache_node_alloc(node);
7759 continue;
7760 }
7761
7762 if (s->cpu_sheaves) {
7763 barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
7764
7765 if (!barn)
7766 return 0;
7767 }
7768
7769 n = kmem_cache_alloc_node(kmem_cache_node,
7770 GFP_KERNEL, node);
7771 if (!n) {
7772 kfree(barn);
7773 return 0;
7774 }
7775
7776 init_kmem_cache_node(n, barn);
7777
7778 s->node[node] = n;
7779 }
7780 return 1;
7781}
7782
7783static void set_cpu_partial(struct kmem_cache *s)
7784{
7785#ifdef CONFIG_SLUB_CPU_PARTIAL
7786 unsigned int nr_objects;
7787
7788 /*
7789 * cpu_partial determines the maximum number of objects kept in the
7790 * per cpu partial lists of a processor.
7791 *
7792 * Per cpu partial lists mainly contain slabs that just have one
7793 * object freed. If they are used for allocation then they can be
7794 * filled up again with minimal effort. The slab will never hit the
7795 * per node partial lists and therefore no locking will be required.
7796 *
7797 * For backwards compatibility reasons, this is determined as a number
7798 * of objects, even though we now limit the maximum number of pages, see
7799 * slub_set_cpu_partial()
7800 */
7801 if (!kmem_cache_has_cpu_partial(s))
7802 nr_objects = 0;
7803 else if (s->size >= PAGE_SIZE)
7804 nr_objects = 6;
7805 else if (s->size >= 1024)
7806 nr_objects = 24;
7807 else if (s->size >= 256)
7808 nr_objects = 52;
7809 else
7810 nr_objects = 120;
7811
7812 slub_set_cpu_partial(s, nr_objects);
7813#endif
7814}
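/*
 * Example (editor's illustration): a cache with s->size = 512 falls into the
 * "s->size >= 256" bucket above, so up to roughly 52 objects worth of
 * partially filled slabs may be cached per CPU; slub_set_cpu_partial()
 * converts that object count into a limit on the number of slabs.
 */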
7815
7816/*
7817 * calculate_sizes() determines the order and the distribution of data within
7818 * a slab object.
7819 */
7820static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
7821{
7822 slab_flags_t flags = s->flags;
7823 unsigned int size = s->object_size;
7824 unsigned int order;
7825
7826 /*
7827 * Round up object size to the next word boundary. We can only
7828 * place the free pointer at word boundaries and this determines
7829 * the possible location of the free pointer.
7830 */
7831 size = ALIGN(size, sizeof(void *));
7832
7833#ifdef CONFIG_SLUB_DEBUG
7834 /*
7835 * Determine if we can poison the object itself. If the user of
7836 * the slab may touch the object after free or before allocation
7837 * then we should never poison the object itself.
7838 */
7839 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
7840 !s->ctor)
7841 s->flags |= __OBJECT_POISON;
7842 else
7843 s->flags &= ~__OBJECT_POISON;
7844
7845
7846 /*
7847 * If we are Redzoning then check if there is some space between the
7848 * end of the object and the free pointer. If not then add an
7849 * additional word to have some bytes to store Redzone information.
7850 */
7851 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
7852 size += sizeof(void *);
7853#endif
7854
7855 /*
7856 * With that we have determined the number of bytes in actual use
7857 * by the object and redzoning.
7858 */
7859 s->inuse = size;
7860
7861 if (((flags & SLAB_TYPESAFE_BY_RCU) && !args->use_freeptr_offset) ||
7862 (flags & SLAB_POISON) || s->ctor ||
7863 ((flags & SLAB_RED_ZONE) &&
7864 (s->object_size < sizeof(void *) || slub_debug_orig_size(s)))) {
7865 /*
7866 * Relocate free pointer after the object if it is not
7867 * permitted to overwrite the first word of the object on
7868 * kmem_cache_free.
7869 *
7870 * This is the case if we do RCU, have a constructor or
7871 * destructor, are poisoning the objects, or are
7872 * redzoning an object smaller than sizeof(void *) or are
7873 * redzoning an object with slub_debug_orig_size() enabled,
7874 * in which case the right redzone may be extended.
7875 *
7876 * The assumption that s->offset >= s->inuse means free
7877 * pointer is outside of the object is used in the
7878 * freeptr_outside_object() function. If that is no
7879 * longer true, the function needs to be modified.
7880 */
7881 s->offset = size;
7882 size += sizeof(void *);
7883 } else if ((flags & SLAB_TYPESAFE_BY_RCU) && args->use_freeptr_offset) {
7884 s->offset = args->freeptr_offset;
7885 } else {
7886 /*
7887 * Store freelist pointer near middle of object to keep
7888 * it away from the edges of the object to avoid small
7889 * sized over/underflows from neighboring allocations.
7890 */
7891 s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
7892 }
7893
7894#ifdef CONFIG_SLUB_DEBUG
7895 if (flags & SLAB_STORE_USER) {
7896 /*
7897 * Need to store information about allocs and frees after
7898 * the object.
7899 */
7900 size += 2 * sizeof(struct track);
7901
7902 /* Save the original kmalloc request size */
7903 if (flags & SLAB_KMALLOC)
7904 size += sizeof(unsigned int);
7905 }
7906#endif
7907
7908 kasan_cache_create(s, &size, &s->flags);
7909#ifdef CONFIG_SLUB_DEBUG
7910 if (flags & SLAB_RED_ZONE) {
7911 /*
7912 * Add some empty padding so that we can catch
7913 * overwrites from earlier objects rather than let
7914 * tracking information or the free pointer be
7915 * corrupted if a user writes before the start
7916 * of the object.
7917 */
7918 size += sizeof(void *);
7919
7920 s->red_left_pad = sizeof(void *);
7921 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
7922 size += s->red_left_pad;
7923 }
7924#endif
7925
7926 /*
7927 * SLUB stores one object immediately after another beginning from
7928 * offset 0. In order to align the objects we have to simply size
7929 * each object to conform to the alignment.
7930 */
7931 size = ALIGN(size, s->align);
7932 s->size = size;
7933 s->reciprocal_size = reciprocal_value(size);
7934 order = calculate_order(size);
7935
7936 if ((int)order < 0)
7937 return 0;
7938
7939 s->allocflags = __GFP_COMP;
7940
7941 if (s->flags & SLAB_CACHE_DMA)
7942 s->allocflags |= GFP_DMA;
7943
7944 if (s->flags & SLAB_CACHE_DMA32)
7945 s->allocflags |= GFP_DMA32;
7946
7947 if (s->flags & SLAB_RECLAIM_ACCOUNT)
7948 s->allocflags |= __GFP_RECLAIMABLE;
7949
7950 /*
7951 * Determine the number of objects per slab
7952 */
7953 s->oo = oo_make(order, size);
7954 s->min = oo_make(get_order(size), size);
7955
7956 return !!oo_objects(s->oo);
7957}
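/*
 * Layout example (editor's illustration, assuming a hypothetical cache with
 * object_size = 24, no debug flags, no constructor, s->align = 8 and
 * ignoring any KASAN additions): the size is already a multiple of
 * sizeof(void *), the free pointer lands inside the object at offset
 * ALIGN_DOWN(24 / 2, 8) = 8, s->size stays 24, and an order-0 slab holds
 * 4096 / 24 = 170 objects.
 */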
7958
7959static void list_slab_objects(struct kmem_cache *s, struct slab *slab)
7960{
7961#ifdef CONFIG_SLUB_DEBUG
7962 void *addr = slab_address(slab);
7963 void *p;
7964
7965 if (!slab_add_kunit_errors())
7966 slab_bug(s, "Objects remaining on __kmem_cache_shutdown()");
7967
7968 spin_lock(&object_map_lock);
7969 __fill_map(object_map, s, slab);
7970
7971 for_each_object(p, s, addr, slab->objects) {
7972
7973 if (!test_bit(__obj_to_index(s, addr, p), object_map)) {
7974 if (slab_add_kunit_errors())
7975 continue;
7976 pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
7977 print_tracking(s, p);
7978 }
7979 }
7980 spin_unlock(&object_map_lock);
7981
7982 __slab_err(slab);
7983#endif
7984}
7985
7986/*
7987 * Attempt to free all partial slabs on a node.
7988 * This is called from __kmem_cache_shutdown(). We must take list_lock
7989 * because sysfs files might still access the partial list after shutdown.
7990 */
7991static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
7992{
7993 LIST_HEAD(discard);
7994 struct slab *slab, *h;
7995
7996 BUG_ON(irqs_disabled());
7997 spin_lock_irq(&n->list_lock);
7998 list_for_each_entry_safe(slab, h, &n->partial, slab_list) {
7999 if (!slab->inuse) {
8000 remove_partial(n, slab);
8001 list_add(&slab->slab_list, &discard);
8002 } else {
8003 list_slab_objects(s, slab);
8004 }
8005 }
8006 spin_unlock_irq(&n->list_lock);
8007
8008 list_for_each_entry_safe(slab, h, &discard, slab_list)
8009 discard_slab(s, slab);
8010}
8011
8012bool __kmem_cache_empty(struct kmem_cache *s)
8013{
8014 int node;
8015 struct kmem_cache_node *n;
8016
8017 for_each_kmem_cache_node(s, node, n)
8018 if (n->nr_partial || node_nr_slabs(n))
8019 return false;
8020 return true;
8021}
8022
8023/*
8024 * Release all resources used by a slab cache.
8025 */
8026int __kmem_cache_shutdown(struct kmem_cache *s)
8027{
8028 int node;
8029 struct kmem_cache_node *n;
8030
8031 flush_all_cpus_locked(s);
8032
8033 /* we might have rcu sheaves in flight */
8034 if (s->cpu_sheaves)
8035 rcu_barrier();
8036
8037 /* Attempt to free all objects */
8038 for_each_kmem_cache_node(s, node, n) {
8039 if (n->barn)
8040 barn_shrink(s, n->barn);
8041 free_partial(s, n);
8042 if (n->nr_partial || node_nr_slabs(n))
8043 return 1;
8044 }
8045 return 0;
8046}
8047
8048#ifdef CONFIG_PRINTK
8049void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
8050{
8051 void *base;
8052 int __maybe_unused i;
8053 unsigned int objnr;
8054 void *objp;
8055 void *objp0;
8056 struct kmem_cache *s = slab->slab_cache;
8057 struct track __maybe_unused *trackp;
8058
8059 kpp->kp_ptr = object;
8060 kpp->kp_slab = slab;
8061 kpp->kp_slab_cache = s;
8062 base = slab_address(slab);
8063 objp0 = kasan_reset_tag(object);
8064#ifdef CONFIG_SLUB_DEBUG
8065 objp = restore_red_left(s, objp0);
8066#else
8067 objp = objp0;
8068#endif
8069 objnr = obj_to_index(s, slab, objp);
8070 kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
8071 objp = base + s->size * objnr;
8072 kpp->kp_objp = objp;
8073 if (WARN_ON_ONCE(objp < base || objp >= base + slab->objects * s->size
8074 || (objp - base) % s->size) ||
8075 !(s->flags & SLAB_STORE_USER))
8076 return;
8077#ifdef CONFIG_SLUB_DEBUG
8078 objp = fixup_red_left(s, objp);
8079 trackp = get_track(s, objp, TRACK_ALLOC);
8080 kpp->kp_ret = (void *)trackp->addr;
8081#ifdef CONFIG_STACKDEPOT
8082 {
8083 depot_stack_handle_t handle;
8084 unsigned long *entries;
8085 unsigned int nr_entries;
8086
8087 handle = READ_ONCE(trackp->handle);
8088 if (handle) {
8089 nr_entries = stack_depot_fetch(handle, &entries);
8090 for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
8091 kpp->kp_stack[i] = (void *)entries[i];
8092 }
8093
8094 trackp = get_track(s, objp, TRACK_FREE);
8095 handle = READ_ONCE(trackp->handle);
8096 if (handle) {
8097 nr_entries = stack_depot_fetch(handle, &entries);
8098 for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
8099 kpp->kp_free_stack[i] = (void *)entries[i];
8100 }
8101 }
8102#endif
8103#endif
8104}
8105#endif
8106
8107/********************************************************************
8108 * Kmalloc subsystem
8109 *******************************************************************/
8110
8111static int __init setup_slub_min_order(char *str)
8112{
8113 get_option(&str, (int *)&slub_min_order);
8114
8115 if (slub_min_order > slub_max_order)
8116 slub_max_order = slub_min_order;
8117
8118 return 1;
8119}
8120
8121__setup("slab_min_order=", setup_slub_min_order);
8122__setup_param("slub_min_order=", slub_min_order, setup_slub_min_order, 0);
8123
8124
8125static int __init setup_slub_max_order(char *str)
8126{
8127 get_option(&str, (int *)&slub_max_order);
8128 slub_max_order = min_t(unsigned int, slub_max_order, MAX_PAGE_ORDER);
8129
8130 if (slub_min_order > slub_max_order)
8131 slub_min_order = slub_max_order;
8132
8133 return 1;
8134}
8135
8136__setup("slab_max_order=", setup_slub_max_order);
8137__setup_param("slub_max_order=", slub_max_order, setup_slub_max_order, 0);
8138
8139static int __init setup_slub_min_objects(char *str)
8140{
8141 get_option(&str, (int *)&slub_min_objects);
8142
8143 return 1;
8144}
8145
8146__setup("slab_min_objects=", setup_slub_min_objects);
8147__setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
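/*
 * Usage example (editor's illustration): booting with "slab_min_order=3
 * slab_min_objects=16" asks for at least order-3 slab pages and at least 16
 * objects per slab where possible; the older "slub_"-prefixed parameter
 * names registered above remain accepted as aliases.
 */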
8148
8149#ifdef CONFIG_NUMA
8150static int __init setup_slab_strict_numa(char *str)
8151{
8152 if (nr_node_ids > 1) {
8153 static_branch_enable(&strict_numa);
8154 pr_info("SLUB: Strict NUMA enabled.\n");
8155 } else {
8156 pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
8157 }
8158
8159 return 1;
8160}
8161
8162__setup("slab_strict_numa", setup_slab_strict_numa);
8163#endif
8164
8165
8166#ifdef CONFIG_HARDENED_USERCOPY
8167/*
8168 * Rejects incorrectly sized objects and objects that are to be copied
8169 * to/from userspace but do not fall entirely within the containing slab
8170 * cache's usercopy region.
8171 *
8172 * Returns NULL if check passes, otherwise const char * to name of cache
8173 * to indicate an error.
8174 */
8175void __check_heap_object(const void *ptr, unsigned long n,
8176 const struct slab *slab, bool to_user)
8177{
8178 struct kmem_cache *s;
8179 unsigned int offset;
8180 bool is_kfence = is_kfence_address(ptr);
8181
8182 ptr = kasan_reset_tag(ptr);
8183
8184 /* Find object and usable object size. */
8185 s = slab->slab_cache;
8186
8187 /* Reject impossible pointers. */
8188 if (ptr < slab_address(slab))
8189 usercopy_abort("SLUB object not in SLUB page?!", NULL,
8190 to_user, 0, n);
8191
8192 /* Find offset within object. */
8193 if (is_kfence)
8194 offset = ptr - kfence_object_start(ptr);
8195 else
8196 offset = (ptr - slab_address(slab)) % s->size;
8197
8198 /* Adjust for redzone and reject if within the redzone. */
8199 if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
8200 if (offset < s->red_left_pad)
8201 usercopy_abort("SLUB object in left red zone",
8202 s->name, to_user, offset, n);
8203 offset -= s->red_left_pad;
8204 }
8205
8206 /* Allow address range falling entirely within usercopy region. */
8207 if (offset >= s->useroffset &&
8208 offset - s->useroffset <= s->usersize &&
8209 n <= s->useroffset - offset + s->usersize)
8210 return;
8211
8212 usercopy_abort("SLUB object", s->name, to_user, offset, n);
8213}
8214#endif /* CONFIG_HARDENED_USERCOPY */
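/*
 * Example (editor's illustration with hypothetical values): for a cache with
 * s->size = 256, s->useroffset = 32 and s->usersize = 64, copying n = 48
 * bytes starting at offset 40 within the object is allowed (40 >= 32,
 * 40 - 32 <= 64 and 48 <= 32 - 40 + 64 = 56), while the same copy starting
 * at offset 56 is rejected because it would extend past the usercopy region.
 */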
8215
8216#define SHRINK_PROMOTE_MAX 32
8217
8218/*
8219 * kmem_cache_shrink discards empty slabs and promotes the slabs filled
8220 * up most to the head of the partial lists. New allocations will then
8221 * fill those up and thus they can be removed from the partial lists.
8222 *
8223 * The slabs with the least items are placed last. This results in them
8224 * being allocated from last, increasing the chance that the last objects
8225 * are freed in them.
8226 */
8227static int __kmem_cache_do_shrink(struct kmem_cache *s)
8228{
8229 int node;
8230 int i;
8231 struct kmem_cache_node *n;
8232 struct slab *slab;
8233 struct slab *t;
8234 struct list_head discard;
8235 struct list_head promote[SHRINK_PROMOTE_MAX];
8236 unsigned long flags;
8237 int ret = 0;
8238
8239 for_each_kmem_cache_node(s, node, n) {
8240 INIT_LIST_HEAD(&discard);
8241 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
8242 INIT_LIST_HEAD(promote + i);
8243
8244 if (n->barn)
8245 barn_shrink(s, n->barn);
8246
8247 spin_lock_irqsave(&n->list_lock, flags);
8248
8249 /*
8250 * Build lists of slabs to discard or promote.
8251 *
8252 * Note that concurrent frees may occur while we hold the
8253 * list_lock. slab->inuse here is the upper limit.
8254 */
8255 list_for_each_entry_safe(slab, t, &n->partial, slab_list) {
8256 int free = slab->objects - slab->inuse;
8257
8258 /* Do not reread slab->inuse */
8259 barrier();
8260
8261 /* We do not keep full slabs on the list */
8262 BUG_ON(free <= 0);
8263
8264 if (free == slab->objects) {
8265 list_move(&slab->slab_list, &discard);
8266 slab_clear_node_partial(slab);
8267 n->nr_partial--;
8268 dec_slabs_node(s, node, slab->objects);
8269 } else if (free <= SHRINK_PROMOTE_MAX)
8270 list_move(&slab->slab_list, promote + free - 1);
8271 }
8272
8273 /*
8274 * Promote the slabs filled up most to the head of the
8275 * partial list.
8276 */
8277 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
8278 list_splice(promote + i, &n->partial);
8279
8280 spin_unlock_irqrestore(&n->list_lock, flags);
8281
8282 /* Release empty slabs */
8283 list_for_each_entry_safe(slab, t, &discard, slab_list)
8284 free_slab(s, slab);
8285
8286 if (node_nr_slabs(n))
8287 ret = 1;
8288 }
8289
8290 return ret;
8291}
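/*
 * Example (editor's illustration): during shrink, a partial slab with
 * slab->objects = 32 and slab->inuse = 29 has free = 3 and is moved to
 * promote[2]; a slab with inuse = 0 (free == objects) goes to the discard
 * list and is freed; a slab with more than SHRINK_PROMOTE_MAX free objects
 * is left where it is on the partial list.
 */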
8292
8293int __kmem_cache_shrink(struct kmem_cache *s)
8294{
8295 flush_all(s);
8296 return __kmem_cache_do_shrink(s);
8297}
8298
8299static int slab_mem_going_offline_callback(void)
8300{
8301 struct kmem_cache *s;
8302
8303 mutex_lock(&slab_mutex);
8304 list_for_each_entry(s, &slab_caches, list) {
8305 flush_all_cpus_locked(s);
8306 __kmem_cache_do_shrink(s);
8307 }
8308 mutex_unlock(&slab_mutex);
8309
8310 return 0;
8311}
8312
8313static int slab_mem_going_online_callback(int nid)
8314{
8315 struct kmem_cache_node *n;
8316 struct kmem_cache *s;
8317 int ret = 0;
8318
8319 /*
8320 * We are bringing a node online. No memory is available yet. We must
8321 * allocate a kmem_cache_node structure in order to bring the node
8322 * online.
8323 */
8324 mutex_lock(&slab_mutex);
8325 list_for_each_entry(s, &slab_caches, list) {
8326 struct node_barn *barn = NULL;
8327
8328 /*
8329 * The structure may already exist if the node was previously
8330 * onlined and offlined.
8331 */
8332 if (get_node(s, nid))
8333 continue;
8334
8335 if (s->cpu_sheaves) {
8336 barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
8337
8338 if (!barn) {
8339 ret = -ENOMEM;
8340 goto out;
8341 }
8342 }
8343
8344 /*
8345 * XXX: kmem_cache_alloc_node will fallback to other nodes
8346 * since memory is not yet available from the node that
8347 * is brought up.
8348 */
8349 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
8350 if (!n) {
8351 kfree(barn);
8352 ret = -ENOMEM;
8353 goto out;
8354 }
8355
8356 init_kmem_cache_node(n, barn);
8357
8358 s->node[nid] = n;
8359 }
8360 /*
8361 * Any cache created after this point will also have kmem_cache_node
8362 * initialized for the new node.
8363 */
8364 node_set(nid, slab_nodes);
8365out:
8366 mutex_unlock(&slab_mutex);
8367 return ret;
8368}
8369
8370static int slab_memory_callback(struct notifier_block *self,
8371 unsigned long action, void *arg)
8372{
8373 struct node_notify *nn = arg;
8374 int nid = nn->nid;
8375 int ret = 0;
8376
8377 switch (action) {
8378 case NODE_ADDING_FIRST_MEMORY:
8379 ret = slab_mem_going_online_callback(nid);
8380 break;
8381 case NODE_REMOVING_LAST_MEMORY:
8382 ret = slab_mem_going_offline_callback();
8383 break;
8384 }
8385 if (ret)
8386 ret = notifier_from_errno(ret);
8387 else
8388 ret = NOTIFY_OK;
8389 return ret;
8390}
8391
8392/********************************************************************
8393 * Basic setup of slabs
8394 *******************************************************************/
8395
8396/*
8397 * Used for early kmem_cache structures that were allocated using
8398 * the page allocator. Allocate them properly then fix up the pointers
8399 * that may be pointing to the wrong kmem_cache structure.
8400 */
8401
8402static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
8403{
8404 int node;
8405 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
8406 struct kmem_cache_node *n;
8407
8408 memcpy(s, static_cache, kmem_cache->object_size);
8409
8410 /*
8411 * This runs very early, and only the boot processor is supposed to be
8412 * up. Even if it weren't true, IRQs are not up so we couldn't fire
8413 * IPIs around.
8414 */
8415 __flush_cpu_slab(s, smp_processor_id());
8416 for_each_kmem_cache_node(s, node, n) {
8417 struct slab *p;
8418
8419 list_for_each_entry(p, &n->partial, slab_list)
8420 p->slab_cache = s;
8421
8422#ifdef CONFIG_SLUB_DEBUG
8423 list_for_each_entry(p, &n->full, slab_list)
8424 p->slab_cache = s;
8425#endif
8426 }
8427 list_add(&s->list, &slab_caches);
8428 return s;
8429}
8430
8431void __init kmem_cache_init(void)
8432{
8433 static __initdata struct kmem_cache boot_kmem_cache,
8434 boot_kmem_cache_node;
8435 int node;
8436
8437 if (debug_guardpage_minorder())
8438 slub_max_order = 0;
8439
8440 /* Inform pointer hashing choice about slub debugging state. */
8441 hash_pointers_finalize(__slub_debug_enabled());
8442
8443 kmem_cache_node = &boot_kmem_cache_node;
8444 kmem_cache = &boot_kmem_cache;
8445
8446 /*
8447 * Initialize the nodemask for which we will allocate per node
8448 * structures. Here we don't need taking slab_mutex yet.
8449 */
8450 for_each_node_state(node, N_MEMORY)
8451 node_set(node, slab_nodes);
8452
8453 create_boot_cache(kmem_cache_node, "kmem_cache_node",
8454 sizeof(struct kmem_cache_node),
8455 SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
8456
8457 hotplug_node_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
8458
8459 /* Able to allocate the per node structures */
8460 slab_state = PARTIAL;
8461
8462 create_boot_cache(kmem_cache, "kmem_cache",
8463 offsetof(struct kmem_cache, node) +
8464 nr_node_ids * sizeof(struct kmem_cache_node *),
8465 SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
8466
8467 kmem_cache = bootstrap(&boot_kmem_cache);
8468 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
8469
8470 /* Now we can use the kmem_cache to allocate kmalloc slabs */
8471 setup_kmalloc_cache_index_table();
8472 create_kmalloc_caches();
8473
8474 /* Setup random freelists for each cache */
8475 init_freelist_randomization();
8476
8477 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
8478 slub_cpu_dead);
8479
8480 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
8481 cache_line_size(),
8482 slub_min_order, slub_max_order, slub_min_objects,
8483 nr_cpu_ids, nr_node_ids);
8484}
8485
8486void __init kmem_cache_init_late(void)
8487{
8488#ifndef CONFIG_SLUB_TINY
8489 flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
8490 WARN_ON(!flushwq);
8491#endif
8492}
8493
8494struct kmem_cache *
8495__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
8496 slab_flags_t flags, void (*ctor)(void *))
8497{
8498 struct kmem_cache *s;
8499
8500 s = find_mergeable(size, align, flags, name, ctor);
8501 if (s) {
8502 if (sysfs_slab_alias(s, name))
8503 pr_err("SLUB: Unable to add cache alias %s to sysfs\n",
8504 name);
8505
8506 s->refcount++;
8507
8508 /*
8509 * Adjust the object sizes so that we clear
8510 * the complete object on kzalloc.
8511 */
8512 s->object_size = max(s->object_size, size);
8513 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
8514 }
8515
8516 return s;
8517}
8518
8519int do_kmem_cache_create(struct kmem_cache *s, const char *name,
8520 unsigned int size, struct kmem_cache_args *args,
8521 slab_flags_t flags)
8522{
8523 int err = -EINVAL;
8524
8525 s->name = name;
8526 s->size = s->object_size = size;
8527
8528 s->flags = kmem_cache_flags(flags, s->name);
8529#ifdef CONFIG_SLAB_FREELIST_HARDENED
8530 s->random = get_random_long();
8531#endif
8532 s->align = args->align;
8533 s->ctor = args->ctor;
8534#ifdef CONFIG_HARDENED_USERCOPY
8535 s->useroffset = args->useroffset;
8536 s->usersize = args->usersize;
8537#endif
8538
8539 if (!calculate_sizes(args, s))
8540 goto out;
8541 if (disable_higher_order_debug) {
8542 /*
8543 * Disable debugging flags that store metadata if the min slab
8544 * order increased.
8545 */
8546 if (get_order(s->size) > get_order(s->object_size)) {
8547 s->flags &= ~DEBUG_METADATA_FLAGS;
8548 s->offset = 0;
8549 if (!calculate_sizes(args, s))
8550 goto out;
8551 }
8552 }
8553
8554#ifdef system_has_freelist_aba
8555 if (system_has_freelist_aba() && !(s->flags & SLAB_NO_CMPXCHG)) {
8556 /* Enable fast mode */
8557 s->flags |= __CMPXCHG_DOUBLE;
8558 }
8559#endif
8560
8561 /*
8562 * The larger the object size is, the more slabs we want on the partial
8563 * list to avoid pounding the page allocator excessively.
8564 */
8565 s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / 2);
8566 s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial);
8567
8568 set_cpu_partial(s);
8569
8570 if (args->sheaf_capacity && !IS_ENABLED(CONFIG_SLUB_TINY)
8571 && !(s->flags & SLAB_DEBUG_FLAGS)) {
8572 s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
8573 if (!s->cpu_sheaves) {
8574 err = -ENOMEM;
8575 goto out;
8576 }
8577 // TODO: increase capacity to grow slab_sheaf up to next kmalloc size?
8578 s->sheaf_capacity = args->sheaf_capacity;
8579 }
8580
8581#ifdef CONFIG_NUMA
8582 s->remote_node_defrag_ratio = 1000;
8583#endif
8584
8585 /* Initialize the pre-computed randomized freelist if slab is up */
8586 if (slab_state >= UP) {
8587 if (init_cache_random_seq(s))
8588 goto out;
8589 }
8590
8591 if (!init_kmem_cache_nodes(s))
8592 goto out;
8593
8594 if (!alloc_kmem_cache_cpus(s))
8595 goto out;
8596
8597 if (s->cpu_sheaves) {
8598 err = init_percpu_sheaves(s);
8599 if (err)
8600 goto out;
8601 }
8602
8603 err = 0;
8604
8605 /* Mutex is not taken during early boot */
8606 if (slab_state <= UP)
8607 goto out;
8608
8609 /*
8610 * Failing to create sysfs files is not critical to SLUB functionality.
8611 * If it fails, proceed with cache creation without these files.
8612 */
8613 if (sysfs_slab_add(s))
8614 pr_err("SLUB: Unable to add cache %s to sysfs\n", s->name);
8615
8616 if (s->flags & SLAB_STORE_USER)
8617 debugfs_slab_add(s);
8618
8619out:
8620 if (err)
8621 __kmem_cache_release(s);
8622 return err;
8623}
8624
8625#ifdef SLAB_SUPPORTS_SYSFS
8626static int count_inuse(struct slab *slab)
8627{
8628 return slab->inuse;
8629}
8630
8631static int count_total(struct slab *slab)
8632{
8633 return slab->objects;
8634}
8635#endif
8636
8637#ifdef CONFIG_SLUB_DEBUG
8638static void validate_slab(struct kmem_cache *s, struct slab *slab,
8639 unsigned long *obj_map)
8640{
8641 void *p;
8642 void *addr = slab_address(slab);
8643
8644 if (!validate_slab_ptr(slab)) {
8645 slab_err(s, slab, "Not a valid slab page");
8646 return;
8647 }
8648
8649 if (!check_slab(s, slab) || !on_freelist(s, slab, NULL))
8650 return;
8651
8652 /* Now we know that a valid freelist exists */
8653 __fill_map(obj_map, s, slab);
8654 for_each_object(p, s, addr, slab->objects) {
8655 u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ?
8656 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
8657
8658 if (!check_object(s, slab, p, val))
8659 break;
8660 }
8661}
8662
8663static int validate_slab_node(struct kmem_cache *s,
8664 struct kmem_cache_node *n, unsigned long *obj_map)
8665{
8666 unsigned long count = 0;
8667 struct slab *slab;
8668 unsigned long flags;
8669
8670 spin_lock_irqsave(&n->list_lock, flags);
8671
8672 list_for_each_entry(slab, &n->partial, slab_list) {
8673 validate_slab(s, slab, obj_map);
8674 count++;
8675 }
8676 if (count != n->nr_partial) {
8677 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
8678 s->name, count, n->nr_partial);
8679 slab_add_kunit_errors();
8680 }
8681
8682 if (!(s->flags & SLAB_STORE_USER))
8683 goto out;
8684
8685 list_for_each_entry(slab, &n->full, slab_list) {
8686 validate_slab(s, slab, obj_map);
8687 count++;
8688 }
8689 if (count != node_nr_slabs(n)) {
8690 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
8691 s->name, count, node_nr_slabs(n));
8692 slab_add_kunit_errors();
8693 }
8694
8695out:
8696 spin_unlock_irqrestore(&n->list_lock, flags);
8697 return count;
8698}
8699
8700long validate_slab_cache(struct kmem_cache *s)
8701{
8702 int node;
8703 unsigned long count = 0;
8704 struct kmem_cache_node *n;
8705 unsigned long *obj_map;
8706
8707 obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
8708 if (!obj_map)
8709 return -ENOMEM;
8710
8711 flush_all(s);
8712 for_each_kmem_cache_node(s, node, n)
8713 count += validate_slab_node(s, n, obj_map);
8714
8715 bitmap_free(obj_map);
8716
8717 return count;
8718}
8719EXPORT_SYMBOL(validate_slab_cache);
8720
8721#ifdef CONFIG_DEBUG_FS
8722/*
8723 * Generate lists of code addresses where slabcache objects are allocated
8724 * and freed.
8725 */
8726
8727struct location {
8728 depot_stack_handle_t handle;
8729 unsigned long count;
8730 unsigned long addr;
8731 unsigned long waste;
8732 long long sum_time;
8733 long min_time;
8734 long max_time;
8735 long min_pid;
8736 long max_pid;
8737 DECLARE_BITMAP(cpus, NR_CPUS);
8738 nodemask_t nodes;
8739};
8740
8741struct loc_track {
8742 unsigned long max;
8743 unsigned long count;
8744 struct location *loc;
8745 loff_t idx;
8746};
8747
8748static struct dentry *slab_debugfs_root;
8749
8750static void free_loc_track(struct loc_track *t)
8751{
8752 if (t->max)
8753 free_pages((unsigned long)t->loc,
8754 get_order(sizeof(struct location) * t->max));
8755}
8756
8757static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
8758{
8759 struct location *l;
8760 int order;
8761
8762 order = get_order(sizeof(struct location) * max);
8763
8764 l = (void *)__get_free_pages(flags, order);
8765 if (!l)
8766 return 0;
8767
8768 if (t->count) {
8769 memcpy(l, t->loc, sizeof(struct location) * t->count);
8770 free_loc_track(t);
8771 }
8772 t->max = max;
8773 t->loc = l;
8774 return 1;
8775}
8776
8777static int add_location(struct loc_track *t, struct kmem_cache *s,
8778 const struct track *track,
8779 unsigned int orig_size)
8780{
8781 long start, end, pos;
8782 struct location *l;
8783 unsigned long caddr, chandle, cwaste;
8784 unsigned long age = jiffies - track->when;
8785 depot_stack_handle_t handle = 0;
8786 unsigned int waste = s->object_size - orig_size;
8787
8788#ifdef CONFIG_STACKDEPOT
8789 handle = READ_ONCE(track->handle);
8790#endif
8791 start = -1;
8792 end = t->count;
8793
8794 for ( ; ; ) {
8795 pos = start + (end - start + 1) / 2;
8796
8797 /*
8798 * There is nothing at "end". If we end up there
8799 * we need to add something to before end.
8800 */
8801 if (pos == end)
8802 break;
8803
8804 l = &t->loc[pos];
8805 caddr = l->addr;
8806 chandle = l->handle;
8807 cwaste = l->waste;
8808 if ((track->addr == caddr) && (handle == chandle) &&
8809 (waste == cwaste)) {
8810
8811 l->count++;
8812 if (track->when) {
8813 l->sum_time += age;
8814 if (age < l->min_time)
8815 l->min_time = age;
8816 if (age > l->max_time)
8817 l->max_time = age;
8818
8819 if (track->pid < l->min_pid)
8820 l->min_pid = track->pid;
8821 if (track->pid > l->max_pid)
8822 l->max_pid = track->pid;
8823
8824 cpumask_set_cpu(track->cpu,
8825 to_cpumask(l->cpus));
8826 }
8827 node_set(page_to_nid(virt_to_page(track)), l->nodes);
8828 return 1;
8829 }
8830
8831 if (track->addr < caddr)
8832 end = pos;
8833 else if (track->addr == caddr && handle < chandle)
8834 end = pos;
8835 else if (track->addr == caddr && handle == chandle &&
8836 waste < cwaste)
8837 end = pos;
8838 else
8839 start = pos;
8840 }
8841
8842 /*
8843 * Not found. Insert new tracking element.
8844 */
8845 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
8846 return 0;
8847
8848 l = t->loc + pos;
8849 if (pos < t->count)
8850 memmove(l + 1, l,
8851 (t->count - pos) * sizeof(struct location));
8852 t->count++;
8853 l->count = 1;
8854 l->addr = track->addr;
8855 l->sum_time = age;
8856 l->min_time = age;
8857 l->max_time = age;
8858 l->min_pid = track->pid;
8859 l->max_pid = track->pid;
8860 l->handle = handle;
8861 l->waste = waste;
8862 cpumask_clear(to_cpumask(l->cpus));
8863 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
8864 nodes_clear(l->nodes);
8865 node_set(page_to_nid(virt_to_page(track)), l->nodes);
8866 return 1;
8867}
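/*
 * Note (editor's addition): the loop above is a binary search over t->loc[],
 * which is kept sorted by (addr, handle, waste); for example, two frees from
 * the same call site but with different stack depot handles end up as two
 * separate struct location entries rather than one merged counter.
 */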
8868
8869static void process_slab(struct loc_track *t, struct kmem_cache *s,
8870 struct slab *slab, enum track_item alloc,
8871 unsigned long *obj_map)
8872{
8873 void *addr = slab_address(slab);
8874 bool is_alloc = (alloc == TRACK_ALLOC);
8875 void *p;
8876
8877 __fill_map(obj_map, s, slab);
8878
8879 for_each_object(p, s, addr, slab->objects)
8880 if (!test_bit(__obj_to_index(s, addr, p), obj_map))
8881 add_location(t, s, get_track(s, p, alloc),
8882 is_alloc ? get_orig_size(s, p) :
8883 s->object_size);
8884}
8885#endif /* CONFIG_DEBUG_FS */
8886#endif /* CONFIG_SLUB_DEBUG */
8887
8888#ifdef SLAB_SUPPORTS_SYSFS
8889enum slab_stat_type {
8890 SL_ALL, /* All slabs */
8891 SL_PARTIAL, /* Only partially allocated slabs */
8892 SL_CPU, /* Only slabs used for cpu caches */
8893 SL_OBJECTS, /* Determine allocated objects not slabs */
8894 SL_TOTAL /* Determine object capacity not slabs */
8895};
8896
8897#define SO_ALL (1 << SL_ALL)
8898#define SO_PARTIAL (1 << SL_PARTIAL)
8899#define SO_CPU (1 << SL_CPU)
8900#define SO_OBJECTS (1 << SL_OBJECTS)
8901#define SO_TOTAL (1 << SL_TOTAL)
8902
8903static ssize_t show_slab_objects(struct kmem_cache *s,
8904 char *buf, unsigned long flags)
8905{
8906 unsigned long total = 0;
8907 int node;
8908 int x;
8909 unsigned long *nodes;
8910 int len = 0;
8911
8912 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
8913 if (!nodes)
8914 return -ENOMEM;
8915
8916 if (flags & SO_CPU) {
8917 int cpu;
8918
8919 for_each_possible_cpu(cpu) {
8920 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
8921 cpu);
8922 int node;
8923 struct slab *slab;
8924
8925 slab = READ_ONCE(c->slab);
8926 if (!slab)
8927 continue;
8928
8929 node = slab_nid(slab);
8930 if (flags & SO_TOTAL)
8931 x = slab->objects;
8932 else if (flags & SO_OBJECTS)
8933 x = slab->inuse;
8934 else
8935 x = 1;
8936
8937 total += x;
8938 nodes[node] += x;
8939
8940#ifdef CONFIG_SLUB_CPU_PARTIAL
8941 slab = slub_percpu_partial_read_once(c);
8942 if (slab) {
8943 node = slab_nid(slab);
8944 if (flags & SO_TOTAL)
8945 WARN_ON_ONCE(1);
8946 else if (flags & SO_OBJECTS)
8947 WARN_ON_ONCE(1);
8948 else
8949 x = data_race(slab->slabs);
8950 total += x;
8951 nodes[node] += x;
8952 }
8953#endif
8954 }
8955 }
8956
8957 /*
8958 * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
8959 * already held which will conflict with an existing lock order:
8960 *
8961 * mem_hotplug_lock->slab_mutex->kernfs_mutex
8962 *
8963 * We don't really need mem_hotplug_lock (to hold off
8964 * slab_mem_going_offline_callback) here because slab's memory hot
8965 * unplug code doesn't destroy the kmem_cache->node[] data.
8966 */
8967
8968#ifdef CONFIG_SLUB_DEBUG
8969 if (flags & SO_ALL) {
8970 struct kmem_cache_node *n;
8971
8972 for_each_kmem_cache_node(s, node, n) {
8973
8974 if (flags & SO_TOTAL)
8975 x = node_nr_objs(n);
8976 else if (flags & SO_OBJECTS)
8977 x = node_nr_objs(n) - count_partial(n, count_free);
8978 else
8979 x = node_nr_slabs(n);
8980 total += x;
8981 nodes[node] += x;
8982 }
8983
8984 } else
8985#endif
8986 if (flags & SO_PARTIAL) {
8987 struct kmem_cache_node *n;
8988
8989 for_each_kmem_cache_node(s, node, n) {
8990 if (flags & SO_TOTAL)
8991 x = count_partial(n, count_total);
8992 else if (flags & SO_OBJECTS)
8993 x = count_partial(n, count_inuse);
8994 else
8995 x = n->nr_partial;
8996 total += x;
8997 nodes[node] += x;
8998 }
8999 }
9000
9001 len += sysfs_emit_at(buf, len, "%lu", total);
9002#ifdef CONFIG_NUMA
9003 for (node = 0; node < nr_node_ids; node++) {
9004 if (nodes[node])
9005 len += sysfs_emit_at(buf, len, " N%d=%lu",
9006 node, nodes[node]);
9007 }
9008#endif
9009 len += sysfs_emit_at(buf, len, "\n");
9010 kfree(nodes);
9011
9012 return len;
9013}
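/*
 * Example output (editor's illustration): on a two-node machine a file
 * backed by this helper, such as /sys/kernel/slab/<cache>/objects, might
 * read "4520 N0=3000 N1=1520", i.e. the total followed by one
 * " N<node>=<count>" entry for every node with a non-zero count.
 */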
9014
9015#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
9016#define to_slab(n) container_of(n, struct kmem_cache, kobj)
9017
9018struct slab_attribute {
9019 struct attribute attr;
9020 ssize_t (*show)(struct kmem_cache *s, char *buf);
9021 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
9022};
9023
9024#define SLAB_ATTR_RO(_name) \
9025 static struct slab_attribute _name##_attr = __ATTR_RO_MODE(_name, 0400)
9026
9027#define SLAB_ATTR(_name) \
9028 static struct slab_attribute _name##_attr = __ATTR_RW_MODE(_name, 0600)
9029
9030static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
9031{
9032 return sysfs_emit(buf, "%u\n", s->size);
9033}
9034SLAB_ATTR_RO(slab_size);
9035
9036static ssize_t align_show(struct kmem_cache *s, char *buf)
9037{
9038 return sysfs_emit(buf, "%u\n", s->align);
9039}
9040SLAB_ATTR_RO(align);
9041
9042static ssize_t object_size_show(struct kmem_cache *s, char *buf)
9043{
9044 return sysfs_emit(buf, "%u\n", s->object_size);
9045}
9046SLAB_ATTR_RO(object_size);
9047
9048static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
9049{
9050 return sysfs_emit(buf, "%u\n", oo_objects(s->oo));
9051}
9052SLAB_ATTR_RO(objs_per_slab);
9053
9054static ssize_t order_show(struct kmem_cache *s, char *buf)
9055{
9056 return sysfs_emit(buf, "%u\n", oo_order(s->oo));
9057}
9058SLAB_ATTR_RO(order);
9059
9060static ssize_t sheaf_capacity_show(struct kmem_cache *s, char *buf)
9061{
9062 return sysfs_emit(buf, "%u\n", s->sheaf_capacity);
9063}
9064SLAB_ATTR_RO(sheaf_capacity);
9065
9066static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
9067{
9068 return sysfs_emit(buf, "%lu\n", s->min_partial);
9069}
9070
9071static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
9072 size_t length)
9073{
9074 unsigned long min;
9075 int err;
9076
9077 err = kstrtoul(buf, 10, &min);
9078 if (err)
9079 return err;
9080
9081 s->min_partial = min;
9082 return length;
9083}
9084SLAB_ATTR(min_partial);
9085
9086static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
9087{
9088 unsigned int nr_partial = 0;
9089#ifdef CONFIG_SLUB_CPU_PARTIAL
9090 nr_partial = s->cpu_partial;
9091#endif
9092
9093 return sysfs_emit(buf, "%u\n", nr_partial);
9094}
9095
9096static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
9097 size_t length)
9098{
9099 unsigned int objects;
9100 int err;
9101
9102 err = kstrtouint(buf, 10, &objects);
9103 if (err)
9104 return err;
9105 if (objects && !kmem_cache_has_cpu_partial(s))
9106 return -EINVAL;
9107
9108 slub_set_cpu_partial(s, objects);
9109 flush_all(s);
9110 return length;
9111}
9112SLAB_ATTR(cpu_partial);
9113
9114static ssize_t ctor_show(struct kmem_cache *s, char *buf)
9115{
9116 if (!s->ctor)
9117 return 0;
9118 return sysfs_emit(buf, "%pS\n", s->ctor);
9119}
9120SLAB_ATTR_RO(ctor);
9121
9122static ssize_t aliases_show(struct kmem_cache *s, char *buf)
9123{
9124 return sysfs_emit(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
9125}
9126SLAB_ATTR_RO(aliases);
9127
9128static ssize_t partial_show(struct kmem_cache *s, char *buf)
9129{
9130 return show_slab_objects(s, buf, SO_PARTIAL);
9131}
9132SLAB_ATTR_RO(partial);
9133
9134static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
9135{
9136 return show_slab_objects(s, buf, SO_CPU);
9137}
9138SLAB_ATTR_RO(cpu_slabs);
9139
9140static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
9141{
9142 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
9143}
9144SLAB_ATTR_RO(objects_partial);
9145
9146static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
9147{
9148 int objects = 0;
9149 int slabs = 0;
9150 int cpu __maybe_unused;
9151 int len = 0;
9152
9153#ifdef CONFIG_SLUB_CPU_PARTIAL
9154 for_each_online_cpu(cpu) {
9155 struct slab *slab;
9156
9157 slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
9158
9159 if (slab)
9160 slabs += data_race(slab->slabs);
9161 }
9162#endif
9163
9164 /* Approximate half-full slabs, see slub_set_cpu_partial() */
9165 objects = (slabs * oo_objects(s->oo)) / 2;
9166 len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
9167
9168#ifdef CONFIG_SLUB_CPU_PARTIAL
9169 for_each_online_cpu(cpu) {
9170 struct slab *slab;
9171
9172 slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
9173 if (slab) {
9174 slabs = data_race(slab->slabs);
9175 objects = (slabs * oo_objects(s->oo)) / 2;
9176 len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
9177 cpu, objects, slabs);
9178 }
9179 }
9180#endif
9181 len += sysfs_emit_at(buf, len, "\n");
9182
9183 return len;
9184}
9185SLAB_ATTR_RO(slabs_cpu_partial);
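/*
 * Example output (editor's illustration): "104(4) C0=52(2) C1=52(2)" means
 * roughly 104 objects in 4 per-cpu partial slabs overall, followed by the
 * per-cpu breakdown; the object counts are the half-full approximation
 * computed above, not exact numbers.
 */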
9186
9187static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
9188{
9189 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
9190}
9191SLAB_ATTR_RO(reclaim_account);
9192
9193static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
9194{
9195 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
9196}
9197SLAB_ATTR_RO(hwcache_align);
9198
9199#ifdef CONFIG_ZONE_DMA
9200static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
9201{
9202 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
9203}
9204SLAB_ATTR_RO(cache_dma);
9205#endif
9206
9207#ifdef CONFIG_HARDENED_USERCOPY
9208static ssize_t usersize_show(struct kmem_cache *s, char *buf)
9209{
9210 return sysfs_emit(buf, "%u\n", s->usersize);
9211}
9212SLAB_ATTR_RO(usersize);
9213#endif
9214
9215static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
9216{
9217 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
9218}
9219SLAB_ATTR_RO(destroy_by_rcu);
9220
9221#ifdef CONFIG_SLUB_DEBUG
9222static ssize_t slabs_show(struct kmem_cache *s, char *buf)
9223{
9224 return show_slab_objects(s, buf, SO_ALL);
9225}
9226SLAB_ATTR_RO(slabs);
9227
9228static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
9229{
9230 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
9231}
9232SLAB_ATTR_RO(total_objects);
9233
9234static ssize_t objects_show(struct kmem_cache *s, char *buf)
9235{
9236 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
9237}
9238SLAB_ATTR_RO(objects);
9239
9240static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
9241{
9242 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
9243}
9244SLAB_ATTR_RO(sanity_checks);
9245
9246static ssize_t trace_show(struct kmem_cache *s, char *buf)
9247{
9248 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TRACE));
9249}
9250SLAB_ATTR_RO(trace);
9251
9252static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
9253{
9254 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
9255}
9256
9257SLAB_ATTR_RO(red_zone);
9258
9259static ssize_t poison_show(struct kmem_cache *s, char *buf)
9260{
9261 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_POISON));
9262}
9263
9264SLAB_ATTR_RO(poison);
9265
9266static ssize_t store_user_show(struct kmem_cache *s, char *buf)
9267{
9268 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
9269}
9270
9271SLAB_ATTR_RO(store_user);
9272
9273static ssize_t validate_show(struct kmem_cache *s, char *buf)
9274{
9275 return 0;
9276}
9277
9278static ssize_t validate_store(struct kmem_cache *s,
9279 const char *buf, size_t length)
9280{
9281 int ret = -EINVAL;
9282
9283 if (buf[0] == '1' && kmem_cache_debug(s)) {
9284 ret = validate_slab_cache(s);
9285 if (ret >= 0)
9286 ret = length;
9287 }
9288 return ret;
9289}
9290SLAB_ATTR(validate);
9291
9292#endif /* CONFIG_SLUB_DEBUG */
9293
9294#ifdef CONFIG_FAILSLAB
9295static ssize_t failslab_show(struct kmem_cache *s, char *buf)
9296{
9297 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
9298}
9299
9300static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
9301 size_t length)
9302{
9303 if (s->refcount > 1)
9304 return -EINVAL;
9305
9306 if (buf[0] == '1')
9307 WRITE_ONCE(s->flags, s->flags | SLAB_FAILSLAB);
9308 else
9309 WRITE_ONCE(s->flags, s->flags & ~SLAB_FAILSLAB);
9310
9311 return length;
9312}
9313SLAB_ATTR(failslab);
9314#endif
9315
9316static ssize_t shrink_show(struct kmem_cache *s, char *buf)
9317{
9318 return 0;
9319}
9320
9321static ssize_t shrink_store(struct kmem_cache *s,
9322 const char *buf, size_t length)
9323{
9324 if (buf[0] == '1')
9325 kmem_cache_shrink(s);
9326 else
9327 return -EINVAL;
9328 return length;
9329}
9330SLAB_ATTR(shrink);
9331
9332#ifdef CONFIG_NUMA
9333static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
9334{
9335 return sysfs_emit(buf, "%u\n", s->remote_node_defrag_ratio / 10);
9336}
9337
9338static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
9339 const char *buf, size_t length)
9340{
9341 unsigned int ratio;
9342 int err;
9343
9344 err = kstrtouint(buf, 10, &ratio);
9345 if (err)
9346 return err;
9347 if (ratio > 100)
9348 return -ERANGE;
9349
9350 s->remote_node_defrag_ratio = ratio * 10;
9351
9352 return length;
9353}
9354SLAB_ATTR(remote_node_defrag_ratio);
9355#endif
9356
9357#ifdef CONFIG_SLUB_STATS
9358static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
9359{
9360 unsigned long sum = 0;
9361 int cpu;
9362 int len = 0;
9363 int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
9364
9365 if (!data)
9366 return -ENOMEM;
9367
9368 for_each_online_cpu(cpu) {
9369 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
9370
9371 data[cpu] = x;
9372 sum += x;
9373 }
9374
9375 len += sysfs_emit_at(buf, len, "%lu", sum);
9376
9377#ifdef CONFIG_SMP
9378 for_each_online_cpu(cpu) {
9379 if (data[cpu])
9380 len += sysfs_emit_at(buf, len, " C%d=%u",
9381 cpu, data[cpu]);
9382 }
9383#endif
9384 kfree(data);
9385 len += sysfs_emit_at(buf, len, "\n");
9386
9387 return len;
9388}
9389
9390static void clear_stat(struct kmem_cache *s, enum stat_item si)
9391{
9392 int cpu;
9393
9394 for_each_online_cpu(cpu)
9395 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
9396}
9397
9398#define STAT_ATTR(si, text) \
9399static ssize_t text##_show(struct kmem_cache *s, char *buf) \
9400{ \
9401 return show_stat(s, buf, si); \
9402} \
9403static ssize_t text##_store(struct kmem_cache *s, \
9404 const char *buf, size_t length) \
9405{ \
9406 if (buf[0] != '0') \
9407 return -EINVAL; \
9408 clear_stat(s, si); \
9409 return length; \
9410} \
9411SLAB_ATTR(text); \
9412
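/*
 * Example (editor's note): STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath) below
 * expands to alloc_fastpath_show()/alloc_fastpath_store() wrappers around
 * show_stat()/clear_stat() plus the alloc_fastpath_attr attribute; writing
 * '0' to the corresponding sysfs file clears the per-cpu counters.
 */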
9413STAT_ATTR(ALLOC_PCS, alloc_cpu_sheaf);
9414STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
9415STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
9416STAT_ATTR(FREE_PCS, free_cpu_sheaf);
9417STAT_ATTR(FREE_RCU_SHEAF, free_rcu_sheaf);
9418STAT_ATTR(FREE_RCU_SHEAF_FAIL, free_rcu_sheaf_fail);
9419STAT_ATTR(FREE_FASTPATH, free_fastpath);
9420STAT_ATTR(FREE_SLOWPATH, free_slowpath);
9421STAT_ATTR(FREE_FROZEN, free_frozen);
9422STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
9423STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
9424STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
9425STAT_ATTR(ALLOC_SLAB, alloc_slab);
9426STAT_ATTR(ALLOC_REFILL, alloc_refill);
9427STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
9428STAT_ATTR(FREE_SLAB, free_slab);
9429STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
9430STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
9431STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
9432STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
9433STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
9434STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
9435STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
9436STAT_ATTR(ORDER_FALLBACK, order_fallback);
9437STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
STAT_ATTR(SHEAF_FLUSH, sheaf_flush);
STAT_ATTR(SHEAF_REFILL, sheaf_refill);
STAT_ATTR(SHEAF_ALLOC, sheaf_alloc);
STAT_ATTR(SHEAF_FREE, sheaf_free);
STAT_ATTR(BARN_GET, barn_get);
STAT_ATTR(BARN_GET_FAIL, barn_get_fail);
STAT_ATTR(BARN_PUT, barn_put);
STAT_ATTR(BARN_PUT_FAIL, barn_put_fail);
STAT_ATTR(SHEAF_PREFILL_FAST, sheaf_prefill_fast);
STAT_ATTR(SHEAF_PREFILL_SLOW, sheaf_prefill_slow);
STAT_ATTR(SHEAF_PREFILL_OVERSIZE, sheaf_prefill_oversize);
STAT_ATTR(SHEAF_RETURN_FAST, sheaf_return_fast);
STAT_ATTR(SHEAF_RETURN_SLOW, sheaf_return_slow);
#endif /* CONFIG_SLUB_STATS */

#ifdef CONFIG_KFENCE
static ssize_t skip_kfence_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_SKIP_KFENCE));
}

static ssize_t skip_kfence_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	int ret = length;

	if (buf[0] == '0')
		s->flags &= ~SLAB_SKIP_KFENCE;
	else if (buf[0] == '1')
		s->flags |= SLAB_SKIP_KFENCE;
	else
		ret = -EINVAL;

	return ret;
}
SLAB_ATTR(skip_kfence);
#endif

static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&sheaf_capacity_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_cpu_sheaf_attr.attr,
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_cpu_sheaf_attr.attr,
	&free_rcu_sheaf_attr.attr,
	&free_rcu_sheaf_fail_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
	&sheaf_flush_attr.attr,
	&sheaf_refill_attr.attr,
	&sheaf_alloc_attr.attr,
	&sheaf_free_attr.attr,
	&barn_get_attr.attr,
	&barn_get_fail_attr.attr,
	&barn_put_attr.attr,
	&barn_put_fail_attr.attr,
	&sheaf_prefill_fast_attr.attr,
	&sheaf_prefill_slow_attr.attr,
	&sheaf_prefill_oversize_attr.attr,
	&sheaf_return_fast_attr.attr,
	&sheaf_return_slow_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif
#ifdef CONFIG_HARDENED_USERCOPY
	&usersize_attr.attr,
#endif
#ifdef CONFIG_KFENCE
	&skip_kfence_attr.attr,
#endif

	NULL
};

static const struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};

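/*
 * sysfs ->show() dispatch: look up the slab_attribute backing @attr and
 * forward to its ->show() handler for the cache that owns @kobj.
 */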
static ssize_t slab_attr_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(s, buf);
}

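/*
 * sysfs ->store() dispatch: forward the write to the attribute's ->store()
 * handler, or fail with -EIO if the attribute is read-only.
 */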
static ssize_t slab_attr_store(struct kobject *kobj,
				struct attribute *attr,
				const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(s, buf, len);
}

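/*
 * kobject release callback: runs once the last reference to the cache's
 * sysfs kobject has been dropped and hands the kmem_cache to
 * slab_kmem_cache_release() for final teardown.
 */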
static void kmem_cache_release(struct kobject *k)
{
	slab_kmem_cache_release(to_slab(k));
}

static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static const struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release,
};

static struct kset *slab_kset;

static inline struct kset *cache_kset(struct kmem_cache *s)
{
	return slab_kset;
}

#define ID_STR_LENGTH 32

/* Create a unique string id for a slab cache:
 *
 * Format	:[flags-]size
 */
static char *create_unique_id(struct kmem_cache *s)
{
	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
	char *p = name;

	if (!name)
		return ERR_PTR(-ENOMEM);

	*p++ = ':';
	/*
	 * First flags affecting slabcache operations. We will only
	 * get here for aliasable slabs so we do not need to support
	 * too many flags. The flags here must cover all flags that
	 * are matched during merging to guarantee that the id is
	 * unique.
	 */
	if (s->flags & SLAB_CACHE_DMA)
		*p++ = 'd';
	if (s->flags & SLAB_CACHE_DMA32)
		*p++ = 'D';
	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		*p++ = 'a';
	if (s->flags & SLAB_CONSISTENCY_CHECKS)
		*p++ = 'F';
	if (s->flags & SLAB_ACCOUNT)
		*p++ = 'A';
	if (p != name + 1)
		*p++ = '-';
	p += snprintf(p, ID_STR_LENGTH - (p - name), "%07u", s->size);

	if (WARN_ON(p > name + ID_STR_LENGTH - 1)) {
		kfree(name);
		return ERR_PTR(-EINVAL);
	}
	kmsan_unpoison_memory(name, p - name);
	return name;
}

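/*
 * Create the sysfs directory and attribute group for a cache. Mergeable
 * caches are added under a unique ":[flags-]size" id and later get a
 * symlink alias under their proper name; unmergeable caches use the
 * cache name directly.
 */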
static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;
	const char *name;
	struct kset *kset = cache_kset(s);
	int unmergeable = slab_unmergeable(s);

	if (!unmergeable && disable_higher_order_debug &&
	    (slub_debug & DEBUG_METADATA_FLAGS))
		unmergeable = 1;

	if (unmergeable) {
		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations. In that
		 * case we can catch duplicate names easily.
		 */
		sysfs_remove_link(&slab_kset->kobj, s->name);
		name = s->name;
	} else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
		name = create_unique_id(s);
		if (IS_ERR(name))
			return PTR_ERR(name);
	}

	s->kobj.kset = kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
	if (err)
		goto out;

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)
		goto out_del_kobj;

	if (!unmergeable) {
		/* Setup first alias */
		sysfs_slab_alias(s, s->name);
	}
out:
	if (!unmergeable)
		kfree(name);
	return err;
out_del_kobj:
	kobject_del(&s->kobj);
	goto out;
}

void sysfs_slab_unlink(struct kmem_cache *s)
{
	if (s->kobj.state_in_sysfs)
		kobject_del(&s->kobj);
}

void sysfs_slab_release(struct kmem_cache *s)
{
	kobject_put(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
struct saved_alias {
	struct kmem_cache *s;
	const char *name;
	struct saved_alias *next;
};

static struct saved_alias *alias_list;

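/*
 * Once sysfs is fully up (slab_state == FULL) create the alias symlink
 * immediately; during early boot, queue it on alias_list so that
 * slab_sysfs_init() can replay it later.
 */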
static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
	struct saved_alias *al;

	if (slab_state == FULL) {
		/*
		 * If we have a leftover link then remove it.
		 */
		sysfs_remove_link(&slab_kset->kobj, name);
		/*
		 * The original cache may have failed to create its sysfs
		 * directory. In that case, sysfs_create_link() returns
		 * -ENOENT and symbolic link creation is skipped.
		 */
		return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
	}

	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
	if (!al)
		return -ENOMEM;

	al->s = s;
	al->name = name;
	al->next = alias_list;
	alias_list = al;
	kmsan_unpoison_memory(al, sizeof(*al));
	return 0;
}

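/*
 * Register the /sys/kernel/slab kset, add every cache created during boot
 * and replay the alias symlinks that were queued before sysfs was ready.
 */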
static int __init slab_sysfs_init(void)
{
	struct kmem_cache *s;
	int err;

	mutex_lock(&slab_mutex);

	slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
	if (!slab_kset) {
		mutex_unlock(&slab_mutex);
		pr_err("Cannot register slab subsystem.\n");
		return -ENOMEM;
	}

	slab_state = FULL;

	list_for_each_entry(s, &slab_caches, list) {
		err = sysfs_slab_add(s);
		if (err)
			pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
			       s->name);
	}

	while (alias_list) {
		struct saved_alias *al = alias_list;

		alias_list = alias_list->next;
		err = sysfs_slab_alias(al->s, al->name);
		if (err)
			pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
			       al->name);
		kfree(al);
	}

	mutex_unlock(&slab_mutex);
	return 0;
}
late_initcall(slab_sysfs_init);
#endif /* SLAB_SUPPORTS_SYSFS */

#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
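/*
 * Emit one location record: count, call site, wasted bytes, object age,
 * pid range, cpus/nodes and (with stackdepot) the saved call stack.
 */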
static int slab_debugfs_show(struct seq_file *seq, void *v)
{
	struct loc_track *t = seq->private;
	struct location *l;
	unsigned long idx;

	idx = (unsigned long) t->idx;
	if (idx < t->count) {
		l = &t->loc[idx];

		seq_printf(seq, "%7ld ", l->count);

		if (l->addr)
			seq_printf(seq, "%pS", (void *)l->addr);
		else
			seq_puts(seq, "<not-available>");

		if (l->waste)
			seq_printf(seq, " waste=%lu/%lu",
				l->count * l->waste, l->waste);

		if (l->sum_time != l->min_time) {
			seq_printf(seq, " age=%ld/%llu/%ld",
				l->min_time, div_u64(l->sum_time, l->count),
				l->max_time);
		} else
			seq_printf(seq, " age=%ld", l->min_time);

		if (l->min_pid != l->max_pid)
			seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
		else
			seq_printf(seq, " pid=%ld",
				l->min_pid);

		if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)))
			seq_printf(seq, " cpus=%*pbl",
				cpumask_pr_args(to_cpumask(l->cpus)));

		if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
			seq_printf(seq, " nodes=%*pbl",
				nodemask_pr_args(&l->nodes));

#ifdef CONFIG_STACKDEPOT
		{
			depot_stack_handle_t handle;
			unsigned long *entries;
			unsigned int nr_entries, j;

			handle = READ_ONCE(l->handle);
			if (handle) {
				nr_entries = stack_depot_fetch(handle, &entries);
				seq_puts(seq, "\n");
				for (j = 0; j < nr_entries; j++)
					seq_printf(seq, " %pS\n", (void *)entries[j]);
			}
		}
#endif
		seq_puts(seq, "\n");
	}

	if (!idx && !t->count)
		seq_puts(seq, "No data\n");

	return 0;
}

static void slab_debugfs_stop(struct seq_file *seq, void *v)
{
}

static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
{
	struct loc_track *t = seq->private;

	t->idx = ++(*ppos);
	if (*ppos <= t->count)
		return ppos;

	return NULL;
}

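/*
 * Comparator for sort(): order location records by descending count so
 * the most frequent call sites are listed first.
 */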
static int cmp_loc_by_count(const void *a, const void *b)
{
	struct location *loc1 = (struct location *)a;
	struct location *loc2 = (struct location *)b;

	return cmp_int(loc2->count, loc1->count);
}

static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
{
	struct loc_track *t = seq->private;

	t->idx = *ppos;
	return ppos;
}

static const struct seq_operations slab_debugfs_sops = {
	.start = slab_debugfs_start,
	.next = slab_debugfs_next,
	.stop = slab_debugfs_stop,
	.show = slab_debugfs_show,
};

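/*
 * Collect the recorded alloc or free tracks from all partial and full
 * slabs of every node into a loc_track table and sort it by count for
 * the seq_file iterator above.
 */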
static int slab_debug_trace_open(struct inode *inode, struct file *filep)
{
	struct kmem_cache_node *n;
	enum track_item alloc;
	int node;
	struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
						sizeof(struct loc_track));
	struct kmem_cache *s = file_inode(filep)->i_private;
	unsigned long *obj_map;

	if (!t)
		return -ENOMEM;

	obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
	if (!obj_map) {
		seq_release_private(inode, filep);
		return -ENOMEM;
	}

	alloc = debugfs_get_aux_num(filep);

	if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
		bitmap_free(obj_map);
		seq_release_private(inode, filep);
		return -ENOMEM;
	}

	for_each_kmem_cache_node(s, node, n) {
		unsigned long flags;
		struct slab *slab;

		if (!node_nr_slabs(n))
			continue;

		spin_lock_irqsave(&n->list_lock, flags);
		list_for_each_entry(slab, &n->partial, slab_list)
			process_slab(t, s, slab, alloc, obj_map);
		list_for_each_entry(slab, &n->full, slab_list)
			process_slab(t, s, slab, alloc, obj_map);
		spin_unlock_irqrestore(&n->list_lock, flags);
	}

	/* Sort locations by count */
	sort(t->loc, t->count, sizeof(struct location),
	     cmp_loc_by_count, NULL);

	bitmap_free(obj_map);
	return 0;
}

static int slab_debug_trace_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct loc_track *t = seq->private;

	free_loc_track(t);
	return seq_release_private(inode, file);
}

static const struct file_operations slab_debugfs_fops = {
	.open = slab_debug_trace_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = slab_debug_trace_release,
};

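/*
 * Create <debugfs>/slab/<cache name>/ with the alloc_traces and
 * free_traces files served by slab_debugfs_fops.
 */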
static void debugfs_slab_add(struct kmem_cache *s)
{
	struct dentry *slab_cache_dir;

	if (unlikely(!slab_debugfs_root))
		return;

	slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);

	debugfs_create_file_aux_num("alloc_traces", 0400, slab_cache_dir, s,
					TRACK_ALLOC, &slab_debugfs_fops);

	debugfs_create_file_aux_num("free_traces", 0400, slab_cache_dir, s,
					TRACK_FREE, &slab_debugfs_fops);
}

void debugfs_slab_release(struct kmem_cache *s)
{
	debugfs_lookup_and_remove(s->name, slab_debugfs_root);
}

static int __init slab_debugfs_init(void)
{
	struct kmem_cache *s;

	slab_debugfs_root = debugfs_create_dir("slab", NULL);

	list_for_each_entry(s, &slab_caches, list)
		if (s->flags & SLAB_STORE_USER)
			debugfs_slab_add(s);

	return 0;
}
__initcall(slab_debugfs_init);
#endif

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLUB_DEBUG
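/*
 * Fill in a slabinfo record from the per-node counters. The number of
 * free objects is an approximation taken from the partial lists, so
 * active_objs is approximate as well.
 */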
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
	unsigned long nr_slabs = 0;
	unsigned long nr_objs = 0;
	unsigned long nr_free = 0;
	int node;
	struct kmem_cache_node *n;

	for_each_kmem_cache_node(s, node, n) {
		nr_slabs += node_nr_slabs(n);
		nr_objs += node_nr_objs(n);
		nr_free += count_partial_free_approx(n);
	}

	sinfo->active_objs = nr_objs - nr_free;
	sinfo->num_objs = nr_objs;
	sinfo->active_slabs = nr_slabs;
	sinfo->num_slabs = nr_slabs;
	sinfo->objects_per_slab = oo_objects(s->oo);
	sinfo->cache_order = oo_order(s->oo);
}
#endif /* CONFIG_SLUB_DEBUG */