mm.h source code [Linux/include/linux/mm.h]

/* SPDX-License-Identifier: GPL-2.0 */
2	#ifndef _LINUX_MM_H
3	#define _LINUX_MM_H
4
5	#include <linux/errno.h>
6	#include <linux/mmdebug.h>
7	#include <linux/gfp.h>
8	#include <linux/pgalloc_tag.h>
9	#include <linux/bug.h>
10	#include <linux/list.h>
11	#include <linux/mmzone.h>
12	#include <linux/rbtree.h>
13	#include <linux/atomic.h>
14	#include <linux/debug_locks.h>
15	#include <linux/compiler.h>
16	#include <linux/mm_types.h>
17	#include <linux/mmap_lock.h>
18	#include <linux/range.h>
19	#include <linux/pfn.h>
20	#include <linux/percpu-refcount.h>
21	#include <linux/bit_spinlock.h>
22	#include <linux/shrinker.h>
23	#include <linux/resource.h>
24	#include <linux/page_ext.h>
25	#include <linux/err.h>
26	#include <linux/page-flags.h>
27	#include <linux/page_ref.h>
28	#include <linux/overflow.h>
29	#include <linux/sizes.h>
30	#include <linux/sched.h>
31	#include <linux/pgtable.h>
32	#include <linux/kasan.h>
33	#include <linux/memremap.h>
34	#include <linux/slab.h>
35	#include <linux/cacheinfo.h>
36	#include <linux/rcuwait.h>
37	#include <linux/bitmap.h>
38	#include <linux/bitops.h>
39
/* Forward declarations: these struct tags are only named here, not defined. */
struct mempolicy;
struct anon_vma;
struct anon_vma_chain;
struct user_struct;
struct pt_regs;
struct folio_batch;

/* Prototypes only; the definitions live outside this header section. */
void arch_mm_preinit(void);
void mm_core_init(void);
void init_mm_internals(void);
50
51	extern atomic_long_t _totalram_pages;
52	static inline unsigned long totalram_pages(void)
53	{
54	return (unsigned long)atomic_long_read(v: &_totalram_pages);
55	}
56
57	static inline void totalram_pages_inc(void)
58	{
59	atomic_long_inc(v: &_totalram_pages);
60	}
61
62	static inline void totalram_pages_dec(void)
63	{
64	atomic_long_dec(v: &_totalram_pages);
65	}
66
67	static inline void totalram_pages_add(long count)
68	{
69	atomic_long_add(i: count, v: &_totalram_pages);
70	}
71
/* Declared here, defined elsewhere. */
extern void *high_memory;
73
/*
 * Convert between pages and MB
 * 20 is the shift for 1MB (2^20 = 1MB)
 * PAGE_SHIFT is the shift for page size (e.g., 12 for 4KB pages)
 * So (20 - PAGE_SHIFT) converts between pages and MB
 *
 * PAGES_TO_MB() shifts right, so a partial megabyte is truncated.
 */
#define PAGES_TO_MB(pages)	((pages) >> (20 - PAGE_SHIFT))
#define MB_TO_PAGES(mb)		((mb) << (20 - PAGE_SHIFT))
82
/*
 * With CONFIG_SYSCTL this is a real variable defined elsewhere; without it
 * the name expands to the constant 0.
 */
#ifdef CONFIG_SYSCTL
extern int sysctl_legacy_va_layout;
#else
#define sysctl_legacy_va_layout 0
#endif
88
/* mmap randomisation bit counts; only declared when the arch option is set. */
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
extern const int mmap_rnd_bits_min;
extern int mmap_rnd_bits_max __ro_after_init;
extern int mmap_rnd_bits __read_mostly;
#endif
/* Same set of variables for the compat case. */
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
extern const int mmap_rnd_compat_bits_min;
extern const int mmap_rnd_compat_bits_max;
extern int mmap_rnd_compat_bits __read_mostly;
#endif
99
/*
 * Default for DIRECT_MAP_PHYSMEM_END when nothing defined it earlier:
 * derived from MAX_PHYSMEM_BITS if that is available, otherwise the
 * all-ones phys_addr_t value with bit 63 cleared.
 */
#ifndef DIRECT_MAP_PHYSMEM_END
# ifdef MAX_PHYSMEM_BITS
# define DIRECT_MAP_PHYSMEM_END	((1ULL << MAX_PHYSMEM_BITS) - 1)
# else
# define DIRECT_MAP_PHYSMEM_END	(((phys_addr_t)-1)&~(1ULL<<63))
# endif
#endif
107
108	#include <asm/page.h>
109	#include <asm/processor.h>
110
/*
 * Fallback address-conversion helpers. Each one is only defined when an
 * earlier header has not already provided its own version.
 */
#ifndef __pa_symbol
#define __pa_symbol(x)  __pa(RELOC_HIDE((unsigned long)(x), 0))
#endif

#ifndef page_to_virt
#define page_to_virt(x)	__va(PFN_PHYS(page_to_pfn(x)))
#endif

#ifndef lm_alias
#define lm_alias(x)	__va(__pa_symbol(x))
#endif
122
/*
 * To prevent common memory management code establishing
 * a zero page mapping on a read fault.
 * This macro should be defined within <asm/pgtable.h>.
 * s390 does this to prevent multiplexing of hardware bits
 * related to the physical page in case of virtualization.
 *
 * The fallback below ignores its argument and evaluates to 0.
 */
#ifndef mm_forbids_zeropage
#define mm_forbids_zeropage(X)	(0)
#endif
133
/*
 * On some architectures it is expensive to call memset() for small sizes.
 * If an architecture decides to implement their own version of
 * mm_zero_struct_page they should wrap the defines below in a #ifndef and
 * define their own version of this macro in <asm/pgtable.h>
 */
#if BITS_PER_LONG == 64
/* This function must be updated when the size of struct page grows above 96
 * or reduces below 56. The idea that compiler optimizes out switch()
 * statement, and only leaves move/store instructions. Also the compiler can
 * combine write statements if they are both assignments and can be reordered,
 * this can result in several of the writes here being dropped.
 */
#define	mm_zero_struct_page(pp) __mm_zero_struct_page(pp)
static inline void __mm_zero_struct_page(struct page *page)
{
	/* View the page as an array of unsigned long words. */
	unsigned long *_pp = (void *)page;

	 /* Check that struct page is either 56, 64, 72, 80, 88 or 96 bytes */
	BUILD_BUG_ON(sizeof(struct page) & 7);
	BUILD_BUG_ON(sizeof(struct page) < 56);
	BUILD_BUG_ON(sizeof(struct page) > 96);

	/*
	 * Enter at the case matching the structure size and fall through,
	 * clearing one word per step from the highest index down to 0.
	 */
	switch (sizeof(struct page)) {
	case 96:
		_pp[11] = 0;
		fallthrough;
	case 88:
		_pp[10] = 0;
		fallthrough;
	case 80:
		_pp[9] = 0;
		fallthrough;
	case 72:
		_pp[8] = 0;
		fallthrough;
	case 64:
		_pp[7] = 0;
		fallthrough;
	case 56:
		_pp[6] = 0;
		_pp[5] = 0;
		_pp[4] = 0;
		_pp[3] = 0;
		_pp[2] = 0;
		_pp[1] = 0;
		_pp[0] = 0;
	}
}
#else
#define mm_zero_struct_page(pp)  ((void)memset((pp), 0, sizeof(struct page)))
#endif
186
/*
 * Default maximum number of active map areas, this limits the number of vmas
 * per mm struct. Users can overwrite this number by sysctl but there is a
 * problem.
 *
 * When a program's coredump is generated as ELF format, a section is created
 * per a vma. In ELF, the number of sections is represented in unsigned short.
 * This means the number of sections should be smaller than 65535 at coredump.
 * Because the kernel adds some informative sections to a image of program at
 * generating coredump, we need some margin. The number of extra sections is
 * 1-3 now and depends on arch. We use "5" as safe margin, here.
 *
 * ELF extended numbering allows more than 65535 sections, so 16-bit bound is
 * not a hard limit any more. Although some userspace tools can be surprised by
 * that.
 */
#define MAPCOUNT_ELF_CORE_MARGIN	(5)
#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)

extern int sysctl_max_map_count;

extern unsigned long sysctl_user_reserve_kbytes;
extern unsigned long sysctl_admin_reserve_kbytes;
210
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
/* Out-of-line implementation; only the prototype is visible here. */
bool page_range_contiguous(const struct page *page, unsigned long nr_pages);
#else
/* In every other configuration this stub unconditionally returns true. */
static inline bool page_range_contiguous(const struct page *page,
		unsigned long nr_pages)
{
	return true;
}
#endif
220
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)

/* to align the pointer to the (prev) page boundary */
#define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE)

/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
#define PAGE_ALIGNED(addr)	IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
229
230	/**
231	* folio_page_idx - Return the number of a page in a folio.
232	* @folio: The folio.
233	* @page: The folio page.
234	*
235	* This function expects that the page is actually part of the folio.
236	* The returned number is relative to the start of the folio.
237	*/
238	static inline unsigned long folio_page_idx(const struct folio *folio,
239	const struct page *page)
240	{
241	return page - &folio->page;
242	}
243
244	static inline struct folio lru_to_folio(struct* list_head *head)
245	{
246	return list_entry((head)->prev, struct folio, lru);
247	}
248
/* Prototype only; all four arguments are untyped (void *) addresses. */
void setup_initial_init_mm(void *start_code, void *end_code,
			   void *end_data, void *brk);
251
/*
 * Linux kernel virtual memory manager primitives.
 * The idea being to have a "virtual" mm in the same way
 * we have a virtual fs - giving a cleaner interface to the
 * mm details, and allowing different kinds of memory mappings
 * (from shared memory to executable loading to arbitrary
 * mmap() functions).
 */

struct vm_area_struct *vm_area_alloc(struct mm_struct *);
struct vm_area_struct *vm_area_dup(struct vm_area_struct *);
void vm_area_free(struct vm_area_struct *);
264
/* Declarations that exist only when CONFIG_MMU is not set. */
#ifndef CONFIG_MMU
extern struct rb_root nommu_region_tree;
extern struct rw_semaphore nommu_region_sem;

extern unsigned int kobjsize(const void *objp);
#endif
271
/*
 * vm_flags in vm_area_struct, see mm_types.h.
 * When changing, update also include/trace/events/mmflags.h
 */
#define VM_NONE		0x00000000

#define VM_READ		0x00000001	/* currently active flags */
#define VM_WRITE	0x00000002
#define VM_EXEC		0x00000004
#define VM_SHARED	0x00000008

/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
#define VM_MAYREAD	0x00000010	/* limits for mprotect() etc */
#define VM_MAYWRITE	0x00000020
#define VM_MAYEXEC	0x00000040
#define VM_MAYSHARE	0x00000080

#define VM_GROWSDOWN	0x00000100	/* general info on the segment */
#ifdef CONFIG_MMU
#define VM_UFFD_MISSING	0x00000200	/* missing pages tracking */
#else /* CONFIG_MMU */
#define VM_MAYOVERLAY	0x00000200	/* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
#define VM_UFFD_MISSING	0
#endif /* CONFIG_MMU */
#define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
#define VM_UFFD_WP	0x00001000	/* wrprotect pages tracking */

#define VM_LOCKED	0x00002000
#define VM_IO           0x00004000	/* Memory mapped I/O or similar */

					/* Used by sys_madvise() */
#define VM_SEQ_READ	0x00008000	/* App will access data sequentially */
#define VM_RAND_READ	0x00010000	/* App will not benefit from clustered reads */

#define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
#define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
#define VM_LOCKONFAULT	0x00080000	/* Lock the pages covered when they are faulted in */
#define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
#define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
#define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
#define VM_SYNC		0x00800000	/* Synchronous page faults */
#define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
#define VM_WIPEONFORK	0x02000000	/* Wipe VMA contents in child. */
#define VM_DONTDUMP	0x04000000	/* Do not include in the core dump */

#ifdef CONFIG_MEM_SOFT_DIRTY
# define VM_SOFTDIRTY	0x08000000	/* Not soft dirty clean area */
#else
# define VM_SOFTDIRTY	0
#endif

#define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
#define VM_HUGEPAGE	0x20000000	/* MADV_HUGEPAGE marked this vma */
#define VM_NOHUGEPAGE	0x40000000	/* MADV_NOHUGEPAGE marked this vma */
#define VM_MERGEABLE	BIT(31)		/* KSM may merge identical pages */
326	#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */
327
/*
 * Flag bits 32..38 and the matching BIT() masks. Only defined when
 * CONFIG_ARCH_USES_HIGH_VMA_FLAGS is set.
 */
#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
#define VM_HIGH_ARCH_BIT_0	32	/* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_1	33	/* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_2	34	/* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_3	35	/* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_4	36	/* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_5	37	/* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_6	38	/* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_0	BIT(VM_HIGH_ARCH_BIT_0)
#define VM_HIGH_ARCH_1	BIT(VM_HIGH_ARCH_BIT_1)
#define VM_HIGH_ARCH_2	BIT(VM_HIGH_ARCH_BIT_2)
#define VM_HIGH_ARCH_3	BIT(VM_HIGH_ARCH_BIT_3)
#define VM_HIGH_ARCH_4	BIT(VM_HIGH_ARCH_BIT_4)
#define VM_HIGH_ARCH_5	BIT(VM_HIGH_ARCH_BIT_5)
#define VM_HIGH_ARCH_6	BIT(VM_HIGH_ARCH_BIT_6)
#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
344
/*
 * Protection-key flag bits, mapped onto the high arch bits starting at
 * VM_HIGH_ARCH_BIT_0. Bits 3 and 4 expand to 0 unless CONFIG_ARCH_PKEY_BITS
 * is large enough to need them.
 */
#ifdef CONFIG_ARCH_HAS_PKEYS
# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0
# define VM_PKEY_BIT0  VM_HIGH_ARCH_0
# define VM_PKEY_BIT1  VM_HIGH_ARCH_1
# define VM_PKEY_BIT2  VM_HIGH_ARCH_2
#if CONFIG_ARCH_PKEY_BITS > 3
# define VM_PKEY_BIT3  VM_HIGH_ARCH_3
#else
# define VM_PKEY_BIT3  0
#endif
#if CONFIG_ARCH_PKEY_BITS > 4
# define VM_PKEY_BIT4  VM_HIGH_ARCH_4
#else
# define VM_PKEY_BIT4  0
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
361
#ifdef CONFIG_X86_USER_SHADOW_STACK
/*
 * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
 * support core mm.
 *
 * These VMAs will get a single end guard page. This helps userspace protect
 * itself from attacks. A single page is enough for current shadow stack archs
 * (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c
 * for more details on the guard size.
 */
# define VM_SHADOW_STACK	VM_HIGH_ARCH_5
#endif

#if defined(CONFIG_ARM64_GCS)
/*
 * arm64's Guarded Control Stack implements similar functionality and
 * has similar constraints to shadow stacks.
 */
# define VM_SHADOW_STACK	VM_HIGH_ARCH_6
#endif

/* Neither option set: the flag collapses to VM_NONE. */
#ifndef VM_SHADOW_STACK
# define VM_SHADOW_STACK	VM_NONE
#endif
386
/* Per-architecture meaning of VM_ARCH_1; at most one branch applies. */
#if defined(CONFIG_PPC64)
# define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
#elif defined(CONFIG_PARISC)
# define VM_GROWSUP	VM_ARCH_1
#elif defined(CONFIG_SPARC64)
# define VM_SPARC_ADI	VM_ARCH_1	/* Uses ADI tag for access control */
# define VM_ARCH_CLEAR	VM_SPARC_ADI
#elif defined(CONFIG_ARM64)
# define VM_ARM64_BTI	VM_ARCH_1	/* BTI guarded page, a.k.a. GP bit */
# define VM_ARCH_CLEAR	VM_ARM64_BTI
#elif !defined(CONFIG_MMU)
# define VM_MAPPED_COPY	VM_ARCH_1	/* T if mapped copy of data (nommu mmap) */
#endif

#if defined(CONFIG_ARM64_MTE)
# define VM_MTE		VM_HIGH_ARCH_4	/* Use Tagged memory for access control */
# define VM_MTE_ALLOWED	VM_HIGH_ARCH_5	/* Tagged memory permitted */
#else
# define VM_MTE		VM_NONE
# define VM_MTE_ALLOWED	VM_NONE
#endif

/* Fallback when no branch above defined VM_GROWSUP. */
#ifndef VM_GROWSUP
# define VM_GROWSUP	VM_NONE
#endif
412
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
# define VM_UFFD_MINOR_BIT	41
# define VM_UFFD_MINOR		BIT(VM_UFFD_MINOR_BIT)	/* UFFD minor faults */
#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
# define VM_UFFD_MINOR		VM_NONE
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */

/*
 * This flag is used to connect VFIO to arch specific KVM code. It
 * indicates that the memory under this VMA is safe for use with any
 * non-cachable memory type inside KVM. Some VFIO devices, on some
 * platforms, are thought to be unsafe and can cause machine crashes
 * if KVM does not lock down the memory type.
 */
#ifdef CONFIG_64BIT
#define VM_ALLOW_ANY_UNCACHED_BIT	39
#define VM_ALLOW_ANY_UNCACHED		BIT(VM_ALLOW_ANY_UNCACHED_BIT)
#else
#define VM_ALLOW_ANY_UNCACHED		VM_NONE
#endif

/* Bit 40 on 64-bit, VM_ARCH_1 on PPC32, otherwise VM_NONE. */
#ifdef CONFIG_64BIT
#define VM_DROPPABLE_BIT	40
#define VM_DROPPABLE		BIT(VM_DROPPABLE_BIT)
#elif defined(CONFIG_PPC32)
#define VM_DROPPABLE		VM_ARCH_1
#else
#define VM_DROPPABLE		VM_NONE
#endif

/* Bit 42 on 64-bit, otherwise VM_NONE. */
#ifdef CONFIG_64BIT
#define VM_SEALED_BIT	42
#define VM_SEALED	BIT(VM_SEALED_BIT)
#else
#define VM_SEALED	VM_NONE
#endif
449
/* Bits set in the VMA until the stack is in its final location */
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)

#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)

/* Common data flag combinations */
#define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#define VM_DATA_FLAGS_NON_EXEC	(VM_READ | VM_WRITE | VM_MAYREAD | \
				 VM_MAYWRITE | VM_MAYEXEC)
#define VM_DATA_FLAGS_EXEC	(VM_READ | VM_WRITE | VM_EXEC | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

#ifndef VM_DATA_DEFAULT_FLAGS		/* arch can override this */
#define VM_DATA_DEFAULT_FLAGS  VM_DATA_FLAGS_EXEC
#endif

#ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif

#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)

#ifdef CONFIG_STACK_GROWSUP
#define VM_STACK	VM_GROWSUP
#define VM_STACK_EARLY	VM_GROWSDOWN
#else
#define VM_STACK	VM_GROWSDOWN
#define VM_STACK_EARLY	0
#endif

#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)

/* VMA basic access permission flags */
#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)


/*
 * Special vmas that are non-mergable, non-mlock()able.
 */
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

/* This mask prevents VMA from being scanned with khugepaged */
#define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB)

/* This mask defines which mm->def_flags a process can inherit its parent */
#define VM_INIT_DEF_MASK	VM_NOHUGEPAGE

/* This mask represents all the VMA flag bits used by mlock */
#define VM_LOCKED_MASK	(VM_LOCKED | VM_LOCKONFAULT)

/* Arch-specific flags to clear when updating VM flags on protection change */
#ifndef VM_ARCH_CLEAR
# define VM_ARCH_CLEAR	VM_NONE
#endif
#define VM_FLAGS_CLEAR	(ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR)
506
/*
 * mapping from the currently active vm_flags protection bits (the
 * low four bits) to a page protection mask..
 */

/*
 * The default fault flags that should be used by most of the
 * arch-specific page fault handlers.
 */
#define FAULT_FLAG_DEFAULT  (FAULT_FLAG_ALLOW_RETRY | \
			     FAULT_FLAG_KILLABLE | \
			     FAULT_FLAG_INTERRUPTIBLE)
519
520	/**
521	* fault_flag_allow_retry_first - check ALLOW_RETRY the first time
522	* @flags: Fault flags.
523	*
524	* This is mostly used for places where we want to try to avoid taking
525	* the mmap_lock for too long a time when waiting for another condition
526	* to change, in which case we can try to be polite to release the
527	* mmap_lock in the first round to avoid potential starvation of other
528	* processes that would also want the mmap_lock.
529	*
530	* Return: true if the page fault allows retry and this is the first
531	* attempt of the fault handling; false otherwise.
532	*/
533	static inline bool fault_flag_allow_retry_first(enum fault_flag flags)
534	{
535	return (flags & FAULT_FLAG_ALLOW_RETRY) &&
536	(!(flags & FAULT_FLAG_TRIED));
537	}
538
/* Initialiser list pairing each FAULT_FLAG_* value with a printable name. */
#define FAULT_FLAG_TRACE \
	{ FAULT_FLAG_WRITE,		"WRITE" }, \
	{ FAULT_FLAG_MKWRITE,		"MKWRITE" }, \
	{ FAULT_FLAG_ALLOW_RETRY,	"ALLOW_RETRY" }, \
	{ FAULT_FLAG_RETRY_NOWAIT,	"RETRY_NOWAIT" }, \
	{ FAULT_FLAG_KILLABLE,		"KILLABLE" }, \
	{ FAULT_FLAG_TRIED,		"TRIED" }, \
	{ FAULT_FLAG_USER,		"USER" }, \
	{ FAULT_FLAG_REMOTE,		"REMOTE" }, \
	{ FAULT_FLAG_INSTRUCTION,	"INSTRUCTION" }, \
	{ FAULT_FLAG_INTERRUPTIBLE,	"INTERRUPTIBLE" }, \
	{ FAULT_FLAG_VMA_LOCK,		"VMA_LOCK" }
551
552	/*
553	* vm_fault is filled by the pagefault handler and passed to the vma's
554	* ->fault function. The vma's ->fault is responsible for returning a bitmask
555	* of VM_FAULT_xxx flags that give details about how the fault was handled.
556	*
557	* MM layer fills up gfp_mask for page allocations but fault handler might
558	* alter it if its implementation requires a different allocation context.
559	*
560	* pgoff should be used in favour of virtual_address, if possible.
561	*/
562	struct vm_fault {
563	const struct {
564	struct vm_area_struct vma; /* Target VMA /
565	gfp_t gfp_mask; / gfp mask to be used for allocations /
566	pgoff_t pgoff; / Logical page offset based on vma /
567	unsigned long address; / Faulting virtual address - masked /
568	unsigned long real_address; / Faulting virtual address - unmasked /
569	};
570	enum fault_flag flags; / FAULT_FLAG_xxx flags*
571	* XXX: should really be 'const' */
572	pmd_t pmd; /* Pointer to pmd entry matching*
573	* the 'address' */
574	pud_t pud; /* Pointer to pud entry matching*
575	* the 'address'
576	*/
577	union {
578	pte_t orig_pte; / Value of PTE at the time of fault /
579	pmd_t orig_pmd; / Value of PMD at the time of fault,*
580	* used by PMD fault only.
581	*/
582	};
583
584	struct page cow_page; /* Page handler may use for COW fault /
585	struct page page; /* ->fault handlers should return a*
586	* page here, unless VM_FAULT_NOPAGE
587	* is set (which is also implied by
588	* VM_FAULT_ERROR).
589	*/
590	/ These three entries are valid only while holding ptl lock /
591	pte_t pte; /* Pointer to pte entry matching*
592	* the 'address'. NULL if the page
593	* table hasn't been allocated.
594	*/
595	spinlock_t ptl; /* Page table lock.*
596	* Protects pte page table if 'pte'
597	* is not NULL, otherwise pmd.
598	*/
599	pgtable_t prealloc_pte; / Pre-allocated pte page table.*
600	* vm_ops->map_pages() sets up a page
601	* table from atomic context.
602	* do_fault_around() pre-allocates
603	* page table to avoid allocation from
604	* atomic context.
605	*/
606	};
607
608	/*
609	* These are the virtual MM functions - opening of an area, closing and
610	* unmapping it (needed to keep files on disk up-to-date etc), pointer
611	* to the functions called when a no-page or a wp-page exception occurs.
612	*/
613	struct vm_operations_struct {
614	void (open)(struct* vm_area_struct * area);
615	/**
616	* @close: Called when the VMA is being removed from the MM.
617	* Context: User context. May sleep. Caller holds mmap_lock.
618	*/
619	void (close)(struct* vm_area_struct * area);
620	/ Called any time before splitting to check if it's allowed /
621	int (may_split)(struct* vm_area_struct area, unsigned* long addr);
622	int (mremap)(struct* vm_area_struct *area);
623	/*
624	* Called by mprotect() to make driver-specific permission
625	* checks before mprotect() is finalised. The VMA must not
626	* be modified. Returns 0 if mprotect() can proceed.
627	*/
628	int (mprotect)(struct* vm_area_struct vma, unsigned* long start,
629	unsigned long end, unsigned long newflags);
630	vm_fault_t (fault)(struct* vm_fault *vmf);
631	vm_fault_t (huge_fault)(struct* vm_fault vmf, unsigned* int order);
632	vm_fault_t (map_pages)(struct* vm_fault *vmf,
633	pgoff_t start_pgoff, pgoff_t end_pgoff);
634	unsigned long (pagesize)(struct* vm_area_struct * area);
635
636	/ notification that a previously read-only page is about to become*
637	* writable, if an error is returned it will cause a SIGBUS */
638	vm_fault_t (page_mkwrite)(struct* vm_fault *vmf);
639
640	/ same as page_mkwrite when using VM_PFNMAP\|VM_MIXEDMAP /
641	vm_fault_t (pfn_mkwrite)(struct* vm_fault *vmf);
642
643	/ called by access_process_vm when get_user_pages() fails, typically*
644	* for use by special VMAs. See also generic_access_phys() for a generic
645	* implementation useful for any iomem mapping.
646	*/
647	int (access)(struct* vm_area_struct vma, unsigned* long addr,
648	void buf, int* len, int write);
649
650	/ Called by the /proc/PID/maps code to ask the vma whether it*
651	* has a special name. Returning non-NULL will also cause this
652	* vma to be dumped unconditionally. */
653	const char (name)(struct vm_area_struct *vma);
654
655	#ifdef CONFIG_NUMA
656	/*
657	* set_policy() op must add a reference to any non-NULL @new mempolicy
658	* to hold the policy upon return. Caller should pass NULL @new to
659	* remove a policy and fall back to surrounding context--i.e. do not
660	* install a MPOL_DEFAULT policy, nor the task or system default
661	* mempolicy.
662	*/
663	int (set_policy)(struct* vm_area_struct vma, struct* mempolicy *new);
664
665	/*
666	* get_policy() op must add reference [mpol_get()] to any policy at
667	* (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
668	* in mm/mempolicy.c will do this automatically.
669	* get_policy() must NOT add a ref if the policy at (vma,addr) is not
670	* marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
671	* If no [shared/vma] mempolicy exists at the addr, get_policy() op
672	* must return NULL--i.e., do not "fallback" to task or system default
673	* policy.
674	*/
675	struct mempolicy (get_policy)(struct vm_area_struct *vma,
676	unsigned long addr, pgoff_t *ilx);
677	#endif
678	#ifdef CONFIG_FIND_NORMAL_PAGE
679	/*
680	* Called by vm_normal_page() for special PTEs in @vma at @addr. This
681	* allows for returning a "normal" page from vm_normal_page() even
682	* though the PTE indicates that the "struct page" either does not exist
683	* or should not be touched: "special".
684	*
685	* Do not add new users: this really only works when a "normal" page
686	* was mapped, but then the PTE got changed to something weird (+
687	* marked special) that would not make pte_pfn() identify the originally
688	* inserted page.
689	*/
690	struct page (find_normal_page)(struct vm_area_struct *vma,
691	unsigned long addr);
692	#endif /* CONFIG_FIND_NORMAL_PAGE */
693	};
694
#ifdef CONFIG_NUMA_BALANCING
/* Start a VMA with no numab_state attached. */
static inline void vma_numab_state_init(struct vm_area_struct *vma)
{
	vma->numab_state = NULL;
}

/* Release the VMA's numab_state with kfree(). */
static inline void vma_numab_state_free(struct vm_area_struct *vma)
{
	kfree(vma->numab_state);
}
#else
/* No-op stubs when CONFIG_NUMA_BALANCING is not set. */
static inline void vma_numab_state_init(struct vm_area_struct *vma) {}
static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
#endif /* CONFIG_NUMA_BALANCING */
708
709	/*
710	* These must be here rather than mmap_lock.h as dependent on vm_fault type,
711	* declared in this header.
712	*/
713	#ifdef CONFIG_PER_VMA_LOCK
714	static inline void release_fault_lock(struct vm_fault *vmf)
715	{
716	if (vmf->flags & FAULT_FLAG_VMA_LOCK)
717	vma_end_read(vma: vmf->vma);
718	else
719	mmap_read_unlock(mm: vmf->vma->vm_mm);
720	}
721
722	static inline void assert_fault_locked(const struct vm_fault *vmf)
723	{
724	if (vmf->flags & FAULT_FLAG_VMA_LOCK)
725	vma_assert_locked(vma: vmf->vma);
726	else
727	mmap_assert_locked(mm: vmf->vma->vm_mm);
728	}
729	#else
730	static inline void release_fault_lock(struct vm_fault *vmf)
731	{
732	mmap_read_unlock(vmf->vma->vm_mm);
733	}
734
735	static inline void assert_fault_locked(const struct vm_fault *vmf)
736	{
737	mmap_assert_locked(vmf->vma->vm_mm);
738	}
739	#endif /* CONFIG_PER_VMA_LOCK */
740
741	static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
742	{
743	return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
744	}
745
746	static inline bool mm_flags_test_and_set(int flag, struct mm_struct *mm)
747	{
748	return test_and_set_bit(nr: flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
749	}
750
751	static inline bool mm_flags_test_and_clear(int flag, struct mm_struct *mm)
752	{
753	return test_and_clear_bit(nr: flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
754	}
755
756	static inline void mm_flags_set(int flag, struct mm_struct *mm)
757	{
758	set_bit(nr: flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
759	}
760
761	static inline void mm_flags_clear(int flag, struct mm_struct *mm)
762	{
763	clear_bit(nr: flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
764	}
765
766	static inline void mm_flags_clear_all(struct mm_struct *mm)
767	{
768	bitmap_zero(ACCESS_PRIVATE(&mm->flags, __mm_flags), NUM_MM_FLAG_BITS);
769	}
770
771	extern const struct vm_operations_struct vma_dummy_vm_ops;
772
773	static inline void vma_init(struct vm_area_struct vma, struct* mm_struct *mm)
774	{
775	memset(s: vma, c: `0`, n: sizeof(*vma));
776	vma->vm_mm = mm;
777	vma->vm_ops = &vma_dummy_vm_ops;
778	INIT_LIST_HEAD(list: &vma->anon_vma_chain);
779	vma_lock_init(vma, reset_refcnt: false);
780	}
781
782	/ Use when VMA is not part of the VMA tree and needs no locking /
783	static inline void vm_flags_init(struct vm_area_struct *vma,
784	vm_flags_t flags)
785	{
786	ACCESS_PRIVATE(vma, __vm_flags) = flags;
787	}
788
789	/*
790	* Use when VMA is part of the VMA tree and modifications need coordination
791	* Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
792	* it should be locked explicitly beforehand.
793	*/
794	static inline void vm_flags_reset(struct vm_area_struct *vma,
795	vm_flags_t flags)
796	{
797	vma_assert_write_locked(vma);
798	vm_flags_init(vma, flags);
799	}
800
801	static inline void vm_flags_reset_once(struct vm_area_struct *vma,
802	vm_flags_t flags)
803	{
804	vma_assert_write_locked(vma);
805	WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
806	}
807
808	static inline void vm_flags_set(struct vm_area_struct *vma,
809	vm_flags_t flags)
810	{
811	vma_start_write(vma);
812	ACCESS_PRIVATE(vma, __vm_flags) \|= flags;
813	}
814
815	static inline void vm_flags_clear(struct vm_area_struct *vma,
816	vm_flags_t flags)
817	{
818	vma_start_write(vma);
819	ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
820	}
821
822	/*
823	* Use only if VMA is not part of the VMA tree or has no other users and
824	* therefore needs no locking.
825	*/
826	static inline void __vm_flags_mod(struct vm_area_struct *vma,
827	vm_flags_t set, vm_flags_t clear)
828	{
829	vm_flags_init(vma, flags: (vma->vm_flags \| set) & ~clear);
830	}
831
832	/*
833	* Use only when the order of set/clear operations is unimportant, otherwise
834	* use vm_flags_{set\|clear} explicitly.
835	*/
836	static inline void vm_flags_mod(struct vm_area_struct *vma,
837	vm_flags_t set, vm_flags_t clear)
838	{
839	vma_start_write(vma);
840	__vm_flags_mod(vma, set, clear);
841	}
842
843	static inline void vma_set_anonymous(struct vm_area_struct *vma)
844	{
845	vma->vm_ops = NULL;
846	}
847
848	static inline bool vma_is_anonymous(struct vm_area_struct *vma)
849	{
850	return !vma->vm_ops;
851	}
852
853	/*
854	* Indicate if the VMA is a heap for the given task; for
855	* /proc/PID/maps that is the heap of the main task.
856	*/
857	static inline bool vma_is_initial_heap(const struct vm_area_struct *vma)
858	{
859	return vma->vm_start < vma->vm_mm->brk &&
860	vma->vm_end > vma->vm_mm->start_brk;
861	}
862
863	/*
864	* Indicate if the VMA is a stack for the given task; for
865	* /proc/PID/maps that is the stack of the main task.
866	*/
867	static inline bool vma_is_initial_stack(const struct vm_area_struct *vma)
868	{
869	/*
870	* We make no effort to guess what a given thread considers to be
871	* its "stack". It's not even well-defined for programs written
872	* languages like Go.
873	*/
874	return vma->vm_start <= vma->vm_mm->start_stack &&
875	vma->vm_end >= vma->vm_mm->start_stack;
876	}
877
878	static inline bool vma_is_temporary_stack(const struct vm_area_struct *vma)
879	{
880	int maybe_stack = vma->vm_flags & (VM_GROWSDOWN \| VM_GROWSUP);
881
882	if (!maybe_stack)
883	return false;
884
885	if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
886	VM_STACK_INCOMPLETE_SETUP)
887	return true;
888
889	return false;
890	}
891
892	static inline bool vma_is_foreign(const struct vm_area_struct *vma)
893	{
894	if (!current->mm)
895	return true;
896
897	if (current->mm != vma->vm_mm)
898	return true;
899
900	return false;
901	}
902
903	static inline bool vma_is_accessible(const struct vm_area_struct *vma)
904	{
905	return vma->vm_flags & VM_ACCESS_FLAGS;
906	}
907
908	static inline bool is_shared_maywrite(vm_flags_t vm_flags)
909	{
910	return (vm_flags & (VM_SHARED \| VM_MAYWRITE)) ==
911	(VM_SHARED \| VM_MAYWRITE);
912	}
913
914	static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma)
915	{
916	return is_shared_maywrite(vm_flags: vma->vm_flags);
917	}
918
919	static inline
920	struct vm_area_struct vma_find(struct* vma_iterator vmi, unsigned* long max)
921	{
922	return mas_find(mas: &vmi->mas, max: max - `1`);
923	}
924
925	static inline struct vm_area_struct vma_next(struct* vma_iterator *vmi)
926	{
927	/*
928	* Uses mas_find() to get the first VMA when the iterator starts.
929	* Calling mas_next() could skip the first entry.
930	*/
931	return mas_find(mas: &vmi->mas, ULONG_MAX);
932	}
933
934	static inline
935	struct vm_area_struct vma_iter_next_range(struct* vma_iterator *vmi)
936	{
937	return mas_next_range(mas: &vmi->mas, ULONG_MAX);
938	}
939
940
941	static inline struct vm_area_struct vma_prev(struct* vma_iterator *vmi)
942	{
943	return mas_prev(mas: &vmi->mas, min: `0`);
944	}
945
946	static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
947	unsigned long start, unsigned long end, gfp_t gfp)
948	{
949	__mas_set_range(mas: &vmi->mas, start, last: end - `1`);
950	mas_store_gfp(mas: &vmi->mas, NULL, gfp);
951	if (unlikely(mas_is_err(&vmi->mas)))
952	return -ENOMEM;
953
954	return `0`;
955	}
956
957	/ Free any unused preallocations /
958	static inline void vma_iter_free(struct vma_iterator *vmi)
959	{
960	mas_destroy(mas: &vmi->mas);
961	}
962
963	static inline int vma_iter_bulk_store(struct vma_iterator *vmi,
964	struct vm_area_struct *vma)
965	{
966	vmi->mas.index = vma->vm_start;
967	vmi->mas.last = vma->vm_end - `1`;
968	mas_store(mas: &vmi->mas, entry: vma);
969	if (unlikely(mas_is_err(&vmi->mas)))
970	return -ENOMEM;
971
972	vma_mark_attached(vma);
973	return `0`;
974	}
975
976	static inline void vma_iter_invalidate(struct vma_iterator *vmi)
977	{
978	mas_pause(mas: &vmi->mas);
979	}
980
981	static inline void vma_iter_set(struct vma_iterator vmi, unsigned* long addr)
982	{
983	mas_set(mas: &vmi->mas, index: addr);
984	}
985
/*
 * Loop over every VMA that vma_next() yields from @__vmi, assigning each to
 * @__vma; the loop ends when vma_next() returns NULL.
 */
#define for_each_vma(__vmi, __vma)					\
	while (((__vma) = vma_next(&(__vmi))) != NULL)

/* The MM code likes to work with exclusive end addresses */
#define for_each_vma_range(__vmi, __vma, __end)				\
	while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)
992
993	#ifdef CONFIG_SHMEM
994	/*
995	* The vma_is_shmem is not inline because it is used only by slow
996	* paths in userfault.
997	*/
998	bool vma_is_shmem(const struct vm_area_struct *vma);
999	bool vma_is_anon_shmem(const struct vm_area_struct *vma);
1000	#else
1001	static inline bool vma_is_shmem(const struct vm_area_struct vma) { return* false; }
1002	static inline bool vma_is_anon_shmem(const struct vm_area_struct vma) { return* false; }
1003	#endif
1004
int vma_is_stack_for_current(const struct vm_area_struct *vma);

/* flush_tlb_range() takes a vma, not a mm, and can care about flags */
#define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) }

struct mmu_gather;
struct inode;

extern void prep_compound_page(struct page *page, unsigned int order);
1014
1015	static inline unsigned int folio_large_order(const struct folio *folio)
1016	{
1017	return folio->_flags_1 & `0xff`;
1018	}
1019
/*
 * Number of pages in a large folio: read from _nr_pages when
 * NR_PAGES_IN_LARGE_FOLIO is defined, otherwise derived as 1 << order.
 */
#ifdef NR_PAGES_IN_LARGE_FOLIO
static inline unsigned long folio_large_nr_pages(const struct folio *folio)
{
	return folio->_nr_pages;
}
#else
static inline unsigned long folio_large_nr_pages(const struct folio *folio)
{
	return 1L << folio_large_order(folio);
}
#endif
1031
1032	/*
1033	* compound_order() can be called without holding a reference, which means
1034	* that niceties like page_folio() don't work. These callers should be
1035	* prepared to handle wild return values. For example, PG_head may be
1036	* set before the order is initialised, or this may be a tail page.
1037	* See compaction.c for some good examples.
1038	*/
1039	static inline unsigned int compound_order(const struct page *page)
1040	{
1041	const struct folio folio = (struct* folio *)page;
1042
1043	if (!test_bit(PG_head, &folio->flags.f))
1044	return `0`;
1045	return folio_large_order(folio);
1046	}
1047
/**
 * folio_order - The allocation order of a folio.
 * @folio: The folio.
 *
 * A folio is composed of 2^order pages.  See get_order() for the definition
 * of order.
 *
 * Return: The order of the folio; 0 when the folio is not large.
 */
static inline unsigned int folio_order(const struct folio *folio)
{
	if (!folio_test_large(folio))
		return 0;
	return folio_large_order(folio);
}
1063
1064	/**
1065	* folio_reset_order - Reset the folio order and derived _nr_pages
1066	* @folio: The folio.
1067	*
1068	* Reset the order and derived _nr_pages to 0. Must only be used in the
1069	* process of splitting large folios.
1070	*/
1071	static inline void folio_reset_order(struct folio *folio)
1072	{
1073	if (WARN_ON_ONCE(!folio_test_large(folio)))
1074	return;
1075	folio->_flags_1 &= ~`0xffUL`;
1076	#ifdef NR_PAGES_IN_LARGE_FOLIO
1077	folio->_nr_pages = `0`;
1078	#endif
1079	}
1080
1081	#include <linux/huge_mm.h>
1082
1083	/*
1084	* Methods to modify the page usage count.
1085	*
1086	* What counts for a page usage:
1087	* - cache mapping (page->mapping)
1088	* - private data (page->private)
1089	* - page mapped in a task's page tables, each mapping
1090	* is counted separately
1091	*
1092	* Also, many kernel routines increase the page count before a critical
1093	* routine so they can be sure the page doesn't go away from under them.
1094	*/
1095
/*
 * Drop a ref, return true if the refcount fell to zero (the page has no users)
 *
 * With VM debugging enabled, calling this on a page whose refcount is
 * already zero trips VM_BUG_ON_PAGE().
 */
static inline int put_page_testzero(struct page *page)
{
	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
	return page_ref_dec_and_test(page);
}
1104
1105	static inline int folio_put_testzero(struct folio *folio)
1106	{
1107	return put_page_testzero(page: &folio->page);
1108	}
1109
1110	/*
1111	* Try to grab a ref unless the page has a refcount of zero, return false if
1112	* that is the case.
1113	* This can be called when MMU is off so it must not access
1114	* any of the virtual mappings.
1115	*/
1116	static inline bool get_page_unless_zero(struct page *page)
1117	{
1118	return page_ref_add_unless(page, nr: `1`, u: `0`);
1119	}
1120
1121	static inline struct folio folio_get_nontail_page(struct* page *page)
1122	{
1123	if (unlikely(!get_page_unless_zero(page)))
1124	return NULL;
1125	return (struct folio *)page;
1126	}
1127
extern int page_is_ram(unsigned long pfn);

/*
 * Result codes, numbered from 0 in declaration order.
 * NOTE(review): presumably the return values of region_intersects() - confirm.
 */
enum {
	REGION_INTERSECTS,
	REGION_DISJOINT,
	REGION_MIXED,
};
1135
1136	int region_intersects(resource_size_t offset, size_t size, unsigned long flags,
1137	unsigned long desc);
1138
1139	/ Support for virtually mapped pages /
1140	struct page vmalloc_to_page(const* void *addr);
1141	unsigned long vmalloc_to_pfn(const void *addr);
1142
/*
 * Determine if an address is within the vmalloc range
 *
 * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
 * is no special casing required.
 */
#ifdef CONFIG_MMU
extern bool is_vmalloc_addr(const void *x);
extern int is_vmalloc_or_module_addr(const void *x);
#else
/* Without an MMU both checks are constant "no". */
static inline bool is_vmalloc_addr(const void *x)
{
	return false;
}
static inline int is_vmalloc_or_module_addr(const void *x)
{
	return 0;
}
#endif
1162
1163	/*
1164	* How many times the entire folio is mapped as a single unit (eg by a
1165	* PMD or PUD entry). This is probably not what you want, except for
1166	* debugging purposes or implementation of other core folio_*() primitives.
1167	*/
1168	static inline int folio_entire_mapcount(const struct folio *folio)
1169	{
1170	VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
1171	if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio_large_order(folio) == `1`))
1172	return `0`;
1173	return atomic_read(v: &folio->_entire_mapcount) + `1`;
1174	}
1175
1176	static inline int folio_large_mapcount(const struct folio *folio)
1177	{
1178	VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
1179	return atomic_read(v: &folio->_large_mapcount) + `1`;
1180	}
1181
1182	/**
1183	* folio_mapcount() - Number of mappings of this folio.
1184	* @folio: The folio.
1185	*
1186	* The folio mapcount corresponds to the number of present user page table
1187	* entries that reference any part of a folio. Each such present user page
1188	* table entry must be paired with exactly on folio reference.
1189	*
1190	* For ordindary folios, each user page table entry (PTE/PMD/PUD/...) counts
1191	* exactly once.
1192	*
1193	* For hugetlb folios, each abstracted "hugetlb" user page table entry that
1194	* references the entire folio counts exactly once, even when such special
1195	* page table entries are comprised of multiple ordinary page table entries.
1196	*
1197	* Will report 0 for pages which cannot be mapped into userspace, such as
1198	* slab, page tables and similar.
1199	*
1200	* Return: The number of times this folio is mapped.
1201	*/
1202	static inline int folio_mapcount(const struct folio *folio)
1203	{
1204	int mapcount;
1205
1206	if (likely(!folio_test_large(folio))) {
1207	mapcount = atomic_read(v: &folio->_mapcount) + `1`;
1208	if (page_mapcount_is_type(mapcount))
1209	mapcount = `0`;
1210	return mapcount;
1211	}
1212	return folio_large_mapcount(folio);
1213	}
1214
1215	/**
1216	* folio_mapped - Is this folio mapped into userspace?
1217	* @folio: The folio.
1218	*
1219	* Return: True if any page in this folio is referenced by user page tables.
1220	*/
1221	static inline bool folio_mapped(const struct folio *folio)
1222	{
1223	return folio_mapcount(folio) >= `1`;
1224	}
1225
1226	/*
1227	* Return true if this page is mapped into pagetables.
1228	* For compound page it returns true if any sub-page of compound page is mapped,
1229	* even if this particular sub-page is not itself mapped by any PTE or PMD.
1230	*/
1231	static inline bool page_mapped(const struct page *page)
1232	{
1233	return folio_mapped(page_folio(page));
1234	}
1235
/* Translate address @x with virt_to_page(), then return its compound head. */
static inline struct page *virt_to_head_page(const void *x)
{
	struct page *page = virt_to_page(x);

	return compound_head(page);
}
1242
/* Translate address @x with virt_to_page(), then return the page's folio. */
static inline struct folio *virt_to_folio(const void *x)
{
	struct page *page = virt_to_page(x);

	return page_folio(page);
}
1249
void __folio_put(struct folio *folio);

void split_page(struct page *page, unsigned int order);
void folio_copy(struct folio *dst, struct folio *src);
int folio_mc_copy(struct folio *dst, struct folio *src);

unsigned long nr_free_buffer_pages(void);
1257
1258	/ Returns the number of bytes in this potentially compound page. /
1259	static inline unsigned long page_size(const struct page *page)
1260	{
1261	return PAGE_SIZE << compound_order(page);
1262	}
1263
1264	/ Returns the number of bits needed for the number of bytes in a page /
1265	static inline unsigned int page_shift(struct page *page)
1266	{
1267	return PAGE_SHIFT + compound_order(page);
1268	}
1269
/**
 * thp_order - Order of a transparent huge page.
 * @page: Head page of a transparent huge page.
 *
 * Passing a tail page trips VM_BUG_ON_PGFLAGS() when that check is enabled.
 *
 * Return: compound_order() of @page.
 */
static inline unsigned int thp_order(struct page *page)
{
	VM_BUG_ON_PGFLAGS(PageTail(page), page);
	return compound_order(page);
}
1279
1280	/**
1281	* thp_size - Size of a transparent huge page.
1282	* @page: Head page of a transparent huge page.
1283	*
1284	* Return: Number of bytes in this page.
1285	*/
1286	static inline unsigned long thp_size(struct page *page)
1287	{
1288	return PAGE_SIZE << thp_order(page);
1289	}
1290
#ifdef CONFIG_MMU
/*
 * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
 * servicing faults for write access.  In the normal case, we do always want
 * pte_mkwrite.  But get_user_pages can cause write faults for mappings
 * that do not have writing enabled, when used by access_process_vm.
 *
 * Return: @pte, passed through pte_mkwrite() when VM_WRITE is set on @vma.
 */
static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
	if (likely(vma->vm_flags & VM_WRITE))
		pte = pte_mkwrite(pte, vma);
	return pte;
}

vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *page);
void set_pte_range(struct vm_fault *vmf, struct folio *folio,
		struct page *page, unsigned int nr, unsigned long addr);

vm_fault_t finish_fault(struct vm_fault *vmf);
#endif
1311
1312	/*
1313	* Multiple processes may "see" the same page. E.g. for untouched
1314	* mappings of /dev/null, all processes see the same page full of
1315	* zeroes, and text pages of executables and shared libraries have
1316	* only one copy in memory, at most, normally.
1317	*
1318	* For the non-reserved pages, page_count(page) denotes a reference count.
1319	* page_count() == 0 means the page is free. page->lru is then used for
1320	* freelist management in the buddy allocator.
1321	* page_count() > 0 means the page has been allocated.
1322	*
1323	* Pages are allocated by the slab allocator in order to provide memory
1324	* to kmalloc and kmem_cache_alloc. In this case, the management of the
1325	* page, and the fields in 'struct page' are the responsibility of mm/slab.c
1326	* unless a particular usage is carefully commented. (the responsibility of
1327	* freeing the kmalloc memory is the caller's, of course).
1328	*
1329	* A page may be used by anyone else who does a __get_free_page().
1330	* In this case, page_count still tracks the references, and should only
1331	* be used through the normal accessor functions. The top bits of page->flags
1332	* and page->virtual store page management information, but all other fields
1333	* are unused and could be used privately, carefully. The management of this
1334	* page is the responsibility of the one who allocated it, and those who have
1335	* subsequently been given references to it.
1336	*
1337	* The other pages (we may call them "pagecache pages") are completely
1338	* managed by the Linux memory manager: I/O, buffers, swapping etc.
1339	* The following discussion applies only to them.
1340	*
1341	* A pagecache page contains an opaque `private' member, which belongs to the
1342	* page's address_space. Usually, this is the address of a circular list of
1343	* the page's disk buffers. PG_private must be set to tell the VM to call
1344	* into the filesystem to release these pages.
1345	*
1346	* A folio may belong to an inode's memory mapping. In this case,
1347	* folio->mapping points to the inode, and folio->index is the file
1348	* offset of the folio, in units of PAGE_SIZE.
1349	*
1350	* If pagecache pages are not associated with an inode, they are said to be
1351	* anonymous pages. These may become associated with the swapcache, and in that
1352	* case PG_swapcache is set, and page->private is an offset into the swapcache.
1353	*
1354	* In either case (swapcache or inode backed), the pagecache itself holds one
1355	* reference to the page. Setting PG_private should also increment the
1356	* refcount. Each user mapping also has a reference to the page.
1357	*
1358	* The pagecache pages are stored in a per-mapping radix tree, which is
1359	* rooted at mapping->i_pages, and indexed by offset.
1360	* Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
1361	* lists, we instead now tag pages as dirty/writeback in the radix tree.
1362	*
1363	* All pagecache pages may be subject to I/O:
1364	* - inode pages may need to be read from disk,
1365	* - inode pages which have been modified and are MAP_SHARED may need
1366	* to be written back to the inode on disk,
1367	* - anonymous pages (including MAP_PRIVATE file mappings) which have been
1368	* modified may need to be swapped out to swap space and (later) to be read
1369	* back into memory.
1370	*/
1371
/*
 * 127: arbitrary random number, small enough to assemble well
 *
 * The unsigned add-and-compare is true exactly when the refcount, read as
 * an int, lies in [-127, 0].
 */
#define folio_ref_zero_or_close_to_overflow(folio) \
	((unsigned int) folio_ref_count(folio) + 127u <= 127u)
1375
/**
 * folio_get - Increment the reference count on a folio.
 * @folio: The folio.
 *
 * Context: May be called in any context, as long as you know that
 * you have a refcount on the folio.  If you do not already have one,
 * folio_try_get() may be the right interface for you to use.
 */
static inline void folio_get(struct folio *folio)
{
	/* Debug check: the count must be neither zero nor near overflow. */
	VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio);
	folio_ref_inc(folio);
}
1389
/*
 * Take a reference on the folio containing @page.  Slab and large-kmalloc
 * folios are refused: a one-time warning is emitted and no reference is
 * taken.
 */
static inline void get_page(struct page *page)
{
	struct folio *folio = page_folio(page);

	if (WARN_ON_ONCE(folio_test_slab(folio)))
		return;
	if (WARN_ON_ONCE(folio_test_large_kmalloc(folio)))
		return;
	folio_get(folio);
}
1399
1400	static inline __must_check bool try_get_page(struct page *page)
1401	{
1402	page = compound_head(page);
1403	if (WARN_ON_ONCE(page_ref_count(page) <= `0`))
1404	return false;
1405	page_ref_inc(page);
1406	return true;
1407	}
1408
/**
 * folio_put - Decrement the reference count on a folio.
 * @folio: The folio.
 *
 * If the folio's reference count reaches zero, the memory will be
 * released back to the page allocator and may be used by another
 * allocation immediately.  Do not access the memory or the struct folio
 * after calling folio_put() unless you can be sure that it wasn't the
 * last reference.
 *
 * Context: May be called in process or interrupt context, but not in NMI
 * context.  May be called while holding a spinlock.
 */
static inline void folio_put(struct folio *folio)
{
	if (folio_put_testzero(folio))
		__folio_put(folio);
}
1427
/**
 * folio_put_refs - Reduce the reference count on a folio.
 * @folio: The folio.
 * @refs: The amount to subtract from the folio's reference count.
 *
 * If the folio's reference count reaches zero, the memory will be
 * released back to the page allocator and may be used by another
 * allocation immediately.  Do not access the memory or the struct folio
 * after calling folio_put_refs() unless you can be sure that these weren't
 * the last references.
 *
 * Context: May be called in process or interrupt context, but not in NMI
 * context.  May be called while holding a spinlock.
 */
static inline void folio_put_refs(struct folio *folio, int refs)
{
	if (folio_ref_sub_and_test(folio, refs))
		__folio_put(folio);
}
1447
void folios_put_refs(struct folio_batch *folios, unsigned int *refs);
1449
/*
 * union release_pages_arg - an array of pages or folios
 *
 * release_pages() releases a simple array of multiple pages, and
 * accepts various different forms of said page array: either
 * a regular old boring array of pages, an array of folios, or
 * an array of encoded page pointers.
 *
 * The transparent union syntax for this kind of "any of these
 * argument types" is all kinds of ugly, so look away.
 */
typedef union {
	struct page **pages;
	struct folio **folios;
	struct encoded_page **encoded_pages;
} release_pages_arg __attribute__ ((__transparent_union__));

void release_pages(release_pages_arg, int nr);
1468
1469	/**
1470	* folios_put - Decrement the reference count on an array of folios.
1471	* @folios: The folios.
1472	*
1473	* Like folio_put(), but for a batch of folios. This is more efficient
1474	* than writing the loop yourself as it will optimise the locks which need
1475	* to be taken if the folios are freed. The folios batch is returned
1476	* empty and ready to be reused for another batch; there is no need to
1477	* reinitialise it.
1478	*
1479	* Context: May be called in process or interrupt context, but not in NMI
1480	* context. May be called while holding a spinlock.
1481	*/
1482	static inline void folios_put(struct folio_batch *folios)
1483	{
1484	folios_put_refs(folios, NULL);
1485	}
1486
/*
 * Drop a reference on the folio containing @page.  Slab and large-kmalloc
 * folios are skipped silently: no reference is dropped for them.
 */
static inline void put_page(struct page *page)
{
	struct folio *folio = page_folio(page);

	if (folio_test_slab(folio) || folio_test_large_kmalloc(folio))
		return;

	folio_put(folio);
}
1496
1497	/*
1498	* GUP_PIN_COUNTING_BIAS, and the associated functions that use it, overload
1499	* the page's refcount so that two separate items are tracked: the original page
1500	* reference count, and also a new count of how many pin_user_pages() calls were
1501	* made against the page. ("gup-pinned" is another term for the latter).
1502	*
1503	* With this scheme, pin_user_pages() becomes special: such pages are marked as
1504	* distinct from normal pages. As such, the unpin_user_page() call (and its
1505	* variants) must be used in order to release gup-pinned pages.
1506	*
1507	* Choice of value:
1508	*
1509	* By making GUP_PIN_COUNTING_BIAS a power of two, debugging of page reference
1510	* counts with respect to pin_user_pages() and unpin_user_page() becomes
1511	* simpler, due to the fact that adding an even power of two to the page
1512	* refcount has the effect of using only the upper N bits, for the code that
1513	* counts up using the bias value. This means that the lower bits are left for
1514	* the exclusive use of the original code that increments and decrements by one
1515	* (or at least, by much smaller values than the bias value).
1516	*
1517	* Of course, once the lower bits overflow into the upper bits (and this is
1518	* OK, because subtraction recovers the original values), then visual inspection
1519	* no longer suffices to directly view the separate counts. However, for normal
1520	* applications that don't have huge page reference counts, this won't be an
1521	* issue.
1522	*
1523	* Locking: the lockless algorithm described in folio_try_get_rcu()
1524	* provides safe operation for get_user_pages(), folio_mkclean() and
1525	* other calls that race to set up page table entries.
1526	*/
#define GUP_PIN_COUNTING_BIAS (1U << 10)
1528
1529	void unpin_user_page(struct page *page);
1530	void unpin_folio(struct folio *folio);
1531	void unpin_user_pages_dirty_lock(struct page *pages, unsigned* long npages,
1532	bool make_dirty);
1533	void unpin_user_page_range_dirty_lock(struct page page, unsigned* long npages,
1534	bool make_dirty);
1535	void unpin_user_pages(struct page *pages, unsigned* long npages);
1536	void unpin_user_folio(struct folio folio, unsigned* long npages);
1537	void unpin_folios(struct folio *folios, unsigned* long nfolios);
1538
1539	static inline bool is_cow_mapping(vm_flags_t flags)
1540	{
1541	return (flags & (VM_SHARED \| VM_MAYWRITE)) == VM_MAYWRITE;
1542	}
1543
1544	#ifndef CONFIG_MMU
1545	static inline bool is_nommu_shared_mapping(vm_flags_t flags)
1546	{
1547	/*
1548	* NOMMU shared mappings are ordinary MAP_SHARED mappings and selected
1549	* R/O MAP_PRIVATE file mappings that are an effective R/O overlay of
1550	* a file mapping. R/O MAP_PRIVATE mappings might still modify
1551	* underlying memory if ptrace is active, so this is only possible if
1552	* ptrace does not apply. Note that there is no mprotect() to upgrade
1553	* write permissions later.
1554	*/
1555	return flags & (VM_MAYSHARE \| VM_MAYOVERLAY);
1556	}
1557	#endif
1558
/* Defined only for CONFIG_SPARSEMEM builds without CONFIG_SPARSEMEM_VMEMMAP. */
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
1562
1563	/*
1564	* The identification function is mainly used by the buddy allocator for
1565	* determining if two pages could be buddies. We are not really identifying
1566	* the zone since we could be using the section number id if we do not have
1567	* node id available in page flags.
1568	* We only guarantee that it will return the same value for two combinable
1569	* pages in a zone.
1570	*/
1571	static inline int page_zone_id(struct page *page)
1572	{
1573	return (page->flags.f >> ZONEID_PGSHIFT) & ZONEID_MASK;
1574	}
1575
1576	#ifdef NODE_NOT_IN_PAGE_FLAGS
1577	int memdesc_nid(memdesc_flags_t mdf);
1578	#else
1579	static inline int memdesc_nid(memdesc_flags_t mdf)
1580	{
1581	return (mdf.f >> NODES_PGSHIFT) & NODES_MASK;
1582	}
1583	#endif
1584
1585	static inline int page_to_nid(const struct page *page)
1586	{
1587	return memdesc_nid(PF_POISONED_CHECK(page)->flags);
1588	}
1589
1590	static inline int folio_nid(const struct folio *folio)
1591	{
1592	return memdesc_nid(mdf: folio->flags);
1593	}
1594
1595	#ifdef CONFIG_NUMA_BALANCING
/* page access time bits need to hold at least 4 seconds */
#define PAGE_ACCESS_TIME_MIN_BITS	12
/*
 * When LAST_CPUPID_SHIFT provides fewer bits than the minimum, the shortfall
 * becomes the bucket shift; otherwise no bucketing is applied.
 */
#if LAST_CPUPID_SHIFT < PAGE_ACCESS_TIME_MIN_BITS
#define PAGE_ACCESS_TIME_BUCKETS				\
	(PAGE_ACCESS_TIME_MIN_BITS - LAST_CPUPID_SHIFT)
#else
#define PAGE_ACCESS_TIME_BUCKETS	0
#endif

#define PAGE_ACCESS_TIME_MASK				\
	(LAST_CPUPID_MASK << PAGE_ACCESS_TIME_BUCKETS)
1607
1608	static inline int cpu_pid_to_cpupid(int cpu, int pid)
1609	{
1610	return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) \| (pid & LAST__PID_MASK);
1611	}
1612
1613	static inline int cpupid_to_pid(int cpupid)
1614	{
1615	return cpupid & LAST__PID_MASK;
1616	}
1617
1618	static inline int cpupid_to_cpu(int cpupid)
1619	{
1620	return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK;
1621	}
1622
1623	static inline int cpupid_to_nid(int cpupid)
1624	{
1625	return cpu_to_node(cpupid_to_cpu(cpupid));
1626	}
1627
1628	static inline bool cpupid_pid_unset(int cpupid)
1629	{
1630	return cpupid_to_pid(cpupid) == (-`1` & LAST__PID_MASK);
1631	}
1632
1633	static inline bool cpupid_cpu_unset(int cpupid)
1634	{
1635	return cpupid_to_cpu(cpupid) == (-`1` & LAST__CPU_MASK);
1636	}
1637
1638	static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
1639	{
1640	return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid);
1641	}
1642
1643	#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)
1644	#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
1645	static inline int folio_xchg_last_cpupid(struct folio folio, int* cpupid)
1646	{
1647	return xchg(&folio->_last_cpupid, cpupid & LAST_CPUPID_MASK);
1648	}
1649
1650	static inline int folio_last_cpupid(struct folio *folio)
1651	{
1652	return folio->_last_cpupid;
1653	}
1654	static inline void page_cpupid_reset_last(struct page *page)
1655	{
1656	page->_last_cpupid = -`1` & LAST_CPUPID_MASK;
1657	}
1658	#else
1659	static inline int folio_last_cpupid(struct folio *folio)
1660	{
1661	return (folio->flags.f >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
1662	}
1663
1664	int folio_xchg_last_cpupid(struct folio folio, int* cpupid);
1665
1666	static inline void page_cpupid_reset_last(struct page *page)
1667	{
1668	page->flags.f \|= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT;
1669	}
1670	#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
1671
1672	static inline int folio_xchg_access_time(struct folio folio, int* time)
1673	{
1674	int last_time;
1675
1676	last_time = folio_xchg_last_cpupid(folio,
1677	time >> PAGE_ACCESS_TIME_BUCKETS);
1678	return last_time << PAGE_ACCESS_TIME_BUCKETS;
1679	}
1680
1681	static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
1682	{
1683	unsigned int pid_bit;
1684
1685	pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG));
1686	if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[`1`])) {
1687	__set_bit(pid_bit, &vma->numab_state->pids_active[`1`]);
1688	}
1689	}
1690
1691	bool folio_use_access_time(struct folio *folio);
1692	#else /* !CONFIG_NUMA_BALANCING */
/* !CONFIG_NUMA_BALANCING stubs: nothing is stored. */
static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid)
{
	return folio_nid(folio); /* XXX */
}

static inline int folio_xchg_access_time(struct folio *folio, int time)
{
	return 0;
}

static inline int folio_last_cpupid(struct folio *folio)
{
	return folio_nid(folio); /* XXX */
}
1707
/*
 * !CONFIG_NUMA_BALANCING stubs: every decode and encode helper yields -1,
 * and the pid is always reported as unset.
 */
static inline int cpupid_to_nid(int cpupid)
{
	return -1;
}

static inline int cpupid_to_pid(int cpupid)
{
	return -1;
}

static inline int cpupid_to_cpu(int cpupid)
{
	return -1;
}

static inline int cpu_pid_to_cpupid(int nid, int pid)
{
	return -1;
}

static inline bool cpupid_pid_unset(int cpupid)
{
	return true;
}
1732
1733	static inline void page_cpupid_reset_last(struct page *page)
1734	{
1735	}
1736
1737	static inline bool cpupid_match_pid(struct task_struct task, int* cpupid)
1738	{
1739	return false;
1740	}
1741
1742	static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
1743	{
1744	}
1745	static inline bool folio_use_access_time(struct folio *folio)
1746	{
1747	return false;
1748	}
1749	#endif /* CONFIG_NUMA_BALANCING */
1750
1751	#if defined(CONFIG_KASAN_SW_TAGS) \|\| defined(CONFIG_KASAN_HW_TAGS)
1752
1753	/*
1754	* KASAN per-page tags are stored xor'ed with 0xff. This allows to avoid
1755	* setting tags for all pages to native kernel tag value 0xff, as the default
1756	* value 0x00 maps to 0xff.
1757	*/
1758
1759	static inline u8 page_kasan_tag(const struct page *page)
1760	{
1761	u8 tag = KASAN_TAG_KERNEL;
1762
1763	if (kasan_enabled()) {
1764	tag = (page->flags.f >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
1765	tag ^= `0xff`;
1766	}
1767
1768	return tag;
1769	}
1770
1771	static inline void page_kasan_tag_set(struct page *page, u8 tag)
1772	{
1773	unsigned long old_flags, flags;
1774
1775	if (!kasan_enabled())
1776	return;
1777
1778	tag ^= `0xff`;
1779	old_flags = READ_ONCE(page->flags.f);
1780	do {
1781	flags = old_flags;
1782	flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
1783	flags \|= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
1784	} while (unlikely(!try_cmpxchg(&page->flags.f, &old_flags, flags)));
1785	}
1786
1787	static inline void page_kasan_tag_reset(struct page *page)
1788	{
1789	if (kasan_enabled())
1790	page_kasan_tag_set(page, KASAN_TAG_KERNEL);
1791	}
1792
1793	#else /* CONFIG_KASAN_SW_TAGS \|\| CONFIG_KASAN_HW_TAGS */
1794
1795	static inline u8 page_kasan_tag(const struct page *page)
1796	{
1797	return `0xff`;
1798	}
1799
1800	static inline void page_kasan_tag_set(struct page *page, u8 tag) { }
1801	static inline void page_kasan_tag_reset(struct page *page) { }
1802
1803	#endif /* CONFIG_KASAN_SW_TAGS \|\| CONFIG_KASAN_HW_TAGS */
1804
1805	static inline struct zone page_zone(const* struct page *page)
1806	{
1807	return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
1808	}
1809
1810	static inline pg_data_t page_pgdat(const* struct page *page)
1811	{
1812	return NODE_DATA(page_to_nid(page));
1813	}
1814
1815	static inline pg_data_t folio_pgdat(const* struct folio *folio)
1816	{
1817	return NODE_DATA(folio_nid(folio));
1818	}
1819
1820	static inline struct zone folio_zone(const* struct folio *folio)
1821	{
1822	return &folio_pgdat(folio)->node_zones[folio_zonenum(folio)];
1823	}
1824
1825	#ifdef SECTION_IN_PAGE_FLAGS
1826	static inline void set_page_section(struct page page, unsigned* long section)
1827	{
1828	page->flags.f &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
1829	page->flags.f \|= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
1830	}
1831
1832	static inline unsigned long memdesc_section(memdesc_flags_t mdf)
1833	{
1834	return (mdf.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
1835	}
1836	#else /* !SECTION_IN_PAGE_FLAGS */
1837	static inline unsigned long memdesc_section(memdesc_flags_t mdf)
1838	{
1839	return `0`;
1840	}
1841	#endif /* SECTION_IN_PAGE_FLAGS */
1842
1843	/**
1844	* folio_pfn - Return the Page Frame Number of a folio.
1845	* @folio: The folio.
1846	*
1847	* A folio may contain multiple pages. The pages have consecutive
1848	* Page Frame Numbers.
1849	*
1850	* Return: The Page Frame Number of the first page in the folio.
1851	*/
1852	static inline unsigned long folio_pfn(const struct folio *folio)
1853	{
1854	return page_to_pfn(&folio->page);
1855	}
1856
/* Return the folio of the page that pfn_to_page() gives for @pfn. */
static inline struct folio *pfn_folio(unsigned long pfn)
{
	return page_folio(pfn_to_page(pfn));
}
1861
#ifdef CONFIG_MMU
/* Build a PTE from @page's page frame number and @pgprot. */
static inline pte_t mk_pte(const struct page *page, pgprot_t pgprot)
{
	return pfn_pte(page_to_pfn(page), pgprot);
}

/**
 * folio_mk_pte - Create a PTE for this folio
 * @folio: The folio to create a PTE for
 * @pgprot: The page protection bits to use
 *
 * Create a page table entry for the first page of this folio.
 * This is suitable for passing to set_ptes().
 *
 * Return: A page table entry suitable for mapping this folio.
 */
static inline pte_t folio_mk_pte(const struct folio *folio, pgprot_t pgprot)
{
	return pfn_pte(folio_pfn(folio), pgprot);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
 * folio_mk_pmd - Create a PMD for this folio
 * @folio: The folio to create a PMD for
 * @pgprot: The page protection bits to use
 *
 * Create a page table entry for the first page of this folio.
 * This is suitable for passing to set_pmd_at().
 *
 * Return: A page table entry suitable for mapping this folio.
 */
static inline pmd_t folio_mk_pmd(const struct folio *folio, pgprot_t pgprot)
{
	return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot));
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
/**
 * folio_mk_pud - Create a PUD for this folio
 * @folio: The folio to create a PUD for
 * @pgprot: The page protection bits to use
 *
 * Create a page table entry for the first page of this folio.
 * This is suitable for passing to set_pud_at().
 *
 * Return: A page table entry suitable for mapping this folio.
 */
static inline pud_t folio_mk_pud(const struct folio *folio, pgprot_t pgprot)
{
	return pud_mkhuge(pfn_pud(folio_pfn(folio), pgprot));
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* CONFIG_MMU */
1917
1918	static inline bool folio_has_pincount(const struct folio *folio)
1919	{
1920	if (IS_ENABLED(CONFIG_64BIT))
1921	return folio_test_large(folio);
1922	return folio_order(folio) > `1`;
1923	}
1924
1925	/**
1926	* folio_maybe_dma_pinned - Report if a folio may be pinned for DMA.
1927	* @folio: The folio.
1928	*
1929	* This function checks if a folio has been pinned via a call to
1930	* a function in the pin_user_pages() family.
1931	*
1932	* For small folios, the return value is partially fuzzy: false is not fuzzy,
1933	* because it means "definitely not pinned for DMA", but true means "probably
1934	* pinned for DMA, but possibly a false positive due to having at least
1935	* GUP_PIN_COUNTING_BIAS worth of normal folio references".
1936	*
1937	* False positives are OK, because: a) it's unlikely for a folio to
1938	* get that many refcounts, and b) all the callers of this routine are
1939	* expected to be able to deal gracefully with a false positive.
1940	*
1941	* For most large folios, the result will be exactly correct. That's because
1942	* we have more tracking data available: the _pincount field is used
1943	* instead of the GUP_PIN_COUNTING_BIAS scheme.
1944	*
1945	* For more information, please see Documentation/core-api/pin_user_pages.rst.
1946	*
1947	* Return: True, if it is likely that the folio has been "dma-pinned".
1948	* False, if the folio is definitely not dma-pinned.
1949	*/
1950	static inline bool folio_maybe_dma_pinned(struct folio *folio)
1951	{
1952	if (folio_has_pincount(folio))
1953	return atomic_read(v: &folio->_pincount) > `0`;
1954
1955	/*
1956	* folio_ref_count() is signed. If that refcount overflows, then
1957	* folio_ref_count() returns a negative value, and callers will avoid
1958	* further incrementing the refcount.
1959	*
1960	* Here, for that overflow case, use the sign bit to count a little
1961	* bit higher via unsigned math, and thus still get an accurate result.
1962	*/
1963	return ((unsigned int)folio_ref_count(folio)) >=
1964	GUP_PIN_COUNTING_BIAS;
1965	}
1966
1967	/*
1968	* This should most likely only be called during fork() to see whether we
1969	* should break the cow immediately for an anon page on the src mm.
1970	*
1971	* The caller has to hold the PT lock and the vma->vm_mm->->write_protect_seq.
1972	*/
1973	static inline bool folio_needs_cow_for_dma(struct vm_area_struct *vma,
1974	struct folio *folio)
1975	{
1976	VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & `1`));
1977
1978	if (!mm_flags_test(MMF_HAS_PINNED, mm: vma->vm_mm))
1979	return false;
1980
1981	return folio_maybe_dma_pinned(folio);
1982	}
1983
/**
 * is_zero_page - Query if a page is a zero page
 * @page: The page to query
 *
 * This returns true if @page is one of the permanent zero pages.
 */
static inline bool is_zero_page(const struct page *page)
{
	const unsigned long pfn = page_to_pfn(page);

	return is_zero_pfn(pfn);
}
1994
1995	/**
1996	* is_zero_folio - Query if a folio is a zero page
1997	* @folio: The folio to query
1998	*
1999	* This returns true if @folio is one of the permanent zero pages.
2000	*/
2001	static inline bool is_zero_folio(const struct folio *folio)
2002	{
2003	return is_zero_page(page: &folio->page);
2004	}
2005
/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin folios */
2007	#ifdef CONFIG_MIGRATION
/*
 * Decide whether @folio may be long-term pinned.  The checks are applied in
 * order; the first one that matches determines the result.
 */
static inline bool folio_is_longterm_pinnable(struct folio *folio)
{
#ifdef CONFIG_CMA
	int mt = folio_migratetype(folio);

	if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
		return false;
#endif
	/* The zero page can be "pinned" but gets special handling. */
	if (is_zero_folio(folio))
		return true;

	/* Coherent device memory must always allow eviction. */
	if (folio_is_device_coherent(folio))
		return false;

	/*
	 * Filesystems can only tolerate transient delays to truncate and
	 * hole-punch operations
	 */
	if (folio_is_fsdax(folio))
		return false;

	/* Otherwise, non-movable zone folios can be pinned. */
	return !folio_is_zone_movable(folio);

}
2035	#else
/* Without CONFIG_MIGRATION every folio is reported as long-term pinnable. */
static inline bool folio_is_longterm_pinnable(struct folio *folio)
{
	return true;
}
2040	#endif
2041
2042	static inline void set_page_zone(struct page page, enum* zone_type zone)
2043	{
2044	page->flags.f &= ~(ZONES_MASK << ZONES_PGSHIFT);
2045	page->flags.f \|= (zone & ZONES_MASK) << ZONES_PGSHIFT;
2046	}
2047
2048	static inline void set_page_node(struct page page, unsigned* long node)
2049	{
2050	page->flags.f &= ~(NODES_MASK << NODES_PGSHIFT);
2051	page->flags.f \|= (node & NODES_MASK) << NODES_PGSHIFT;
2052	}
2053
2054	static inline void set_page_links(struct page page, enum* zone_type zone,
2055	unsigned long node, unsigned long pfn)
2056	{
2057	set_page_zone(page, zone);
2058	set_page_node(page, node);
2059	#ifdef SECTION_IN_PAGE_FLAGS
2060	set_page_section(page, pfn_to_section_nr(pfn));
2061	#endif
2062	}
2063
/**
 * folio_nr_pages - The number of pages in the folio.
 * @folio: The folio.
 *
 * Return: A positive power of two.
 */
static inline unsigned long folio_nr_pages(const struct folio *folio)
{
	/* A folio that is not large consists of exactly one page. */
	if (!folio_test_large(folio))
		return 1;
	return folio_large_nr_pages(folio);
}
2076
#ifndef CONFIG_ARCH_HAS_GIGANTIC_PAGE
/*
 * Folios larger than what the buddy allocator handles (and therefore larger
 * than a memory section) are not expected.
 */
#define MAX_FOLIO_ORDER		MAX_PAGE_ORDER
#elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
/*
 * Page contiguity is only guaranteed inside one memory section, so cap
 * folios at a single section to keep all folio pages contiguous.
 */
#define MAX_FOLIO_ORDER		PFN_SECTION_SHIFT
#else
/*
 * No real limit on the folio size exists here; use the largest size
 * currently expected (e.g., hugetlb, dax).
 */
#define MAX_FOLIO_ORDER		PUD_ORDER
#endif

/* Number of pages in a folio of the maximum order. */
#define MAX_FOLIO_NR_PAGES	(1UL << MAX_FOLIO_ORDER)
2099
2100	/*
2101	* compound_nr() returns the number of pages in this potentially compound
2102	* page. compound_nr() can be called on a tail page, and is defined to
2103	* return 1 in that case.
2104	*/
2105	static inline unsigned long compound_nr(const struct page *page)
2106	{
2107	const struct folio folio = (struct* folio *)page;
2108
2109	if (!test_bit(PG_head, &folio->flags.f))
2110	return `1`;
2111	return folio_large_nr_pages(folio);
2112	}
2113
/**
 * folio_next - Move to the next physical folio.
 * @folio: The folio we're currently operating on.
 *
 * If you have physically contiguous memory which may span more than
 * one folio (eg a &struct bio_vec), use this function to move from one
 * folio to the next.  Do not use it if the memory is only virtually
 * contiguous as the folios are almost certainly not adjacent to each
 * other.  This is the folio equivalent to writing ``page++``.
 *
 * Context: We assume that the folios are refcounted and/or locked at a
 * higher level and do not adjust the reference counts.
 * Return: The next struct folio.
 */
static inline struct folio *folio_next(struct folio *folio)
{
	return (struct folio *)folio_page(folio, folio_nr_pages(folio));
}
2132
2133	/**
2134	* folio_shift - The size of the memory described by this folio.
2135	* @folio: The folio.
2136	*
2137	* A folio represents a number of bytes which is a power-of-two in size.
2138	* This function tells you which power-of-two the folio is. See also
2139	* folio_size() and folio_order().
2140	*
2141	* Context: The caller should have a reference on the folio to prevent
2142	* it from being split. It is not necessary for the folio to be locked.
2143	* Return: The base-2 logarithm of the size of this folio.
2144	*/
2145	static inline unsigned int folio_shift(const struct folio *folio)
2146	{
2147	return PAGE_SHIFT + folio_order(folio);
2148	}
2149
2150	/**
2151	* folio_size - The number of bytes in a folio.
2152	* @folio: The folio.
2153	*
2154	* Context: The caller should have a reference on the folio to prevent
2155	* it from being split. It is not necessary for the folio to be locked.
2156	* Return: The number of bytes in this folio.
2157	*/
2158	static inline size_t folio_size(const struct folio *folio)
2159	{
2160	return PAGE_SIZE << folio_order(folio);
2161	}
2162
2163	/**
2164	* folio_maybe_mapped_shared - Whether the folio is mapped into the page
2165	* tables of more than one MM
2166	* @folio: The folio.
2167	*
2168	* This function checks if the folio maybe currently mapped into more than one
2169	* MM ("maybe mapped shared"), or if the folio is certainly mapped into a single
2170	* MM ("mapped exclusively").
2171	*
2172	* For KSM folios, this function also returns "mapped shared" when a folio is
2173	* mapped multiple times into the same MM, because the individual page mappings
2174	* are independent.
2175	*
2176	* For small anonymous folios and anonymous hugetlb folios, the return
2177	* value will be exactly correct: non-KSM folios can only be mapped at most once
2178	* into an MM, and they cannot be partially mapped. KSM folios are
2179	* considered shared even if mapped multiple times into the same MM.
2180	*
2181	* For other folios, the result can be fuzzy:
2182	* #. For partially-mappable large folios (THP), the return value can wrongly
2183	* indicate "mapped shared" (false positive) if a folio was mapped by
2184	* more than two MMs at one point in time.
2185	* #. For pagecache folios (including hugetlb), the return value can wrongly
2186	* indicate "mapped shared" (false positive) when two VMAs in the same MM
2187	* cover the same file range.
2188	*
2189	* Further, this function only considers current page table mappings that
2190	* are tracked using the folio mapcount(s).
2191	*
2192	* This function does not consider:
2193	* #. If the folio might get mapped in the (near) future (e.g., swapcache,
2194	* pagecache, temporary unmapping for migration).
2195	* #. If the folio is mapped differently (VM_PFNMAP).
2196	* #. If hugetlb page table sharing applies. Callers might want to check
2197	* hugetlb_pmd_shared().
2198	*
2199	* Return: Whether the folio is estimated to be mapped into more than one MM.
2200	*/
2201	static inline bool folio_maybe_mapped_shared(struct folio *folio)
2202	{
2203	int mapcount = folio_mapcount(folio);
2204
2205	/ Only partially-mappable folios require more care. /
2206	if (!folio_test_large(folio) \|\| unlikely(folio_test_hugetlb(folio)))
2207	return mapcount > `1`;
2208
2209	/*
2210	* vm_insert_page() without CONFIG_TRANSPARENT_HUGEPAGE ...
2211	* simply assume "mapped shared", nobody should really care
2212	* about this for arbitrary kernel allocations.
2213	*/
2214	if (!IS_ENABLED(CONFIG_MM_ID))
2215	return true;
2216
2217	/*
2218	* A single mapping implies "mapped exclusively", even if the
2219	* folio flag says something different: it's easier to handle this
2220	* case here instead of on the RMAP hot path.
2221	*/
2222	if (mapcount <= `1`)
2223	return false;
2224	return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids);
2225	}
2226
2227	/**
2228	* folio_expected_ref_count - calculate the expected folio refcount
2229	* @folio: the folio
2230	*
2231	* Calculate the expected folio refcount, taking references from the pagecache,
2232	* swapcache, PG_private and page table mappings into account. Useful in
2233	* combination with folio_ref_count() to detect unexpected references (e.g.,
2234	* GUP or other temporary references).
2235	*
2236	* Does currently not consider references from the LRU cache. If the folio
2237	* was isolated from the LRU (which is the case during migration or split),
2238	* the LRU cache does not apply.
2239	*
2240	* Calling this function on an unmapped folio -- !folio_mapped() -- that is
2241	* locked will return a stable result.
2242	*
2243	* Calling this function on a mapped folio will not result in a stable result,
2244	* because nothing stops additional page table mappings from coming (e.g.,
2245	* fork()) or going (e.g., munmap()).
2246	*
2247	* Calling this function without the folio lock will also not result in a
2248	* stable result: for example, the folio might get dropped from the swapcache
2249	* concurrently.
2250	*
2251	* However, even when called without the folio lock or on a mapped folio,
2252	* this function can be used to detect unexpected references early (for example,
2253	* if it makes sense to even lock the folio and unmap it).
2254	*
2255	* The caller must add any reference (e.g., from folio_try_get()) it might be
2256	* holding itself to the result.
2257	*
2258	* Returns the expected folio refcount.
2259	*/
2260	static inline int folio_expected_ref_count(const struct folio *folio)
2261	{
2262	const int order = folio_order(folio);
2263	int ref_count = `0`;
2264
2265	if (WARN_ON_ONCE(page_has_type(&folio->page) && !folio_test_hugetlb(folio)))
2266	return `0`;
2267
2268	if (folio_test_anon(folio)) {
2269	/ One reference per page from the swapcache. /
2270	ref_count += folio_test_swapcache(folio) << order;
2271	} else {
2272	/ One reference per page from the pagecache. /
2273	ref_count += !!folio->mapping << order;
2274	/ One reference from PG_private. /
2275	ref_count += folio_test_private(folio);
2276	}
2277
2278	/ One reference per page table mapping. /
2279	return ref_count + folio_mapcount(folio);
2280	}
2281
2282	#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
/*
 * Default used when the architecture does not define
 * HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE: does nothing and returns 0.
 */
static inline int arch_make_folio_accessible(struct folio *folio)
{
	return 0;
}
2287	#endif
2288
2289	/*
2290	* Some inline functions in vmstat.h depend on page_zone()
2291	*/
2292	#include <linux/vmstat.h>
2293
/* HASHED_PAGE_VIRTUAL: CONFIG_HIGHMEM is set and WANT_PAGE_VIRTUAL is not. */
#ifdef CONFIG_HIGHMEM
#ifndef WANT_PAGE_VIRTUAL
#define HASHED_PAGE_VIRTUAL
#endif
#endif
2297
2298	#if defined(WANT_PAGE_VIRTUAL)
2299	static inline void page_address(const* struct page *page)
2300	{
2301	return page->virtual;
2302	}
2303	static inline void set_page_address(struct page page, void* *address)
2304	{
2305	page->virtual = address;
2306	}
2307	#define page_address_init() do { } while(0)
2308	#endif
2309
2310	#if defined(HASHED_PAGE_VIRTUAL)
/* Out-of-line page address helpers used when HASHED_PAGE_VIRTUAL is defined. */
void *page_address(const struct page *page);
void set_page_address(struct page *page, void *virtual);
void page_address_init(void);
2314	#endif
2315
2316	static __always_inline void lowmem_page_address(const* struct page *page)
2317	{
2318	return page_to_virt(page);
2319	}
2320
/*
 * With neither HASHED_PAGE_VIRTUAL nor WANT_PAGE_VIRTUAL, page_address()
 * maps straight to lowmem_page_address() and the setters are no-ops.
 */
#if !(defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL))
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address)  do { } while(0)
#define page_address_init()  do { } while(0)
#endif
2326
2327	static inline void folio_address(const* struct folio *folio)
2328	{
2329	return page_address(&folio->page);
2330	}
2331
2332	/*
2333	* Return true only if the page has been allocated with
2334	* ALLOC_NO_WATERMARKS and the low watermark was not
2335	* met implying that the system is under some pressure.
2336	*/
2337	static inline bool page_is_pfmemalloc(const struct page *page)
2338	{
2339	/*
2340	* lru.next has bit 1 set if the page is allocated from the
2341	* pfmemalloc reserves. Callers may simply overwrite it if
2342	* they do not need to preserve that information.
2343	*/
2344	return (uintptr_t)page->lru.next & BIT(`1`);
2345	}
2346
2347	/*
2348	* Return true only if the folio has been allocated with
2349	* ALLOC_NO_WATERMARKS and the low watermark was not
2350	* met implying that the system is under some pressure.
2351	*/
2352	static inline bool folio_is_pfmemalloc(const struct folio *folio)
2353	{
2354	/*
2355	* lru.next has bit 1 set if the page is allocated from the
2356	* pfmemalloc reserves. Callers may simply overwrite it if
2357	* they do not need to preserve that information.
2358	*/
2359	return (uintptr_t)folio->lru.next & BIT(`1`);
2360	}
2361
2362	/*
2363	* Only to be called by the page allocator on a freshly allocated
2364	* page.
2365	*/
2366	static inline void set_page_pfmemalloc(struct page *page)
2367	{
2368	page->lru.next = (void *)BIT(`1`);
2369	}
2370
2371	static inline void clear_page_pfmemalloc(struct page *page)
2372	{
2373	page->lru.next = NULL;
2374	}
2375
/* Can be called by the pagefault handler when it gets a VM_FAULT_OOM. */
void pagefault_out_of_memory(void);
2380
/* Offset of address @p relative to PAGE_MASK (the bits PAGE_MASK clears). */
#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
/* Offset of address @p within @folio, using folio_size() - 1 as the mask. */
#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1))
2383
2384	/*
2385	* Parameter block passed down to zap_pte_range in exceptional cases.
2386	*/
2387	struct zap_details {
2388	struct folio single_folio; /* Locked folio to be unmapped /
2389	bool even_cows; / Zap COWed private pages too? /
2390	bool reclaim_pt; / Need reclaim page tables? /
2391	zap_flags_t zap_flags; / Extra flags for zapping /
2392	};
2393
/*
 * Whether to drop the pte markers, for example, the uffd-wp information for
 * file-backed memory.  This should only be specified when we will completely
 * drop the page in the mm, either by truncation or unmapping of the vma.  By
 * default, the flag is not set.
 */
#define  ZAP_FLAG_DROP_MARKER        ((__force zap_flags_t) BIT(0))
/* Set in unmap_vmas() to indicate a final unmap call.  Only used by hugetlb */
#define  ZAP_FLAG_UNMAP              ((__force zap_flags_t) BIT(1))
2403
2404	#ifdef CONFIG_SCHED_MM_CID
/* mm_cid hooks, declared here and implemented elsewhere (CONFIG_SCHED_MM_CID). */
void sched_mm_cid_before_execve(struct task_struct *t);
void sched_mm_cid_after_execve(struct task_struct *t);

void sched_mm_cid_fork(struct task_struct *t);
void sched_mm_cid_exit_signals(struct task_struct *t);
2409	static inline int task_mm_cid(struct task_struct *t)
2410	{
2411	return t->mm_cid;
2412	}
2413	#else
/* Empty stand-ins for the mm_cid hooks when CONFIG_SCHED_MM_CID is off. */
static inline void sched_mm_cid_before_execve(struct task_struct *t)
{
}

static inline void sched_mm_cid_after_execve(struct task_struct *t)
{
}

static inline void sched_mm_cid_fork(struct task_struct *t)
{
}

static inline void sched_mm_cid_exit_signals(struct task_struct *t)
{
}
static inline int task_mm_cid(struct task_struct *t)
{
	/*
	 * With the mm cid feature disabled, fall back to the processor id.
	 * This provides functional per-cpu data structure accesses in
	 * user-space, although it won't provide the memory usage benefits.
	 */
	const int cpu = raw_smp_processor_id();

	return cpu;
}
2427	#endif
2428
/* Declared for MMU kernels; without CONFIG_MMU it always returns false. */
#ifdef CONFIG_MMU
bool can_do_mlock(void);
#else
static inline bool can_do_mlock(void)
{
	return false;
}
#endif
/* Paired lock/unlock declarations taking a size and a ucounts pointer. */
int user_shm_lock(size_t, struct ucounts *);
void user_shm_unlock(size_t, struct ucounts *);
2436
2437	struct folio vm_normal_folio(struct* vm_area_struct vma, unsigned* long addr,
2438	pte_t pte);
2439	struct page vm_normal_page(struct* vm_area_struct vma, unsigned* long addr,
2440	pte_t pte);
2441	struct folio vm_normal_folio_pmd(struct* vm_area_struct *vma,
2442	unsigned long addr, pmd_t pmd);
2443	struct page vm_normal_page_pmd(struct* vm_area_struct vma, unsigned* long addr,
2444	pmd_t pmd);
2445	struct page vm_normal_page_pud(struct* vm_area_struct vma, unsigned* long addr,
2446	pud_t pud);
2447
/* Zap @size bytes starting at @address within @vma. */
void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
		  unsigned long size);
/* As above, with an additional zap_details parameter block (may be NULL). */
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
			   unsigned long size, struct zap_details *details);
2452	static inline void zap_vma_pages(struct vm_area_struct *vma)
2453	{
2454	zap_page_range_single(vma, address: vma->vm_start,
2455	size: vma->vm_end - vma->vm_start, NULL);
2456	}
void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
		struct vm_area_struct *start_vma, unsigned long start,
		unsigned long end, unsigned long tree_end, bool mm_wr_locked);
2460
2461	struct mmu_notifier_range;
2462
void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
		unsigned long end, unsigned long floor, unsigned long ceiling);
int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
			void *buf, int len, int write);
2469
2470	struct follow_pfnmap_args {
2471	/**
2472	* Inputs:
2473	* @vma: Pointer to @vm_area_struct struct
2474	* @address: the virtual address to walk
2475	*/
2476	struct vm_area_struct *vma;
2477	unsigned long address;
2478	/**
2479	* Internals:
2480	*
2481	* The caller shouldn't touch any of these.
2482	*/
2483	spinlock_t *lock;
2484	pte_t *ptep;
2485	/**
2486	* Outputs:
2487	*
2488	* @pfn: the PFN of the address
2489	* @addr_mask: address mask covering pfn
2490	* @pgprot: the pgprot_t of the mapping
2491	* @writable: whether the mapping is writable
2492	* @special: whether the mapping is a special mapping (real PFN maps)
2493	*/
2494	unsigned long pfn;
2495	unsigned long addr_mask;
2496	pgprot_t pgprot;
2497	bool writable;
2498	bool special;
2499	};
/* Start/end pair operating on a struct follow_pfnmap_args. */
int follow_pfnmap_start(struct follow_pfnmap_args *args);
void follow_pfnmap_end(struct follow_pfnmap_args *args);
2502
2503	extern void truncate_pagecache(struct inode *inode, loff_t new);
2504	extern void truncate_setsize(struct inode *inode, loff_t newsize);
2505	void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
2506	void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
2507	int generic_error_remove_folio(struct address_space *mapping,
2508	struct folio *folio);
2509
struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
		unsigned long address, struct pt_regs *regs);
2512
2513	#ifdef CONFIG_MMU
2514	extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
2515	unsigned long address, unsigned int flags,
2516	struct pt_regs *regs);
2517	extern int fixup_user_fault(struct mm_struct *mm,
2518	unsigned long address, unsigned int fault_flags,
2519	bool *unlocked);
2520	void unmap_mapping_pages(struct address_space *mapping,
2521	pgoff_t start, pgoff_t nr, bool even_cows);
2522	void unmap_mapping_range(struct address_space *mapping,
2523	loff_t const holebegin, loff_t const holelen, int even_cows);
2524	#else
2525	static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
2526	unsigned long address, unsigned int flags,
2527	struct pt_regs *regs)
2528	{
2529	/ should never happen if there's no MMU /
2530	BUG();
2531	return VM_FAULT_SIGBUS;
2532	}
/* !CONFIG_MMU stub: calls BUG(), then returns -EFAULT. */
static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
		unsigned int fault_flags, bool *unlocked)
{
	/* should never happen if there's no MMU */
	BUG();
	return -EFAULT;
}
2540	static inline void unmap_mapping_pages(struct address_space *mapping,
2541	pgoff_t start, pgoff_t nr, bool even_cows) { }
2542	static inline void unmap_mapping_range(struct address_space *mapping,
2543	loff_t const holebegin, loff_t const holelen, int even_cows) { }
2544	#endif
2545
2546	static inline void unmap_shared_mapping_range(struct address_space *mapping,
2547	loff_t const holebegin, loff_t const holelen)
2548	{
2549	unmap_mapping_range(mapping, holebegin, holelen, even_cows: `0`);
2550	}
2551
/* Forward declaration; used by get_user_page_vma_remote() below. */
static inline struct vm_area_struct *vma_lookup(struct mm_struct *mm,
						unsigned long addr);
2554
extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
		void *buf, int len, unsigned int gup_flags);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
		void *buf, int len, unsigned int gup_flags);
2559
2560	#ifdef CONFIG_BPF_SYSCALL
extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
			      void *buf, int len, unsigned int gup_flags);
2563	#endif
2564
/* Remote get/pin variants: both take an explicit mm and the same arguments. */
long get_user_pages_remote(struct mm_struct *mm, unsigned long start,
			   unsigned long nr_pages, unsigned int gup_flags,
			   struct page **pages, int *locked);
long pin_user_pages_remote(struct mm_struct *mm, unsigned long start,
			   unsigned long nr_pages, unsigned int gup_flags,
			   struct page **pages, int *locked);
2573
2574	/*
2575	* Retrieves a single page alongside its VMA. Does not support FOLL_NOWAIT.
2576	*/
2577	static inline struct page get_user_page_vma_remote(struct* mm_struct *mm,
2578	unsigned long addr,
2579	int gup_flags,
2580	struct vm_area_struct **vmap)
2581	{
2582	struct page *page;
2583	struct vm_area_struct *vma;
2584	int got;
2585
2586	if (WARN_ON_ONCE(unlikely(gup_flags & FOLL_NOWAIT)))
2587	return ERR_PTR(error: -EINVAL);
2588
2589	got = get_user_pages_remote(mm, start: addr, nr_pages: `1`, gup_flags, pages: &page, NULL);
2590
2591	if (got < `0`)
2592	return ERR_PTR(error: got);
2593
2594	vma = vma_lookup(mm, addr);
2595	if (WARN_ON_ONCE(!vma)) {
2596	put_page(page);
2597	return ERR_PTR(error: -EINVAL);
2598	}
2599
2600	*vmap = vma;
2601	return page;
2602	}
2603
2604	long get_user_pages(unsigned long start, unsigned long nr_pages,
2605	unsigned int gup_flags, struct page **pages);
2606	long pin_user_pages(unsigned long start, unsigned long nr_pages,
2607	unsigned int gup_flags, struct page **pages);
2608	long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
2609	struct page *pages, unsigned* int gup_flags);
2610	long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
2611	struct page *pages, unsigned* int gup_flags);
2612	long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
2613	struct folio *folios, unsigned* int max_folios,
2614	pgoff_t *offset);
2615	int folio_add_pins(struct folio folio, unsigned* int pins);
2616
2617	int get_user_pages_fast(unsigned long start, int nr_pages,
2618	unsigned int gup_flags, struct page **pages);
2619	int pin_user_pages_fast(unsigned long start, int nr_pages,
2620	unsigned int gup_flags, struct page **pages);
2621	void folio_add_pin(struct folio *folio);
2622
int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc);
int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
			const struct task_struct *task, bool bypass_rlim);
2626
struct kvec;
struct page *get_dump_page(unsigned long addr, int *locked);
2629
/* Dirtying helpers: folio-based variants first, then page-based ones. */
bool folio_mark_dirty(struct folio *folio);
bool folio_mark_dirty_lock(struct folio *folio);

bool set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
2634
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
2636
/*
 * Flags used by change_protection().  For now we make it a bitmap so
 * that we can pass in multiple flags just like parameters.  However
 * for now all the callers are only use one of the flags at the same
 * time.
 */
/*
 * Whether we should manually check if we can map individual PTEs writable,
 * because something (e.g., COW, uffd-wp) blocks that from happening for all
 * PTEs automatically in a writable mapping.
 */
#define  MM_CP_TRY_CHANGE_WRITABLE	   (1UL << 0)
/* Whether this protection change is for NUMA hints */
#define  MM_CP_PROT_NUMA                    (1UL << 1)
/* Whether this change is for write protecting */
#define  MM_CP_UFFD_WP                      (1UL << 2) /* do wp */
#define  MM_CP_UFFD_WP_RESOLVE              (1UL << 3) /* Resolve wp */
#define  MM_CP_UFFD_WP_ALL                  (MM_CP_UFFD_WP | \
					    MM_CP_UFFD_WP_RESOLVE)
2656
2657	bool can_change_pte_writable(struct vm_area_struct vma, unsigned* long addr,
2658	pte_t pte);
2659	extern long change_protection(struct mmu_gather *tlb,
2660	struct vm_area_struct vma, unsigned* long start,
2661	unsigned long end, unsigned long cp_flags);
2662	extern int mprotect_fixup(struct vma_iterator vmi, struct* mmu_gather *tlb,
2663	struct vm_area_struct vma, struct* vm_area_struct **pprev,
2664	unsigned long start, unsigned long end, vm_flags_t newflags);
2665
/*
 * This variant does not attempt to fault and will return short.
 */
int get_user_pages_fast_only(unsigned long start, int nr_pages,
			     unsigned int gup_flags,
			     struct page **pages);
2671
/*
 * Single-page wrapper around get_user_pages_fast_only(): returns true only
 * when exactly one page was obtained.
 */
static inline bool get_user_page_fast_only(unsigned long addr,
			unsigned int gup_flags, struct page **pagep)
{
	return get_user_pages_fast_only(addr, 1, gup_flags, pagep) == 1;
}
2677	/*
2678	* per-process(per-mm_struct) statistics.
2679	*/
2680	static inline unsigned long get_mm_counter(struct mm_struct mm, int* member)
2681	{
2682	return percpu_counter_read_positive(fbc: &mm->rss_stat[member]);
2683	}
2684
2685	static inline unsigned long get_mm_counter_sum(struct mm_struct mm, int* member)
2686	{
2687	return percpu_counter_sum_positive(fbc: &mm->rss_stat[member]);
2688	}
2689
void mm_trace_rss_stat(struct mm_struct *mm, int member);
2691
2692	static inline void add_mm_counter(struct mm_struct mm, int* member, long value)
2693	{
2694	percpu_counter_add(fbc: &mm->rss_stat[member], amount: value);
2695
2696	mm_trace_rss_stat(mm, member);
2697	}
2698
2699	static inline void inc_mm_counter(struct mm_struct mm, int* member)
2700	{
2701	percpu_counter_inc(fbc: &mm->rss_stat[member]);
2702
2703	mm_trace_rss_stat(mm, member);
2704	}
2705
2706	static inline void dec_mm_counter(struct mm_struct mm, int* member)
2707	{
2708	percpu_counter_dec(fbc: &mm->rss_stat[member]);
2709
2710	mm_trace_rss_stat(mm, member);
2711	}
2712
2713	/ Optimized variant when folio is already known not to be anon /
2714	static inline int mm_counter_file(struct folio *folio)
2715	{
2716	if (folio_test_swapbacked(folio))
2717	return MM_SHMEMPAGES;
2718	return MM_FILEPAGES;
2719	}
2720
2721	static inline int mm_counter(struct folio *folio)
2722	{
2723	if (folio_test_anon(folio))
2724	return MM_ANONPAGES;
2725	return mm_counter_file(folio);
2726	}
2727
2728	static inline unsigned long get_mm_rss(struct mm_struct *mm)
2729	{
2730	return get_mm_counter(mm, member: MM_FILEPAGES) +
2731	get_mm_counter(mm, member: MM_ANONPAGES) +
2732	get_mm_counter(mm, member: MM_SHMEMPAGES);
2733	}
2734
2735	static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
2736	{
2737	return max(mm->hiwater_rss, get_mm_rss(mm));
2738	}
2739
2740	static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
2741	{
2742	return max(mm->hiwater_vm, mm->total_vm);
2743	}
2744
2745	static inline void update_hiwater_rss(struct mm_struct *mm)
2746	{
2747	unsigned long _rss = get_mm_rss(mm);
2748
2749	if (data_race(mm->hiwater_rss) < _rss)
2750	data_race(mm->hiwater_rss = _rss);
2751	}
2752
2753	static inline void update_hiwater_vm(struct mm_struct *mm)
2754	{
2755	if (mm->hiwater_vm < mm->total_vm)
2756	mm->hiwater_vm = mm->total_vm;
2757	}
2758
2759	static inline void reset_mm_hiwater_rss(struct mm_struct *mm)
2760	{
2761	mm->hiwater_rss = get_mm_rss(mm);
2762	}
2763
/*
 * Raise *maxrss to the mm's RSS high-water mark (as returned by
 * get_mm_hiwater_rss()) if that mark is larger; otherwise leave it alone.
 */
static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
					 struct mm_struct *mm)
{
	unsigned long peak = get_mm_hiwater_rss(mm);

	if (*maxrss >= peak)
		return;

	*maxrss = peak;
}
2772
2773	#ifndef CONFIG_ARCH_HAS_PTE_SPECIAL
2774	static inline int pte_special(pte_t pte)
2775	{
2776	return `0`;
2777	}
2778
2779	static inline pte_t pte_mkspecial(pte_t pte)
2780	{
2781	return pte;
2782	}
2783	#endif
2784
2785	#ifndef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
2786	static inline bool pmd_special(pmd_t pmd)
2787	{
2788	return false;
2789	}
2790
2791	static inline pmd_t pmd_mkspecial(pmd_t pmd)
2792	{
2793	return pmd;
2794	}
2795	#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */
2796
2797	#ifndef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
2798	static inline bool pud_special(pud_t pud)
2799	{
2800	return false;
2801	}
2802
2803	static inline pud_t pud_mkspecial(pud_t pud)
2804	{
2805	return pud;
2806	}
2807	#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
2808
2809	extern pte_t __get_locked_pte(struct* mm_struct mm, unsigned* long addr,
2810	spinlock_t **ptl);
2811	static inline pte_t get_locked_pte(struct* mm_struct mm, unsigned* long addr,
2812	spinlock_t **ptl)
2813	{
2814	pte_t *ptep;
2815	__cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl));
2816	return ptep;
2817	}
2818
2819	#ifdef __PAGETABLE_P4D_FOLDED
2820	static inline int __p4d_alloc(struct mm_struct mm, pgd_t pgd,
2821	unsigned long address)
2822	{
2823	return `0`;
2824	}
2825	#else
2826	int __p4d_alloc(struct mm_struct mm, pgd_t pgd, unsigned long address);
2827	#endif
2828
2829	#if defined(__PAGETABLE_PUD_FOLDED) \|\| !defined(CONFIG_MMU)
2830	static inline int __pud_alloc(struct mm_struct mm, p4d_t p4d,
2831	unsigned long address)
2832	{
2833	return `0`;
2834	}
2835	static inline void mm_inc_nr_puds(struct mm_struct *mm) {}
2836	static inline void mm_dec_nr_puds(struct mm_struct *mm) {}
2837
2838	#else
2839	int __pud_alloc(struct mm_struct mm, p4d_t p4d, unsigned long address);
2840
2841	static inline void mm_inc_nr_puds(struct mm_struct *mm)
2842	{
2843	if (mm_pud_folded(mm))
2844	return;
2845	atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), v: &mm->pgtables_bytes);
2846	}
2847
2848	static inline void mm_dec_nr_puds(struct mm_struct *mm)
2849	{
2850	if (mm_pud_folded(mm))
2851	return;
2852	atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), v: &mm->pgtables_bytes);
2853	}
2854	#endif
2855
2856	#if defined(__PAGETABLE_PMD_FOLDED) \|\| !defined(CONFIG_MMU)
2857	static inline int __pmd_alloc(struct mm_struct mm, pud_t pud,
2858	unsigned long address)
2859	{
2860	return `0`;
2861	}
2862
2863	static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
2864	static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
2865
2866	#else
2867	int __pmd_alloc(struct mm_struct mm, pud_t pud, unsigned long address);
2868
2869	static inline void mm_inc_nr_pmds(struct mm_struct *mm)
2870	{
2871	if (mm_pmd_folded(mm))
2872	return;
2873	atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), v: &mm->pgtables_bytes);
2874	}
2875
2876	static inline void mm_dec_nr_pmds(struct mm_struct *mm)
2877	{
2878	if (mm_pmd_folded(mm))
2879	return;
2880	atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), v: &mm->pgtables_bytes);
2881	}
2882	#endif
2883
#ifdef CONFIG_MMU
/* Reset the page-table byte counter of @mm to zero. */
static inline void mm_pgtables_bytes_init(struct mm_struct *mm)
{
	atomic_long_set(&mm->pgtables_bytes, 0);
}

/* Read the page-table byte counter of @mm. */
static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm)
{
	return atomic_long_read(&mm->pgtables_bytes);
}

/* Account one PTE table (PTRS_PER_PTE entries) to @mm. */
static inline void mm_inc_nr_ptes(struct mm_struct *mm)
{
	atomic_long_add(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
}

/* Remove one PTE table (PTRS_PER_PTE entries) from @mm's accounting. */
static inline void mm_dec_nr_ptes(struct mm_struct *mm)
{
	atomic_long_sub(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
}
#else

/* Without CONFIG_MMU the accounting helpers are no-ops and the total is 0. */
static inline void mm_pgtables_bytes_init(struct mm_struct *mm) {}
static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm)
{
	return 0;
}

static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
#endif
2915
2916	int __pte_alloc(struct mm_struct mm, pmd_t pmd);
2917	int __pte_alloc_kernel(pmd_t *pmd);
2918
2919	#if defined(CONFIG_MMU)
2920
2921	static inline p4d_t p4d_alloc(struct* mm_struct mm, pgd_t pgd,
2922	unsigned long address)
2923	{
2924	return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?
2925	NULL : p4d_offset(pgd, address);
2926	}
2927
2928	static inline pud_t pud_alloc(struct* mm_struct mm, p4d_t p4d,
2929	unsigned long address)
2930	{
2931	return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
2932	NULL : pud_offset(p4d, address);
2933	}
2934
2935	static inline pmd_t pmd_alloc(struct* mm_struct mm, pud_t pud, unsigned long address)
2936	{
2937	return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
2938	NULL: pmd_offset(pud, address);
2939	}
2940	#endif /* CONFIG_MMU */
2941
2942	enum pt_flags {
2943	PT_reserved = PG_reserved,
2944	/ High bits are used for zone/node/section /
2945	};
2946
/* Map a virtual address to a ptdesc via virt_to_page() and page_ptdesc(). */
static inline struct ptdesc *virt_to_ptdesc(const void *x)
{
	return page_ptdesc(virt_to_page(x));
}
2951
/**
 * ptdesc_address - Virtual address of page table.
 * @pt: Page table descriptor.
 *
 * Return: The first byte of the page table described by @pt.
 */
static inline void *ptdesc_address(const struct ptdesc *pt)
{
	return folio_address(ptdesc_folio(pt));
}
2962
2963	static inline bool pagetable_is_reserved(struct ptdesc *pt)
2964	{
2965	return test_bit(PT_reserved, &pt->pt_flags.f);
2966	}
2967
2968	/**
2969	* pagetable_alloc - Allocate pagetables
2970	* @gfp: GFP flags
2971	* @order: desired pagetable order
2972	*
2973	* pagetable_alloc allocates memory for page tables as well as a page table
2974	* descriptor to describe that memory.
2975	*
2976	* Return: The ptdesc describing the allocated page tables.
2977	*/
2978	static inline struct ptdesc pagetable_alloc_noprof(gfp_t gfp, unsigned* int order)
2979	{
2980	struct page *page = alloc_pages_noprof(gfp: gfp \| __GFP_COMP, order);
2981
2982	return page_ptdesc(page);
2983	}
2984	#define pagetable_alloc(...) alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__))
2985
/**
 * pagetable_free - Free pagetables
 * @pt:	The page table descriptor
 *
 * pagetable_free frees the memory of all page tables described by a page
 * table descriptor and the memory for the descriptor itself. The order
 * passed to __free_pages() is taken from compound_order() of the page.
 */
static inline void pagetable_free(struct ptdesc *pt)
{
	struct page *page = ptdesc_page(pt);

	__free_pages(page, compound_order(page));
}
2999
3000	#if defined(CONFIG_SPLIT_PTE_PTLOCKS)
3001	#if ALLOC_SPLIT_PTLOCKS
3002	void __init ptlock_cache_init(void);
3003	bool ptlock_alloc(struct ptdesc *ptdesc);
3004	void ptlock_free(struct ptdesc *ptdesc);
3005
3006	static inline spinlock_t ptlock_ptr(struct* ptdesc *ptdesc)
3007	{
3008	return ptdesc->ptl;
3009	}
3010	#else /* ALLOC_SPLIT_PTLOCKS */
3011	static inline void ptlock_cache_init(void)
3012	{
3013	}
3014
3015	static inline bool ptlock_alloc(struct ptdesc *ptdesc)
3016	{
3017	return true;
3018	}
3019
3020	static inline void ptlock_free(struct ptdesc *ptdesc)
3021	{
3022	}
3023
3024	static inline spinlock_t ptlock_ptr(struct* ptdesc *ptdesc)
3025	{
3026	return &ptdesc->ptl;
3027	}
3028	#endif /* ALLOC_SPLIT_PTLOCKS */
3029
3030	static inline spinlock_t pte_lockptr(struct* mm_struct mm, pmd_t pmd)
3031	{
3032	return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
3033	}
3034
3035	static inline spinlock_t ptep_lockptr(struct* mm_struct mm, pte_t pte)
3036	{
3037	BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
3038	BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);
3039	return ptlock_ptr(ptdesc: virt_to_ptdesc(x: pte));
3040	}
3041
3042	static inline bool ptlock_init(struct ptdesc *ptdesc)
3043	{
3044	/*
3045	* prep_new_page() initialize page->private (and therefore page->ptl)
3046	* with 0. Make sure nobody took it in use in between.
3047	*
3048	* It can happen if arch try to use slab for page table allocation:
3049	* slab code uses page->slab_cache, which share storage with page->ptl.
3050	*/
3051	VM_BUG_ON_PAGE((unsigned* long *)&ptdesc->ptl, ptdesc_page(ptdesc));
3052	if (!ptlock_alloc(ptdesc))
3053	return false;
3054	spin_lock_init(ptlock_ptr(ptdesc));
3055	return true;
3056	}
3057
3058	#else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */
3059	/*
3060	* We use mm->page_table_lock to guard all pagetable pages of the mm.
3061	*/
3062	static inline spinlock_t pte_lockptr(struct* mm_struct mm, pmd_t pmd)
3063	{
3064	return &mm->page_table_lock;
3065	}
3066	static inline spinlock_t ptep_lockptr(struct* mm_struct mm, pte_t pte)
3067	{
3068	return &mm->page_table_lock;
3069	}
3070	static inline void ptlock_cache_init(void) {}
3071	static inline bool ptlock_init(struct ptdesc ptdesc) { return* true; }
3072	static inline void ptlock_free(struct ptdesc *ptdesc) {}
3073	#endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */
3074
/* Number of pages backing @ptdesc, as reported by compound_nr(). */
static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc)
{
	unsigned long nr = compound_nr(ptdesc_page(ptdesc));

	return nr;
}
3079
3080	static inline void __pagetable_ctor(struct ptdesc *ptdesc)
3081	{
3082	pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
3083
3084	__SetPageTable(ptdesc_page(ptdesc));
3085	mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc));
3086	}
3087
3088	static inline void pagetable_dtor(struct ptdesc *ptdesc)
3089	{
3090	pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
3091
3092	ptlock_free(ptdesc);
3093	__ClearPageTable(ptdesc_page(ptdesc));
3094	mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc));
3095	}
3096
/* Run pagetable_dtor() on @ptdesc and then free it with pagetable_free(). */
static inline void pagetable_dtor_free(struct ptdesc *ptdesc)
{
	pagetable_dtor(ptdesc);
	pagetable_free(ptdesc);
}
3102
3103	static inline bool pagetable_pte_ctor(struct mm_struct *mm,
3104	struct ptdesc *ptdesc)
3105	{
3106	if (mm != &init_mm && !ptlock_init(ptdesc))
3107	return false;
3108	__pagetable_ctor(ptdesc);
3109	return true;
3110	}
3111
3112	pte_t ___pte_offset_map(pmd_t pmd, unsigned long addr, pmd_t *pmdvalp);
3113	static inline pte_t __pte_offset_map(pmd_t pmd, unsigned long addr,
3114	pmd_t *pmdvalp)
3115	{
3116	pte_t *pte;
3117
3118	__cond_lock(RCU, pte = ___pte_offset_map(pmd, addr, pmdvalp));
3119	return pte;
3120	}
3121	static inline pte_t pte_offset_map(pmd_t pmd, unsigned long addr)
3122	{
3123	return __pte_offset_map(pmd, addr, NULL);
3124	}
3125
3126	pte_t __pte_offset_map_lock(struct* mm_struct mm, pmd_t pmd,
3127	unsigned long addr, spinlock_t **ptlp);
3128	static inline pte_t pte_offset_map_lock(struct* mm_struct mm, pmd_t pmd,
3129	unsigned long addr, spinlock_t **ptlp)
3130	{
3131	pte_t *pte;
3132
3133	__cond_lock(RCU, __cond_lock(*ptlp,
3134	pte = __pte_offset_map_lock(mm, pmd, addr, ptlp)));
3135	return pte;
3136	}
3137
3138	pte_t pte_offset_map_ro_nolock(struct* mm_struct mm, pmd_t pmd,
3139	unsigned long addr, spinlock_t **ptlp);
3140	pte_t pte_offset_map_rw_nolock(struct* mm_struct mm, pmd_t pmd,
3141	unsigned long addr, pmd_t *pmdvalp,
3142	spinlock_t **ptlp);
3143
/* Release @ptl, then unmap @pte. */
#define pte_unmap_unlock(pte, ptl)	do {		\
	spin_unlock(ptl);				\
	pte_unmap(pte);					\
} while (0)

/* Non-zero only if *@pmd was none and __pte_alloc() returned non-zero. */
#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))

/* NULL if pte_alloc() reported non-zero, else the mapped PTE for @address. */
#define pte_alloc_map(mm, pmd, address)			\
	(pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))

/* As pte_alloc_map(), but maps through pte_offset_map_lock(). */
#define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
	(pte_alloc(mm, pmd) ?				\
		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))

/* Kernel variant: uses __pte_alloc_kernel() and pte_offset_kernel(). */
#define pte_alloc_kernel(pmd, address)			\
	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
		NULL: pte_offset_kernel(pmd, address))
3161
3162	#if defined(CONFIG_SPLIT_PMD_PTLOCKS)
3163
3164	static inline struct page pmd_pgtable_page(pmd_t pmd)
3165	{
3166	unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - `1`);
3167	return virt_to_page((void )((unsigned* long) pmd & mask));
3168	}
3169
3170	static inline struct ptdesc pmd_ptdesc(pmd_t pmd)
3171	{
3172	return page_ptdesc(pmd_pgtable_page(pmd));
3173	}
3174
3175	static inline spinlock_t pmd_lockptr(struct* mm_struct mm, pmd_t pmd)
3176	{
3177	return ptlock_ptr(ptdesc: pmd_ptdesc(pmd));
3178	}
3179
3180	static inline bool pmd_ptlock_init(struct ptdesc *ptdesc)
3181	{
3182	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
3183	ptdesc->pmd_huge_pte = NULL;
3184	#endif
3185	return ptlock_init(ptdesc);
3186	}
3187
3188	#define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte)
3189
3190	#else
3191
3192	static inline spinlock_t pmd_lockptr(struct* mm_struct mm, pmd_t pmd)
3193	{
3194	return &mm->page_table_lock;
3195	}
3196
3197	static inline bool pmd_ptlock_init(struct ptdesc ptdesc) { return* true; }
3198
3199	#define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte)
3200
3201	#endif
3202
3203	static inline spinlock_t pmd_lock(struct* mm_struct mm, pmd_t pmd)
3204	{
3205	spinlock_t *ptl = pmd_lockptr(mm, pmd);
3206	spin_lock(lock: ptl);
3207	return ptl;
3208	}
3209
3210	static inline bool pagetable_pmd_ctor(struct mm_struct *mm,
3211	struct ptdesc *ptdesc)
3212	{
3213	if (mm != &init_mm && !pmd_ptlock_init(ptdesc))
3214	return false;
3215	ptdesc_pmd_pts_init(ptdesc);
3216	__pagetable_ctor(ptdesc);
3217	return true;
3218	}
3219
3220	/*
3221	* No scalability reason to split PUD locks yet, but follow the same pattern
3222	* as the PMD locks to make it easier if we decide to. The VM should not be
3223	* considered ready to switch to split PUD locks yet; there may be places
3224	* which need to be converted from page_table_lock.
3225	*/
3226	static inline spinlock_t pud_lockptr(struct* mm_struct mm, pud_t pud)
3227	{
3228	return &mm->page_table_lock;
3229	}
3230
3231	static inline spinlock_t pud_lock(struct* mm_struct mm, pud_t pud)
3232	{
3233	spinlock_t *ptl = pud_lockptr(mm, pud);
3234
3235	spin_lock(lock: ptl);
3236	return ptl;
3237	}
3238
/*
 * The PUD, P4D and PGD constructors do no level-specific work: each one
 * only forwards to __pagetable_ctor().
 */
static inline void pagetable_pud_ctor(struct ptdesc *ptdesc)
{
	__pagetable_ctor(ptdesc);
}

static inline void pagetable_p4d_ctor(struct ptdesc *ptdesc)
{
	__pagetable_ctor(ptdesc);
}

static inline void pagetable_pgd_ctor(struct ptdesc *ptdesc)
{
	__pagetable_ctor(ptdesc);
}
3253
3254	extern void __init pagecache_init(void);
3255	extern void free_initmem(void);
3256
3257	/*
3258	* Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)
3259	* into the buddy system. The freed pages will be poisoned with pattern
3260	* "poison" if it's within range [0, UCHAR_MAX].
3261	* Return pages freed into the buddy system.
3262	*/
3263	extern unsigned long free_reserved_area(void start, void* *end,
3264	int poison, const char *s);
3265
3266	extern void adjust_managed_page_count(struct page page, long* count);
3267
3268	extern void reserve_bootmem_region(phys_addr_t start,
3269	phys_addr_t end, int nid);
3270
3271	/ Free the reserved page into the buddy system, so it gets managed. /
3272	void free_reserved_page(struct page *page);
3273
/* Set the reserved flag on @page and lower the managed page count by one. */
static inline void mark_page_reserved(struct page *page)
{
	SetPageReserved(page);
	adjust_managed_page_count(page, -1);
}
3279
/* ptdesc flavour of free_reserved_page(): frees the page behind @pt. */
static inline void free_reserved_ptdesc(struct ptdesc *pt)
{
	struct page *page = ptdesc_page(pt);

	free_reserved_page(page);
}
3284
/*
 * Default method to free all the __init memory into the buddy system.
 * The freed pages will be poisoned with pattern "poison" if it's within
 * range [0, UCHAR_MAX].
 * Return pages freed into the buddy system.
 */
static inline unsigned long free_initmem_default(int poison)
{
	/* Bounds of the region handed to free_reserved_area(). */
	extern char __init_begin[], __init_end[];

	return free_reserved_area(&__init_begin, &__init_end,
				  poison, "unused kernel image (initmem)");
}
3298
3299	static inline unsigned long get_num_physpages(void)
3300	{
3301	int nid;
3302	unsigned long phys_pages = `0`;
3303
3304	for_each_online_node(nid)
3305	phys_pages += node_present_pages(nid);
3306
3307	return phys_pages;
3308	}
3309
/*
 * Using memblock node mappings, an architecture may initialise its
 * zones, allocate the backing mem_map and account for memory holes in an
 * architecture independent manner.
 *
 * An architecture is expected to register range of page frames backed by
 * physical memory with memblock_add[_node]() before calling
 * free_area_init() passing in the PFN each zone ends at. At a basic
 * usage, an architecture is expected to do something like
 *
 * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
 * 							 max_highmem_pfn};
 * for_each_valid_physical_page_range()
 *	memblock_add_node(base, size, nid, MEMBLOCK_NONE)
 * free_area_init(max_zone_pfns);
 */
void free_area_init(unsigned long *max_zone_pfn);
unsigned long node_map_pfn_alignment(void);
extern unsigned long absent_pages_in_range(unsigned long start_pfn,
						unsigned long end_pfn);
extern void get_pfn_range_for_nid(unsigned int nid,
			unsigned long *start_pfn, unsigned long *end_pfn);
3332
#ifndef CONFIG_NUMA
/* Without CONFIG_NUMA every PFN is reported as belonging to node 0. */
static inline int early_pfn_to_nid(unsigned long pfn)
{
	return 0;
}
#else
/* please see mm/page_alloc.c */
extern int __meminit early_pfn_to_nid(unsigned long pfn);
#endif
3342
3343	extern void mem_init(void);
3344	extern void __init mmap_init(void);
3345
3346	extern void __show_mem(unsigned int flags, nodemask_t nodemask, int* max_zone_idx);
3347	static inline void show_mem(void)
3348	{
3349	__show_mem(flags: `0`, NULL, MAX_NR_ZONES - `1`);
3350	}
3351	extern long si_mem_available(void);
3352	extern void si_meminfo(struct sysinfo * val);
3353	extern void si_meminfo_node(struct sysinfo val, int* nid);
3354
3355	extern __printf(`3`, `4`)
3356	void warn_alloc(gfp_t gfp_mask, nodemask_t nodemask, const* char *fmt, ...);
3357
3358	extern void setup_per_cpu_pageset(void);
3359
3360	/ nommu.c /
3361	extern atomic_long_t mmap_pages_allocated;
3362	extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
3363
/* interval_tree.c */
void vma_interval_tree_insert(struct vm_area_struct *node,
			      struct rb_root_cached *root);
void vma_interval_tree_insert_after(struct vm_area_struct *node,
				    struct vm_area_struct *prev,
				    struct rb_root_cached *root);
void vma_interval_tree_remove(struct vm_area_struct *node,
			      struct rb_root_cached *root);
struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root,
				unsigned long start, unsigned long last);
struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
				unsigned long start, unsigned long last);

/* Walk the VMAs produced by the iter_first/iter_next pair for [start, last]. */
#define vma_interval_tree_foreach(vma, root, start, last)		\
	for (vma = vma_interval_tree_iter_first(root, start, last);	\
	     vma; vma = vma_interval_tree_iter_next(vma, start, last))
3380
void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
				   struct rb_root_cached *root);
void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
				   struct rb_root_cached *root);
struct anon_vma_chain *
anon_vma_interval_tree_iter_first(struct rb_root_cached *root,
				  unsigned long start, unsigned long last);
struct anon_vma_chain *anon_vma_interval_tree_iter_next(
	struct anon_vma_chain *node, unsigned long start, unsigned long last);
#ifdef CONFIG_DEBUG_VM_RB
void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
#endif

/* Walk the chains produced by the iter_first/iter_next pair for [start, last]. */
#define anon_vma_interval_tree_foreach(avc, root, start, last)		 \
	for (avc = anon_vma_interval_tree_iter_first(root, start, last); \
	     avc; avc = anon_vma_interval_tree_iter_next(avc, start, last))
3397
3398	/ mmap.c /
3399	extern int __vm_enough_memory(const struct mm_struct mm, long* pages, int cap_sys_admin);
3400	extern int insert_vm_struct(struct mm_struct , struct* vm_area_struct *);
3401	extern void exit_mmap(struct mm_struct *);
3402	bool mmap_read_lock_maybe_expand(struct mm_struct mm, struct* vm_area_struct *vma,
3403	unsigned long addr, bool write);
3404
/**
 * check_data_rlimit - Check a prospective data segment size against a limit.
 * @rlim: The limit; RLIM_INFINITY (or anything not below it) disables the check.
 * @new: Prospective end of the first range.
 * @start: Start of the first range.
 * @end_data: End of the second range.
 * @start_data: Start of the second range.
 *
 * Return: -ENOSPC if (@new - @start) + (@end_data - @start_data) exceeds
 * @rlim, 0 otherwise.
 */
static inline int check_data_rlimit(unsigned long rlim,
				    unsigned long new,
				    unsigned long start,
				    unsigned long end_data,
				    unsigned long start_data)
{
	if (rlim < RLIM_INFINITY) {
		if (((new - start) + (end_data - start_data)) > rlim)
			return -ENOSPC;
	}

	return 0;
}
3418
3419	extern int mm_take_all_locks(struct mm_struct *mm);
3420	extern void mm_drop_all_locks(struct mm_struct *mm);
3421
3422	extern int set_mm_exe_file(struct mm_struct mm, struct* file *new_exe_file);
3423	extern int replace_mm_exe_file(struct mm_struct mm, struct* file *new_exe_file);
3424	extern struct file get_mm_exe_file(struct* mm_struct *mm);
3425	extern struct file get_task_exe_file(struct* task_struct *task);
3426
3427	extern bool may_expand_vm(struct mm_struct , vm_flags_t, unsigned* long npages);
3428	extern void vm_stat_account(struct mm_struct , vm_flags_t, long* npages);
3429
3430	extern bool vma_is_special_mapping(const struct vm_area_struct *vma,
3431	const struct vm_special_mapping *sm);
3432	struct vm_area_struct _install_special_mapping(struct* mm_struct *mm,
3433	unsigned long addr, unsigned long len,
3434	vm_flags_t vm_flags,
3435	const struct vm_special_mapping *spec);
3436
3437	unsigned long randomize_stack_top(unsigned long stack_top);
3438	unsigned long randomize_page(unsigned long start, unsigned long range);
3439
3440	unsigned long
3441	__get_unmapped_area(struct file file, unsigned* long addr, unsigned long len,
3442	unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags);
3443
/* __get_unmapped_area() with vm_flags fixed to 0. */
static inline unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
		  unsigned long pgoff, unsigned long flags)
{
	return __get_unmapped_area(file, addr, len, pgoff, flags, 0);
}
3450
3451	extern unsigned long do_mmap(struct file file, unsigned* long addr,
3452	unsigned long len, unsigned long prot, unsigned long flags,
3453	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
3454	struct list_head *uf);
3455	extern int do_vmi_munmap(struct vma_iterator vmi, struct* mm_struct *mm,
3456	unsigned long start, size_t len, struct list_head *uf,
3457	bool unlock);
3458	int do_vmi_align_munmap(struct vma_iterator vmi, struct* vm_area_struct *vma,
3459	struct mm_struct mm, unsigned* long start,
3460	unsigned long end, struct list_head *uf, bool unlock);
3461	extern int do_munmap(struct mm_struct , unsigned* long, size_t,
3462	struct list_head *uf);
3463	extern int do_madvise(struct mm_struct mm, unsigned* long start, size_t len_in, int behavior);
3464
#ifdef CONFIG_MMU
extern int __mm_populate(unsigned long addr, unsigned long len,
			 int ignore_errors);
/* Call __mm_populate() with ignore_errors set and discard its result. */
static inline void mm_populate(unsigned long addr, unsigned long len)
{
	/* Ignore errors */
	(void) __mm_populate(addr, len, 1);
}
#else
/* Without CONFIG_MMU this is a no-op. */
static inline void mm_populate(unsigned long addr, unsigned long len) {}
#endif
3476
3477	/ This takes the mm semaphore itself /
3478	extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
3479	extern int vm_munmap(unsigned long, size_t);
3480	extern unsigned long __must_check vm_mmap(struct file , unsigned* long,
3481	unsigned long, unsigned long,
3482	unsigned long, unsigned long);
3483
/*
 * Argument block passed to vm_unmapped_area().
 * NOTE(review): VM_UNMAPPED_AREA_TOPDOWN is defined next to @flags and is
 * presumably a value for that field - confirm against vm_unmapped_area().
 */
struct vm_unmapped_area_info {
#define VM_UNMAPPED_AREA_TOPDOWN 1
	unsigned long flags;
	unsigned long length;
	unsigned long low_limit;
	unsigned long high_limit;
	unsigned long align_mask;
	unsigned long align_offset;
	unsigned long start_gap;
};

extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
3496
3497	/ truncate.c /
3498	extern void truncate_inode_pages(struct address_space *, loff_t);
3499	extern void truncate_inode_pages_range(struct address_space *,
3500	loff_t lstart, loff_t lend);
3501	extern void truncate_inode_pages_final(struct address_space *);
3502
3503	/ generic vm_area_ops exported for stackable file systems /
3504	extern vm_fault_t filemap_fault(struct vm_fault *vmf);
3505	extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
3506	pgoff_t start_pgoff, pgoff_t end_pgoff);
3507	extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
3508
extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
int expand_stack_locked(struct vm_area_struct *vma, unsigned long address);
struct vm_area_struct *expand_stack(struct mm_struct * mm, unsigned long addr);

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
					     struct vm_area_struct **pprev);

/*
 * Look up the first VMA which intersects the interval [start_addr, end_addr)
 * NULL if none.  Assume start_addr < end_addr.
 */
struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
			unsigned long start_addr, unsigned long end_addr);
3525
3526	/**
3527	* vma_lookup() - Find a VMA at a specific address
3528	* @mm: The process address space.
3529	* @addr: The user address.
3530	*
3531	* Return: The vm_area_struct at the given address, %NULL otherwise.
3532	*/
3533	static inline
3534	struct vm_area_struct vma_lookup(struct* mm_struct mm, unsigned* long addr)
3535	{
3536	return mtree_load(mt: &mm->mm_mt, index: addr);
3537	}
3538
3539	static inline unsigned long stack_guard_start_gap(const struct vm_area_struct *vma)
3540	{
3541	if (vma->vm_flags & VM_GROWSDOWN)
3542	return stack_guard_gap;
3543
3544	/ See reasoning around the VM_SHADOW_STACK definition /
3545	if (vma->vm_flags & VM_SHADOW_STACK)
3546	return PAGE_SIZE;
3547
3548	return `0`;
3549	}
3550
3551	static inline unsigned long vm_start_gap(const struct vm_area_struct *vma)
3552	{
3553	unsigned long gap = stack_guard_start_gap(vma);
3554	unsigned long vm_start = vma->vm_start;
3555
3556	vm_start -= gap;
3557	if (vm_start > vma->vm_start)
3558	vm_start = `0`;
3559	return vm_start;
3560	}
3561
3562	static inline unsigned long vm_end_gap(const struct vm_area_struct *vma)
3563	{
3564	unsigned long vm_end = vma->vm_end;
3565
3566	if (vma->vm_flags & VM_GROWSUP) {
3567	vm_end += stack_guard_gap;
3568	if (vm_end < vma->vm_end)
3569	vm_end = -PAGE_SIZE;
3570	}
3571	return vm_end;
3572	}
3573
3574	static inline unsigned long vma_pages(const struct vm_area_struct *vma)
3575	{
3576	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
3577	}
3578
3579	/ Look up the first VMA which exactly match the interval vm_start ... vm_end /
3580	static inline struct vm_area_struct find_exact_vma(struct* mm_struct *mm,
3581	unsigned long vm_start, unsigned long vm_end)
3582	{
3583	struct vm_area_struct *vma = vma_lookup(mm, addr: vm_start);
3584
3585	if (vma && (vma->vm_start != vm_start \|\| vma->vm_end != vm_end))
3586	vma = NULL;
3587
3588	return vma;
3589	}
3590
3591	static inline bool range_in_vma(const struct vm_area_struct *vma,
3592	unsigned long start, unsigned long end)
3593	{
3594	return (vma && vma->vm_start <= start && end <= vma->vm_end);
3595	}
3596
3597	#ifdef CONFIG_MMU
3598	pgprot_t vm_get_page_prot(vm_flags_t vm_flags);
3599	void vma_set_page_prot(struct vm_area_struct *vma);
3600	#else
3601	static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
3602	{
3603	return __pgprot(`0`);
3604	}
3605	static inline void vma_set_page_prot(struct vm_area_struct *vma)
3606	{
3607	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3608	}
3609	#endif
3610
3611	void vma_set_file(struct vm_area_struct vma, struct* file *file);
3612
3613	#ifdef CONFIG_NUMA_BALANCING
3614	unsigned long change_prot_numa(struct vm_area_struct *vma,
3615	unsigned long start, unsigned long end);
3616	#endif
3617
3618	struct vm_area_struct find_extend_vma_locked(struct* mm_struct *,
3619	unsigned long addr);
3620	int remap_pfn_range(struct vm_area_struct , unsigned* long addr,
3621	unsigned long pfn, unsigned long size, pgprot_t);
3622	int remap_pfn_range_notrack(struct vm_area_struct vma, unsigned* long addr,
3623	unsigned long pfn, unsigned long size, pgprot_t prot);
3624	int vm_insert_page(struct vm_area_struct , unsigned* long addr, struct page *);
3625	int vm_insert_pages(struct vm_area_struct vma, unsigned* long addr,
3626	struct page *pages, unsigned* long *num);
3627	int vm_map_pages(struct vm_area_struct vma, struct* page **pages,
3628	unsigned long num);
3629	int vm_map_pages_zero(struct vm_area_struct vma, struct* page **pages,
3630	unsigned long num);
3631	vm_fault_t vmf_insert_page_mkwrite(struct vm_fault vmf, struct* page *page,
3632	bool write);
3633	vm_fault_t vmf_insert_pfn(struct vm_area_struct vma, unsigned* long addr,
3634	unsigned long pfn);
3635	vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct vma, unsigned* long addr,
3636	unsigned long pfn, pgprot_t pgprot);
3637	vm_fault_t vmf_insert_mixed(struct vm_area_struct vma, unsigned* long addr,
3638	unsigned long pfn);
3639	vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
3640	unsigned long addr, unsigned long pfn);
3641	int vm_iomap_memory(struct vm_area_struct vma, phys_addr_t start, unsigned* long len);
3642
3643	static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma,
3644	unsigned long addr, struct page *page)
3645	{
3646	int err = vm_insert_page(vma, addr, page);
3647
3648	if (err == -ENOMEM)
3649	return VM_FAULT_OOM;
3650	if (err < `0` && err != -EBUSY)
3651	return VM_FAULT_SIGBUS;
3652
3653	return VM_FAULT_NOPAGE;
3654	}
3655
3656	#ifndef io_remap_pfn_range
3657	static inline int io_remap_pfn_range(struct vm_area_struct *vma,
3658	unsigned long addr, unsigned long pfn,
3659	unsigned long size, pgprot_t prot)
3660	{
3661	return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot));
3662	}
3663	#endif
3664
3665	static inline vm_fault_t vmf_error(int err)
3666	{
3667	if (err == -ENOMEM)
3668	return VM_FAULT_OOM;
3669	else if (err == -EHWPOISON)
3670	return VM_FAULT_HWPOISON;
3671	return VM_FAULT_SIGBUS;
3672	}
3673
3674	/*
3675	* Convert errno to return value for ->page_mkwrite() calls.
3676	*
3677	* This should eventually be merged with vmf_error() above, but will need a
3678	* careful audit of all vmf_error() callers.
3679	*/
3680	static inline vm_fault_t vmf_fs_error(int err)
3681	{
3682	if (err == `0`)
3683	return VM_FAULT_LOCKED;
3684	if (err == -EFAULT \|\| err == -EAGAIN)
3685	return VM_FAULT_NOPAGE;
3686	if (err == -ENOMEM)
3687	return VM_FAULT_OOM;
3688	/ -ENOSPC, -EDQUOT, -EIO ... /
3689	return VM_FAULT_SIGBUS;
3690	}
3691
3692	static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
3693	{
3694	if (vm_fault & VM_FAULT_OOM)
3695	return -ENOMEM;
3696	if (vm_fault & (VM_FAULT_HWPOISON \| VM_FAULT_HWPOISON_LARGE))
3697	return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT;
3698	if (vm_fault & (VM_FAULT_SIGBUS \| VM_FAULT_SIGSEGV))
3699	return -EFAULT;
3700	return `0`;
3701	}
3702
3703	/*
3704	* Indicates whether GUP can follow a PROT_NONE mapped page, or whether
3705	* a (NUMA hinting) fault is required.
3706	*/
3707	static inline bool gup_can_follow_protnone(const struct vm_area_struct *vma,
3708	unsigned int flags)
3709	{
3710	/*
3711	* If callers don't want to honor NUMA hinting faults, no need to
3712	* determine if we would actually have to trigger a NUMA hinting fault.
3713	*/
3714	if (!(flags & FOLL_HONOR_NUMA_FAULT))
3715	return true;
3716
3717	/*
3718	* NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.
3719	*
3720	* Requiring a fault here even for inaccessible VMAs would mean that
3721	* FOLL_FORCE cannot make any progress, because handle_mm_fault()
3722	* refuses to process NUMA hinting faults in inaccessible VMAs.
3723	*/
3724	return !vma_is_accessible(vma);
3725	}
3726
3727	typedef int (pte_fn_t)(pte_t pte, unsigned long addr, void *data);
3728	extern int apply_to_page_range(struct mm_struct mm, unsigned* long address,
3729	unsigned long size, pte_fn_t fn, void *data);
3730	extern int apply_to_existing_page_range(struct mm_struct *mm,
3731	unsigned long address, unsigned long size,
3732	pte_fn_t fn, void *data);
3733
#ifdef CONFIG_PAGE_POISONING
extern void __kernel_poison_pages(struct page *page, int numpages);
extern void __kernel_unpoison_pages(struct page *page, int numpages);
extern bool _page_poisoning_enabled_early;
DECLARE_STATIC_KEY_FALSE(_page_poisoning_enabled);

/* Reports the early flag, not the static key. */
static inline bool page_poisoning_enabled(void)
{
	return _page_poisoning_enabled_early;
}

/*
 * For use in fast paths after mem_debugging_and_hardening_init() has run,
 * or when a false negative result is not harmful when called too early.
 */
static inline bool page_poisoning_enabled_static(void)
{
	return static_branch_unlikely(&_page_poisoning_enabled);
}

/* Poison @numpages pages starting at @page, but only if the static key is on. */
static inline void kernel_poison_pages(struct page *page, int numpages)
{
	if (page_poisoning_enabled_static())
		__kernel_poison_pages(page, numpages);
}

/* Unpoison @numpages pages starting at @page, but only if the static key is on. */
static inline void kernel_unpoison_pages(struct page *page, int numpages)
{
	if (page_poisoning_enabled_static())
		__kernel_unpoison_pages(page, numpages);
}
#else
/* !CONFIG_PAGE_POISONING: poisoning is compiled out, everything is a no-op. */
static inline bool page_poisoning_enabled(void) { return false; }
static inline bool page_poisoning_enabled_static(void) { return false; }
static inline void __kernel_poison_pages(struct page *page, int numpages) { }
static inline void kernel_poison_pages(struct page *page, int numpages) { }
static inline void kernel_unpoison_pages(struct page *page, int numpages) { }
#endif
3768
3769	DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
3770	static inline bool want_init_on_alloc(gfp_t flags)
3771	{
3772	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
3773	&init_on_alloc))
3774	return true;
3775	return flags & __GFP_ZERO;
3776	}
3777
3778	DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
3779	static inline bool want_init_on_free(void)
3780	{
3781	return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
3782	&init_on_free);
3783	}
3784
3785	extern bool _debug_pagealloc_enabled_early;
3786	DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
3787
3788	static inline bool debug_pagealloc_enabled(void)
3789	{
3790	return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
3791	_debug_pagealloc_enabled_early;
3792	}
3793
3794	/*
3795	* For use in fast paths after mem_debugging_and_hardening_init() has run,
3796	* or when a false negative result is not harmful when called too early.
3797	*/
3798	static inline bool debug_pagealloc_enabled_static(void)
3799	{
3800	if (!IS_ENABLED(CONFIG_DEBUG_PAGEALLOC))
3801	return false;
3802
3803	return static_branch_unlikely(&_debug_pagealloc_enabled);
3804	}
3805
/*
 * To support DEBUG_PAGEALLOC, the architecture must ensure that
 * __kernel_map_pages() never fails.
 */
extern void __kernel_map_pages(struct page *page, int numpages, int enable);
#ifdef CONFIG_DEBUG_PAGEALLOC
/* Call __kernel_map_pages(..., 1) for the range when the static key is on. */
static inline void debug_pagealloc_map_pages(struct page *page, int numpages)
{
	if (debug_pagealloc_enabled_static())
		__kernel_map_pages(page, numpages, 1);
}

/* Call __kernel_map_pages(..., 0) for the range when the static key is on. */
static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages)
{
	if (debug_pagealloc_enabled_static())
		__kernel_map_pages(page, numpages, 0);
}

extern unsigned int _debug_guardpage_minorder;
DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);

/* Return: the current value of _debug_guardpage_minorder. */
static inline unsigned int debug_guardpage_minorder(void)
{
	return _debug_guardpage_minorder;
}

/* Return: the state of the _debug_guardpage_enabled static key. */
static inline bool debug_guardpage_enabled(void)
{
	return static_branch_unlikely(&_debug_guardpage_enabled);
}

/* Return: PageGuard(@page), or false when guard pages are disabled. */
static inline bool page_is_guard(const struct page *page)
{
	if (!debug_guardpage_enabled())
		return false;

	return PageGuard(page);
}

bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order);

/* Forward to __set_page_guard(); returns false when guard pages are disabled. */
static inline bool set_page_guard(struct zone *zone, struct page *page,
				  unsigned int order)
{
	if (!debug_guardpage_enabled())
		return false;
	return __set_page_guard(zone, page, order);
}

void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order);

/* Forward to __clear_page_guard(); no-op when guard pages are disabled. */
static inline void clear_page_guard(struct zone *zone, struct page *page,
				    unsigned int order)
{
	if (!debug_guardpage_enabled())
		return;
	__clear_page_guard(zone, page, order);
}

#else	/* CONFIG_DEBUG_PAGEALLOC */
/* !CONFIG_DEBUG_PAGEALLOC: every helper collapses to a constant or a no-op. */
static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {}
static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {}
static inline unsigned int debug_guardpage_minorder(void) { return 0; }
static inline bool debug_guardpage_enabled(void) { return false; }
static inline bool page_is_guard(const struct page *page) { return false; }
static inline bool set_page_guard(struct zone *zone, struct page *page,
			unsigned int order) { return false; }
static inline void clear_page_guard(struct zone *zone, struct page *page,
				unsigned int order) {}
#endif	/* CONFIG_DEBUG_PAGEALLOC */
3874
#ifdef __HAVE_ARCH_GATE_AREA
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
extern int in_gate_area_no_mm(unsigned long addr);
extern int in_gate_area(struct mm_struct *mm, unsigned long addr);
#else
/* Without __HAVE_ARCH_GATE_AREA there is no gate VMA: NULL / 0 everywhere. */
static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
	return NULL;
}
static inline int in_gate_area_no_mm(unsigned long addr) { return 0; }
static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
	return 0;
}
#endif	/* __HAVE_ARCH_GATE_AREA */
3890
bool process_shares_mm(const struct task_struct *p, const struct mm_struct *mm);

void drop_slab(void);
3894
/* Without an MMU the symbol is a compile-time constant 0 instead of a variable. */
#ifndef CONFIG_MMU
#define randomize_va_space 0
#else
extern int randomize_va_space;
#endif
3900
const char * arch_vma_name(struct vm_area_struct *vma);
#ifdef CONFIG_MMU
void print_vma_addr(char *prefix, unsigned long rip);
#else
/* !CONFIG_MMU: nothing to print. */
static inline void print_vma_addr(char *prefix, unsigned long rip)
{
}
#endif
3909
3910	void sparse_buffer_alloc(unsigned* long size);
3911	unsigned long section_map_size(void);
3912	struct page * __populate_section_memmap(unsigned long pfn,
3913	unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
3914	struct dev_pagemap *pgmap);
3915	pgd_t vmemmap_pgd_populate(unsigned* long addr, int node);
3916	p4d_t vmemmap_p4d_populate(pgd_t pgd, unsigned long addr, int node);
3917	pud_t vmemmap_pud_populate(p4d_t p4d, unsigned long addr, int node);
3918	pmd_t vmemmap_pmd_populate(pud_t pud, unsigned long addr, int node);
3919	pte_t vmemmap_pte_populate(pmd_t pmd, unsigned long addr, int node,
3920	struct vmem_altmap altmap, unsigned* long ptpfn,
3921	unsigned long flags);
3922	void vmemmap_alloc_block(unsigned* long size, int node);
3923	struct vmem_altmap;
3924	void vmemmap_alloc_block_buf(unsigned* long size, int node,
3925	struct vmem_altmap *altmap);
3926	void vmemmap_verify(pte_t , int, unsigned* long, unsigned long);
3927	void vmemmap_set_pmd(pmd_t pmd, void* p, int* node,
3928	unsigned long addr, unsigned long next);
3929	int vmemmap_check_pmd(pmd_t pmd, int* node,
3930	unsigned long addr, unsigned long next);
3931	int vmemmap_populate_basepages(unsigned long start, unsigned long end,
3932	int node, struct vmem_altmap *altmap);
3933	int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
3934	int node, struct vmem_altmap *altmap);
3935	int vmemmap_populate(unsigned long start, unsigned long end, int node,
3936	struct vmem_altmap *altmap);
3937	int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node,
3938	unsigned long headsize);
3939	int vmemmap_undo_hvo(unsigned long start, unsigned long end, int node,
3940	unsigned long headsize);
3941	void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
3942	unsigned long headsize);
3943	void vmemmap_populate_print_last(void);
3944	#ifdef CONFIG_MEMORY_HOTPLUG
3945	void vmemmap_free(unsigned long start, unsigned long end,
3946	struct vmem_altmap *altmap);
3947	#endif
3948
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* Return: @altmap->reserve + @altmap->free, or 0 when @altmap is NULL. */
static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap)
{
	/* number of pfns from base where pfn_to_page() is valid */
	if (altmap)
		return altmap->reserve + altmap->free;
	return 0;
}

/* Subtract @nr_pfns from @altmap->alloc. @altmap is dereferenced unchecked. */
static inline void vmem_altmap_free(struct vmem_altmap *altmap,
				    unsigned long nr_pfns)
{
	altmap->alloc -= nr_pfns;
}
#else
/* !CONFIG_SPARSEMEM_VMEMMAP: the offset is always 0 and freeing is a no-op. */
static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap)
{
	return 0;
}

static inline void vmem_altmap_free(struct vmem_altmap *altmap,
				    unsigned long nr_pfns)
{
}
#endif
3974
#define VMEMMAP_RESERVE_NR	2
#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
/*
 * Generic rule for deciding whether the vmemmap of @pgmap can be optimized.
 *
 * Return: false when @pgmap is NULL, when sizeof(struct page) is not a power
 * of two, or when @altmap is non-NULL; otherwise whether the vmemmap spans
 * more than VMEMMAP_RESERVE_NR pages.
 */
static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap,
					  struct dev_pagemap *pgmap)
{
	unsigned long nr_pages;
	unsigned long nr_vmemmap_pages;

	if (!pgmap || !is_power_of_2(sizeof(struct page)))
		return false;

	nr_pages = pgmap_vmemmap_nr(pgmap);
	nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT);
	/*
	 * For vmemmap optimization with DAX we need minimum 2 vmemmap
	 * pages. See layout diagram in Documentation/mm/vmemmap_dedup.rst
	 */
	return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR);
}
/*
 * If we don't have an architecture override, use the generic rule
 */
#ifndef vmemmap_can_optimize
#define vmemmap_can_optimize __vmemmap_can_optimize
#endif

#else
/* Optimization not configured: never optimize. */
static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
					struct dev_pagemap *pgmap)
{
	return false;
}
#endif
4008
/* Bit flags (one bit each, bits 0..7); combinable with bitwise OR. */
enum mf_flags {
	MF_COUNT_INCREASED = 1 << 0,
	MF_ACTION_REQUIRED = 1 << 1,
	MF_MUST_KILL = 1 << 2,
	MF_SOFT_OFFLINE = 1 << 3,
	MF_UNPOISON = 1 << 4,
	MF_SW_SIMULATED = 1 << 5,
	MF_NO_RETRY = 1 << 6,
	MF_MEM_PRE_REMOVE = 1 << 7,
};
4019	int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
4020	unsigned long count, int mf_flags);
4021	extern int memory_failure(unsigned long pfn, int flags);
4022	extern int unpoison_memory(unsigned long pfn);
4023	extern atomic_long_t num_poisoned_pages __read_mostly;
4024	extern int soft_offline_page(unsigned long pfn, int flags);
#ifdef CONFIG_MEMORY_FAILURE
/*
 * Sysfs entries for memory failure handling statistics.
 */
extern const struct attribute_group memory_failure_attr_group;
extern void memory_failure_queue(unsigned long pfn, int flags);
extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
					bool *migratable_cleared);
void num_poisoned_pages_inc(unsigned long pfn);
void num_poisoned_pages_sub(unsigned long pfn, long i);
#else
/*
 * !CONFIG_MEMORY_FAILURE: the helpers below are no-ops;
 * __get_huge_page_for_hwpoison() returns 0 and leaves @migratable_cleared
 * untouched.
 */
static inline void memory_failure_queue(unsigned long pfn, int flags)
{
}

static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
					bool *migratable_cleared)
{
	return 0;
}

static inline void num_poisoned_pages_inc(unsigned long pfn)
{
}

static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
{
}
#endif
4054
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
extern void memblk_nr_poison_inc(unsigned long pfn);
extern void memblk_nr_poison_sub(unsigned long pfn, long i);
#else
/* Needs both MEMORY_FAILURE and MEMORY_HOTPLUG; otherwise these are no-ops. */
static inline void memblk_nr_poison_inc(unsigned long pfn)
{
}

static inline void memblk_nr_poison_sub(unsigned long pfn, long i)
{
}
#endif
4067
#ifndef arch_memory_failure
/* Default used when no arch_memory_failure macro is defined: always -ENXIO. */
static inline int arch_memory_failure(unsigned long pfn, int flags)
{
	return -ENXIO;
}
#endif
4074
4075	#ifndef arch_is_platform_page
4076	static inline bool arch_is_platform_page(u64 paddr)
4077	{
4078	return false;
4079	}
4080	#endif
4081
/*
 * Error handlers for various types of pages.
 */
enum mf_result {
	MF_IGNORED,	/* Error: cannot be handled */
	MF_FAILED,	/* Error: handling failed */
	MF_DELAYED,	/* Will be handled later */
	MF_RECOVERED,	/* Successfully recovered */
};
4091
/*
 * Page categories for memory-failure handling. Values are implicit and
 * sequential from 0, so inserting an entry renumbers everything after it.
 * NOTE(review): presumably used to select the per-type report message
 * (MF_MSG_ prefix) - confirm against the users of this enum.
 */
enum mf_action_page_type {
	MF_MSG_KERNEL,
	MF_MSG_KERNEL_HIGH_ORDER,
	MF_MSG_DIFFERENT_COMPOUND,
	MF_MSG_HUGE,
	MF_MSG_FREE_HUGE,
	MF_MSG_GET_HWPOISON,
	MF_MSG_UNMAP_FAILED,
	MF_MSG_DIRTY_SWAPCACHE,
	MF_MSG_CLEAN_SWAPCACHE,
	MF_MSG_DIRTY_MLOCKED_LRU,
	MF_MSG_CLEAN_MLOCKED_LRU,
	MF_MSG_DIRTY_UNEVICTABLE_LRU,
	MF_MSG_CLEAN_UNEVICTABLE_LRU,
	MF_MSG_DIRTY_LRU,
	MF_MSG_CLEAN_LRU,
	MF_MSG_TRUNCATED_LRU,
	MF_MSG_BUDDY,
	MF_MSG_DAX,
	MF_MSG_UNSPLIT_THP,
	MF_MSG_ALREADY_POISONED,
	MF_MSG_UNKNOWN,
};
4115
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
void folio_zero_user(struct folio *folio, unsigned long addr_hint);
int copy_user_large_folio(struct folio *dst, struct folio *src,
			  unsigned long addr_hint,
			  struct vm_area_struct *vma);
long copy_folio_from_user(struct folio *dst_folio,
			   const void __user *usr_src,
			   bool allow_pagefault);

/**
 * vma_is_special_huge - Are transhuge page-table entries considered special?
 * @vma: Pointer to the struct vm_area_struct to consider
 *
 * Whether transhuge page-table entries are considered "special" following
 * the definition in vm_normal_page().
 *
 * Return: true if transhuge page-table entries should be considered special,
 * false otherwise.
 */
static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
{
	return vma_is_dax(vma) || (vma->vm_file &&
				   (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
}

#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
4142
#if MAX_NUMNODES > 1
void __init setup_nr_node_ids(void);
#else
/* MAX_NUMNODES is not greater than 1: nothing to set up. */
static inline void setup_nr_node_ids(void) {}
#endif
4148
extern int memcmp_pages(struct page *page1, struct page *page2);

/* Return: non-zero when memcmp_pages() reports 0 for the two pages. */
static inline int pages_identical(struct page *page1, struct page *page2)
{
	return !memcmp_pages(page1, page2);
}
4155
#ifdef CONFIG_MAPPING_DIRTY_HELPERS
unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
						pgoff_t first_index, pgoff_t nr,
						pgoff_t bitmap_pgoff,
						unsigned long *bitmap,
						pgoff_t *start,
						pgoff_t *end);

unsigned long wp_shared_mapping_range(struct address_space *mapping,
				      pgoff_t first_index, pgoff_t nr);
#endif
4167
4168	#ifdef CONFIG_ANON_VMA_NAME
4169	int set_anon_vma_name(unsigned long addr, unsigned long size,
4170	const char __user *uname);
4171	#else
4172	static inline
4173	int set_anon_vma_name(unsigned long addr, unsigned long size,
4174	const char __user *uname)
4175	{
4176	return -EINVAL;
4177	}
4178	#endif
4179
4180	#ifdef CONFIG_UNACCEPTED_MEMORY
4181
4182	bool range_contains_unaccepted_memory(phys_addr_t start, unsigned long size);
4183	void accept_memory(phys_addr_t start, unsigned long size);
4184
4185	#else
4186
4187	static inline bool range_contains_unaccepted_memory(phys_addr_t start,
4188	unsigned long size)
4189	{
4190	return false;
4191	}
4192
4193	static inline void accept_memory(phys_addr_t start, unsigned long size)
4194	{
4195	}
4196
4197	#endif
4198
4199	static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
4200	{
4201	return range_contains_unaccepted_memory(start: pfn << PAGE_SHIFT, PAGE_SIZE);
4202	}
4203
4204	void vma_pgtable_walk_begin(struct vm_area_struct *vma);
4205	void vma_pgtable_walk_end(struct vm_area_struct *vma);
4206
4207	int reserve_mem_find_by_name(const char name, phys_addr_t start, phys_addr_t *size);
4208	int reserve_mem_release_by_name(const char *name);
4209
#ifdef CONFIG_64BIT
int do_mseal(unsigned long start, size_t len_in, unsigned long flags);
#else
/* Stub for !CONFIG_64BIT: ignores its arguments and reports success. */
static inline int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
{
	/* noop on 32 bit */
	return 0;
}
#endif
4219
4220	/*
4221	* user_alloc_needs_zeroing checks if a user folio from page allocator needs to
4222	* be zeroed or not.
4223	*/
4224	static inline bool user_alloc_needs_zeroing(void)
4225	{
4226	/*
4227	* for user folios, arch with cache aliasing requires cache flush and
4228	* arc changes folio->flags to make icache coherent with dcache, so
4229	* always return false to make caller use
4230	* clear_user_page()/clear_user_highpage().
4231	*/
4232	return cpu_dcache_is_aliasing() \|\| cpu_icache_is_aliasing() \|\|
4233	!static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
4234	&init_on_alloc);
4235	}
4236
4237	int arch_get_shadow_stack_status(struct task_struct t, unsigned* long __user *status);
4238	int arch_set_shadow_stack_status(struct task_struct t, unsigned* long status);
4239	int arch_lock_shadow_stack_status(struct task_struct t, unsigned* long status);
4240
4241
/*
 * mseal of userspace process's system mappings.
 *
 * VM_SEALED_SYSMAP expands to VM_SEALED when CONFIG_MSEAL_SYSTEM_MAPPINGS is
 * set and to VM_NONE otherwise.
 */
#ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS
#define VM_SEALED_SYSMAP	VM_SEALED
#else
#define VM_SEALED_SYSMAP	VM_NONE
#endif
4250
/*
 * DMA mapping IDs for page_pool
 *
 * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
 * stashes it in the upper bits of page->pp_magic. We always want to be able to
 * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
 * pages can have arbitrary kernel pointers stored in the same field as pp_magic
 * (since it overlaps with page->lru.next), so we must ensure that we cannot
 * mistake a valid kernel pointer with any of the values we write into this
 * field.
 *
 * On architectures that set POISON_POINTER_DELTA, this is already ensured,
 * since this value becomes part of PP_SIGNATURE; meaning we can just use the
 * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the
 * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is
 * 0, we use the lowest bit of PAGE_OFFSET as the boundary if that value is
 * known at compile-time.
 *
 * If the value of PAGE_OFFSET is not known at compile time, or if it is too
 * small to leave at least 8 bits available above PP_SIGNATURE, we define the
 * number of bits to be 0, which turns off the DMA index tracking altogether
 * (see page_pool_register_dma_index()).
 */
#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
#if POISON_POINTER_DELTA > 0
/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
 * index to not overlap with that if set
 */
#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
#else
/* Use the lowest bit of PAGE_OFFSET if there's at least 8 bits available; see above */
#define PP_DMA_INDEX_MIN_OFFSET (1 << (PP_DMA_INDEX_SHIFT + 8))
#define PP_DMA_INDEX_BITS ((__builtin_constant_p(PAGE_OFFSET) && \
			    PAGE_OFFSET >= PP_DMA_INDEX_MIN_OFFSET && \
			    !(PAGE_OFFSET & (PP_DMA_INDEX_MIN_OFFSET - 1))) ? \
			      MIN(32, __ffs(PAGE_OFFSET) - PP_DMA_INDEX_SHIFT) : 0)

#endif

#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
				  PP_DMA_INDEX_SHIFT)
4292
/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
 * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
 * the head page of compound page and bit 1 for pfmemalloc page, as well as the
 * bits used for the DMA index. page_is_pfmemalloc() is checked in
 * __page_pool_put_page() to avoid recycling the pfmemalloc page.
 */
#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
4300
#ifdef CONFIG_PAGE_POOL
/*
 * Return: true when @page->pp_magic, with the bits outside PP_MAGIC_MASK
 * cleared, equals PP_SIGNATURE.
 */
static inline bool page_pool_page_is_pp(const struct page *page)
{
	return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
}
#else
/* !CONFIG_PAGE_POOL: no page can belong to a page pool. */
static inline bool page_pool_page_is_pp(const struct page *page)
{
	return false;
}
#endif
4312
/* Single-bit flag values; snapshot_page_is_faithful() tests PAGE_SNAPSHOT_FAITHFUL. */
#define PAGE_SNAPSHOT_FAITHFUL (1 << 0)
#define PAGE_SNAPSHOT_PG_BUDDY (1 << 1)
#define PAGE_SNAPSHOT_PG_IDLE  (1 << 2)
4316
4317	struct page_snapshot {
4318	struct folio folio_snapshot;
4319	struct page page_snapshot;
4320	unsigned long pfn;
4321	unsigned long idx;
4322	unsigned long flags;
4323	};
4324
4325	static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps)
4326	{
4327	return ps->flags & PAGE_SNAPSHOT_FAITHFUL;
4328	}
4329
void snapshot_page(struct page_snapshot *ps, const struct page *page);
4331
4332	#endif /* _LINUX_MM_H */
4333

Browse the source code of Linux/include/linux/mm.h