hugetlb.h source code [Linux/include/linux/hugetlb.h]

/* SPDX-License-Identifier: GPL-2.0 */
2	#ifndef _LINUX_HUGETLB_H
3	#define _LINUX_HUGETLB_H
4
5	#include <linux/mm.h>
6	#include <linux/mm_types.h>
7	#include <linux/mmdebug.h>
8	#include <linux/fs.h>
9	#include <linux/hugetlb_inline.h>
10	#include <linux/cgroup.h>
11	#include <linux/page_ref.h>
12	#include <linux/list.h>
13	#include <linux/kref.h>
14	#include <linux/pgtable.h>
15	#include <linux/gfp.h>
16	#include <linux/userfaultfd_k.h>
17	#include <linux/nodemask.h>
18
19	struct ctl_table;
20	struct user_struct;
21	struct mmu_gather;
22	struct node;
23
24	void free_huge_folio(struct folio *folio);
25
26	#ifdef CONFIG_HUGETLB_PAGE
27
28	#include <linux/pagemap.h>
29	#include <linux/shm.h>
30	#include <asm/tlbflush.h>
31
32	/*
33	* For HugeTLB page, there are more metadata to save in the struct page. But
34	* the head struct page cannot meet our needs, so we have to abuse other tail
35	* struct page to store the metadata.
36	*/
37	#define __NR_USED_SUBPAGE 3
38
39	struct hugepage_subpool {
40	spinlock_t lock;
41	long count;
42	long max_hpages; / Maximum huge pages or -1 if no maximum. /
43	long used_hpages; / Used count against maximum, includes /
44	/ both allocated and reserved pages. /
45	struct hstate *hstate;
46	long min_hpages; / Minimum huge pages or -1 if no minimum. /
47	long rsv_hpages; / Pages reserved against global pool to /
48	/ satisfy minimum size. /
49	};
50
51	struct resv_map {
52	struct kref refs;
53	spinlock_t lock;
54	struct list_head regions;
55	long adds_in_progress;
56	struct list_head region_cache;
57	long region_cache_count;
58	struct rw_semaphore rw_sema;
59	#ifdef CONFIG_CGROUP_HUGETLB
60	/*
61	* On private mappings, the counter to uncharge reservations is stored
62	* here. If these fields are 0, then either the mapping is shared, or
63	* cgroup accounting is disabled for this resv_map.
64	*/
65	struct page_counter *reservation_counter;
66	unsigned long pages_per_hpage;
67	struct cgroup_subsys_state *css;
68	#endif
69	};
70
71	/*
72	* Region tracking -- allows tracking of reservations and instantiated pages
73	* across the pages in a mapping.
74	*
75	* The region data structures are embedded into a resv_map and protected
76	* by a resv_map's lock. The set of regions within the resv_map represent
77	* reservations for huge pages, or huge pages that have already been
78	* instantiated within the map. The from and to elements are huge page
79	* indices into the associated mapping. from indicates the starting index
80	* of the region. to represents the first index past the end of the region.
81	*
82	* For example, a file region structure with from == 0 and to == 4 represents
83	* four huge pages in a mapping. It is important to note that the to element
84	* represents the first element past the end of the region. This is used in
85	* arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
86	*
87	* Interval notation of the form [from, to) will be used to indicate that
88	* the endpoint from is inclusive and to is exclusive.
89	*/
90	struct file_region {
91	struct list_head link;
92	long from;
93	long to;
94	#ifdef CONFIG_CGROUP_HUGETLB
95	/*
96	* On shared mappings, each reserved region appears as a struct
97	* file_region in resv_map. These fields hold the info needed to
98	* uncharge each reservation.
99	*/
100	struct page_counter *reservation_counter;
101	struct cgroup_subsys_state *css;
102	#endif
103	};
104
105	struct hugetlb_vma_lock {
106	struct kref refs;
107	struct rw_semaphore rw_sema;
108	struct vm_area_struct *vma;
109	};
110
/*
 * resv_map lifetime helpers (implemented elsewhere). resv_map_alloc()
 * takes no arguments and returns a pointer; resv_map_release() has the
 * signature of a kref release callback.
 */
extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);
113
114	extern spinlock_t hugetlb_lock;
115	extern int hugetlb_max_hstate __read_mostly;
116	#define for_each_hstate(h) \
117	for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)
118
/* Subpool creation and release (implemented elsewhere). */
struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
						long min_hpages);
void hugepage_put_subpool(struct hugepage_subpool *spool);
122
123	void hugetlb_dup_vma_private(struct vm_area_struct *vma);
124	void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
125	int move_hugetlb_page_tables(struct vm_area_struct *vma,
126	struct vm_area_struct *new_vma,
127	unsigned long old_addr, unsigned long new_addr,
128	unsigned long len);
129	int copy_hugetlb_page_range(struct mm_struct , struct* mm_struct *,
130	struct vm_area_struct , struct* vm_area_struct *);
131	void unmap_hugepage_range(struct vm_area_struct *,
132	unsigned long start, unsigned long end,
133	struct folio *, zap_flags_t);
134	void __unmap_hugepage_range(struct mmu_gather *tlb,
135	struct vm_area_struct *vma,
136	unsigned long start, unsigned long end,
137	struct folio *, zap_flags_t zap_flags);
138	void hugetlb_report_meminfo(struct seq_file *);
139	int hugetlb_report_node_meminfo(char buf, int* len, int nid);
140	void hugetlb_show_meminfo_node(int nid);
141	unsigned long hugetlb_total_pages(void);
142	vm_fault_t hugetlb_fault(struct mm_struct mm, struct* vm_area_struct *vma,
143	unsigned long address, unsigned int flags);
144	#ifdef CONFIG_USERFAULTFD
145	int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
146	struct vm_area_struct *dst_vma,
147	unsigned long dst_addr,
148	unsigned long src_addr,
149	uffd_flags_t flags,
150	struct folio **foliop);
151	#endif /* CONFIG_USERFAULTFD */
152	long hugetlb_reserve_pages(struct inode inode, long* from, long to,
153	struct vm_area_struct *vma,
154	vm_flags_t vm_flags);
155	long hugetlb_unreserve_pages(struct inode inode, long* start, long end,
156	long freed);
157	bool folio_isolate_hugetlb(struct folio folio, struct* list_head *list);
158	int get_hwpoison_hugetlb_folio(struct folio folio, bool hugetlb, bool unpoison);
159	int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
160	bool *migratable_cleared);
161	void folio_putback_hugetlb(struct folio *folio);
162	void move_hugetlb_state(struct folio old_folio, struct* folio new_folio, int* reason);
163	void hugetlb_fix_reserve_counts(struct inode *inode);
164	extern struct mutex *hugetlb_fault_mutex_table;
165	u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
166
167	pte_t huge_pmd_share(struct* mm_struct mm, struct* vm_area_struct *vma,
168	unsigned long addr, pud_t *pud);
169	bool hugetlbfs_pagecache_present(struct hstate *h,
170	struct vm_area_struct *vma,
171	unsigned long address);
172
173	struct address_space hugetlb_folio_mapping_lock_write(struct* folio *folio);
174
175	extern int sysctl_hugetlb_shm_group;
176	extern struct list_head huge_boot_pages[MAX_NUMNODES];
177
178	void hugetlb_bootmem_alloc(void);
179	bool hugetlb_bootmem_allocated(void);
180	extern nodemask_t hugetlb_bootmem_nodes;
181	void hugetlb_bootmem_set_nodes(void);
182
/* arch callbacks */
184
185	#ifndef CONFIG_HIGHPTE
186	/*
187	* pte_offset_huge() and pte_alloc_huge() are helpers for those architectures
188	* which may go down to the lowest PTE level in their huge_pte_offset() and
189	* huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap().
190	*/
191	static inline pte_t pte_offset_huge(pmd_t pmd, unsigned long address)
192	{
193	return pte_offset_kernel(pmd, address);
194	}
195	static inline pte_t pte_alloc_huge(struct* mm_struct mm, pmd_t pmd,
196	unsigned long address)
197	{
198	return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
199	}
200	#endif
201
202	pte_t huge_pte_alloc(struct* mm_struct mm, struct* vm_area_struct *vma,
203	unsigned long addr, unsigned long sz);
204	/*
205	* huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
206	* Returns the pte_t* if found, or NULL if the address is not mapped.
207	*
208	* IMPORTANT: we should normally not directly call this function, instead
209	* this is only a common interface to implement arch-specific
210	* walker. Please use hugetlb_walk() instead, because that will attempt to
211	* verify the locking for you.
212	*
213	* Since this function will walk all the pgtable pages (including not only
214	* high-level pgtable page, but also PUD entry that can be unshared
215	* concurrently for VM_SHARED), the caller of this function should be
216	* responsible of its thread safety. One can follow this rule:
217	*
218	* (1) For private mappings: pmd unsharing is not possible, so holding the
219	* mmap_lock for either read or write is sufficient. Most callers
220	* already hold the mmap_lock, so normally, no special action is
221	* required.
222	*
223	* (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged
224	* pgtable page can go away from under us! It can be done by a pmd
225	* unshare with a follow up munmap() on the other process), then we
226	* need either:
227	*
228	* (2.1) hugetlb vma lock read or write held, to make sure pmd unshare
229	* won't happen upon the range (it also makes sure the pte_t we
230	* read is the right and stable one), or,
231	*
232	* (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make
233	* sure even if unshare happened the racy unmap() will wait until
234	* i_mmap_rwsem is released.
235	*
236	* Option (2.1) is the safest, which guarantees pte stability from pmd
237	* sharing pov, until the vma lock released. Option (2.2) doesn't protect
238	* a concurrent pmd unshare, but it makes sure the pgtable page is safe to
239	* access.
240	*/
241	pte_t huge_pte_offset(struct* mm_struct *mm,
242	unsigned long addr, unsigned long sz);
243	unsigned long hugetlb_mask_last_page(struct hstate *h);
244	int huge_pmd_unshare(struct mm_struct mm, struct* vm_area_struct *vma,
245	unsigned long addr, pte_t *ptep);
246	void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
247	unsigned long start, unsigned* long *end);
248
249	extern void __hugetlb_zap_begin(struct vm_area_struct *vma,
250	unsigned long begin, unsigned* long *end);
251	extern void __hugetlb_zap_end(struct vm_area_struct *vma,
252	struct zap_details *details);
253
/*
 * Forward to __hugetlb_zap_begin() only when @vma is a hugetlb vma;
 * otherwise do nothing. @start and @end are passed through by pointer.
 */
static inline void hugetlb_zap_begin(struct vm_area_struct *vma,
				     unsigned long *start, unsigned long *end)
{
	if (is_vm_hugetlb_page(vma))
		__hugetlb_zap_begin(vma, start, end);
}
260
/* Forward to __hugetlb_zap_end() for hugetlb vmas; no-op for any other vma. */
static inline void hugetlb_zap_end(struct vm_area_struct *vma,
				   struct zap_details *details)
{
	if (!is_vm_hugetlb_page(vma))
		return;

	__hugetlb_zap_end(vma, details);
}
267
268	void hugetlb_vma_lock_read(struct vm_area_struct *vma);
269	void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
270	void hugetlb_vma_lock_write(struct vm_area_struct *vma);
271	void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
272	int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
273	void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
274	void hugetlb_vma_lock_release(struct kref *kref);
275	long hugetlb_change_protection(struct vm_area_struct *vma,
276	unsigned long address, unsigned long end, pgprot_t newprot,
277	unsigned long cp_flags);
278	bool is_hugetlb_entry_migration(pte_t pte);
279	bool is_hugetlb_entry_hwpoisoned(pte_t pte);
280	void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
281	void fixup_hugetlb_reservations(struct vm_area_struct *vma);
282	void hugetlb_split(struct vm_area_struct vma, unsigned* long addr);
283
284	#else /* !CONFIG_HUGETLB_PAGE */
285
/* !CONFIG_HUGETLB_PAGE: nothing to do. */
static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma) { }

/* !CONFIG_HUGETLB_PAGE: nothing to do. */
static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma) { }
293
/* !CONFIG_HUGETLB_PAGE stub: always reports 0. */
static inline unsigned long hugetlb_total_pages(void)
{
	return 0;
}
298
/* !CONFIG_HUGETLB_PAGE stub: always returns NULL. */
static inline struct address_space *
hugetlb_folio_mapping_lock_write(struct folio *folio)
{
	return NULL;
}
304
305	static inline int huge_pmd_unshare(struct mm_struct *mm,
306	struct vm_area_struct *vma,
307	unsigned long addr, pte_t *ptep)
308	{
309	return `0`;
310	}
311
/* !CONFIG_HUGETLB_PAGE stub: leaves *start and *end untouched. */
static inline void adjust_range_if_pmd_sharing_possible(
				struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
}
317
/* !CONFIG_HUGETLB_PAGE stub: leaves *start and *end untouched. */
static inline void hugetlb_zap_begin(
				struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
}
323
/* !CONFIG_HUGETLB_PAGE: nothing to do. */
static inline void hugetlb_zap_end(struct vm_area_struct *vma,
				   struct zap_details *details) { }
329
/*
 * !CONFIG_HUGETLB_PAGE stub: calls BUG(); the return statement follows it
 * so the function still has a value on every path.
 */
static inline int copy_hugetlb_page_range(struct mm_struct *dst,
					  struct mm_struct *src,
					  struct vm_area_struct *dst_vma,
					  struct vm_area_struct *src_vma)
{
	BUG();
	return 0;
}
338
/*
 * !CONFIG_HUGETLB_PAGE stub: calls BUG(); the return statement follows it
 * so the function still has a value on every path.
 */
static inline int move_hugetlb_page_tables(struct vm_area_struct *vma,
					   struct vm_area_struct *new_vma,
					   unsigned long old_addr,
					   unsigned long new_addr,
					   unsigned long len)
{
	BUG();
	return 0;
}
348
/* !CONFIG_HUGETLB_PAGE: nothing is written to @m. */
static inline void hugetlb_report_meminfo(struct seq_file *m) { }
352
/* !CONFIG_HUGETLB_PAGE stub: leaves @buf untouched and returns 0. */
static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid)
{
	return 0;
}
357
/* !CONFIG_HUGETLB_PAGE: nothing to show. */
static inline void hugetlb_show_meminfo_node(int nid) { }
361
/* !CONFIG_HUGETLB_PAGE: the vma lock/unlock helpers below are all no-ops. */
static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) { }

static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma) { }

static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma) { }

static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma) { }
377
/* !CONFIG_HUGETLB_PAGE stub: always returns 1. */
static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	return 1;
}
382
/* !CONFIG_HUGETLB_PAGE: nothing to assert. */
static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) { }
386
/* !CONFIG_HUGETLB_PAGE stub: always returns 0. */
static inline int is_hugepage_only_range(struct mm_struct *mm,
					unsigned long addr, unsigned long len)
{
	return 0;
}
392
393	#ifdef CONFIG_USERFAULTFD
394	static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
395	struct vm_area_struct *dst_vma,
396	unsigned long dst_addr,
397	unsigned long src_addr,
398	uffd_flags_t flags,
399	struct folio **foliop)
400	{
401	BUG();
402	return `0`;
403	}
404	#endif /* CONFIG_USERFAULTFD */
405
406	static inline pte_t huge_pte_offset(struct* mm_struct mm, unsigned* long addr,
407	unsigned long sz)
408	{
409	return NULL;
410	}
411
/* !CONFIG_HUGETLB_PAGE stub: always returns false. */
static inline bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list)
{
	return false;
}
416
/* !CONFIG_HUGETLB_PAGE stub: returns 0 and does not write *hugetlb. */
static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
{
	return 0;
}
421
/* !CONFIG_HUGETLB_PAGE stub: returns 0 and does not write *migratable_cleared. */
static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
					bool *migratable_cleared)
{
	return 0;
}
427
/* !CONFIG_HUGETLB_PAGE: nothing to put back. */
static inline void folio_putback_hugetlb(struct folio *folio) { }
431
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void move_hugetlb_state(struct folio *old_folio,
					struct folio *new_folio, int reason)
{
}
436
437	static inline long hugetlb_change_protection(
438	struct vm_area_struct vma, unsigned* long address,
439	unsigned long end, pgprot_t newprot,
440	unsigned long cp_flags)
441	{
442	return `0`;
443	}
444
445	static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
446	struct vm_area_struct vma, unsigned* long start,
447	unsigned long end, struct folio *folio,
448	zap_flags_t zap_flags)
449	{
450	BUG();
451	}
452
453	static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
454	struct vm_area_struct vma, unsigned* long address,
455	unsigned int flags)
456	{
457	BUG();
458	return `0`;
459	}
460
/* !CONFIG_HUGETLB_PAGE: nothing to unshare. */
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
{
}

/* !CONFIG_HUGETLB_PAGE: nothing to fix up. */
static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) { }
466
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
468
469	#endif /* !CONFIG_HUGETLB_PAGE */
470
471	#ifndef pgd_write
472	static inline int pgd_write(pgd_t pgd)
473	{
474	BUG();
475	return `0`;
476	}
477	#endif
478
479	#define HUGETLB_ANON_FILE "anon_hugepage"
480
/* Inode kind constants for hugetlb files. */
enum {
	/*
	 * The file will be used as an shm file so shmfs accounting rules
	 * apply
	 */
	HUGETLB_SHMFS_INODE     = 1,
	/*
	 * The file is being created on the internal vfs mount and shmfs
	 * accounting rules do not apply
	 */
	HUGETLB_ANONHUGE_INODE  = 2,
};
493
494	#ifdef CONFIG_HUGETLBFS
495	struct hugetlbfs_sb_info {
496	long max_inodes; / inodes allowed /
497	long free_inodes; / inodes free /
498	spinlock_t stat_lock;
499	struct hstate *hstate;
500	struct hugepage_subpool *spool;
501	kuid_t uid;
502	kgid_t gid;
503	umode_t mode;
504	};
505
506	static inline struct hugetlbfs_sb_info HUGETLBFS_SB(struct* super_block *sb)
507	{
508	return sb->s_fs_info;
509	}
510
511	struct hugetlbfs_inode_info {
512	struct inode vfs_inode;
513	unsigned int seals;
514	};
515
516	static inline struct hugetlbfs_inode_info HUGETLBFS_I(struct* inode *inode)
517	{
518	return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
519	}
520
521	extern const struct vm_operations_struct hugetlb_vm_ops;
522	struct file hugetlb_file_setup(const* char *name, size_t size, vm_flags_t acct,
523	int creat_flags, int page_size_log);
524
525	static inline bool is_file_hugepages(const struct file *file)
526	{
527	return file->f_op->fop_flags & FOP_HUGE_PAGES;
528	}
529
530	static inline struct hstate hstate_inode(struct* inode *i)
531	{
532	return HUGETLBFS_SB(sb: i->i_sb)->hstate;
533	}
534	#else /* !CONFIG_HUGETLBFS */
535
536	#define is_file_hugepages(file) false
537	static inline struct file *
538	hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
539	int creat_flags, int page_size_log)
540	{
541	return ERR_PTR(-ENOSYS);
542	}
543
/* !CONFIG_HUGETLBFS stub: always returns NULL. */
static inline struct hstate *hstate_inode(struct inode *i)
{
	return NULL;
}
548	#endif /* !CONFIG_HUGETLBFS */
549
/* Declared unconditionally; the implementation is not part of this header. */
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
				    unsigned long len, unsigned long pgoff,
				    unsigned long flags);
554
/*
 * hugetlb page specific state flags.  These flags are located in page.private
 * of the hugetlb head page.  Functions created via the below macros should be
 * used to manipulate these flags.
 *
 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
 *	allocation time.  Cleared when page is fully instantiated.  Free
 *	routine checks flag to restore a reservation on error paths.
 *	Synchronization:  Examined or modified by code that knows it has
 *	the only reference to page.  i.e. After allocation but before use
 *	or when the page is being freed.
 * HPG_migratable  - Set after a newly allocated page is added to the page
 *	cache and/or page tables.  Indicates the page is a candidate for
 *	migration.
 *	Synchronization:  Initially set after new page allocation with no
 *	locking.  When examined and modified during migration processing
 *	(isolate, migrate, putback) the hugetlb_lock is held.
 * HPG_temporary - Set on a page that is temporarily allocated from the buddy
 *	allocator.  Typically used for migration target pages when no pages
 *	are available in the pool.  The hugetlb free page path will
 *	immediately free pages with this flag set to the buddy allocator.
 *	Synchronization: Can be set after huge page allocation from buddy when
 *	code knows it has only reference.  All other examinations and
 *	modifications require hugetlb_lock.
 * HPG_freed - Set when page is on the free lists.
 *	Synchronization: hugetlb_lock held for examination and modification.
 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed.
 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page
 *     that is not tracked by raw_hwp_page list.
 */
/*
 * Bit numbers of the hugetlb-specific flags; used as HPG_##flname by the
 * flag accessor macros. __NR_HPAGEFLAGS counts the flags and must stay last.
 */
enum hugetlb_page_flags {
	HPG_restore_reserve = 0,
	HPG_migratable,
	HPG_temporary,
	HPG_freed,
	HPG_vmemmap_optimized,
	HPG_raw_hwp_unreliable,
	HPG_cma,
	__NR_HPAGEFLAGS,
};
595
596	/*
597	* Macros to create test, set and clear function definitions for
598	* hugetlb specific page flags.
599	*/
#ifdef CONFIG_HUGETLB_PAGE
/* Real accessors: apply the bit op for HPG_<flname> to &folio->private. */
#define TESTHPAGEFLAG(uname, flname)				\
static __always_inline						\
bool folio_test_hugetlb_##flname(struct folio *folio)		\
{								\
	void *flags_word = &folio->private;			\
	return test_bit(HPG_##flname, flags_word);		\
}

#define SETHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_set_hugetlb_##flname(struct folio *folio)		\
{								\
	void *flags_word = &folio->private;			\
	set_bit(HPG_##flname, flags_word);			\
}

#define CLEARHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_clear_hugetlb_##flname(struct folio *folio)		\
{								\
	void *flags_word = &folio->private;			\
	clear_bit(HPG_##flname, flags_word);			\
}
#else
/* Stub accessors: the test always yields 0, set and clear do nothing. */
#define TESTHPAGEFLAG(uname, flname)				\
static inline bool						\
folio_test_hugetlb_##flname(struct folio *folio)		\
{								\
	return 0;						\
}

#define SETHPAGEFLAG(uname, flname)				\
static inline void						\
folio_set_hugetlb_##flname(struct folio *folio)			\
{								\
}

#define CLEARHPAGEFLAG(uname, flname)				\
static inline void						\
folio_clear_hugetlb_##flname(struct folio *folio)		\
{								\
}
#endif
637
638	#define HPAGEFLAG(uname, flname) \
639	TESTHPAGEFLAG(uname, flname) \
640	SETHPAGEFLAG(uname, flname) \
641	CLEARHPAGEFLAG(uname, flname) \
642
643	/*
644	* Create functions associated with hugetlb page flags
645	*/
646	HPAGEFLAG(RestoreReserve, restore_reserve)
647	HPAGEFLAG(Migratable, migratable)
648	HPAGEFLAG(Temporary, temporary)
649	HPAGEFLAG(Freed, freed)
650	HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
651	HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
652	HPAGEFLAG(Cma, cma)
653
654	#ifdef CONFIG_HUGETLB_PAGE
655
656	#define HSTATE_NAME_LEN 32
657	/ Defines one hugetlb page size /
658	struct hstate {
659	struct mutex resize_lock;
660	struct lock_class_key resize_key;
661	int next_nid_to_alloc;
662	int next_nid_to_free;
663	unsigned int order;
664	unsigned int demote_order;
665	unsigned long mask;
666	unsigned long max_huge_pages;
667	unsigned long nr_huge_pages;
668	unsigned long free_huge_pages;
669	unsigned long resv_huge_pages;
670	unsigned long surplus_huge_pages;
671	unsigned long nr_overcommit_huge_pages;
672	struct list_head hugepage_activelist;
673	struct list_head hugepage_freelists[MAX_NUMNODES];
674	unsigned int max_huge_pages_node[MAX_NUMNODES];
675	unsigned int nr_huge_pages_node[MAX_NUMNODES];
676	unsigned int free_huge_pages_node[MAX_NUMNODES];
677	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
678	char name[HSTATE_NAME_LEN];
679	};
680
681	struct cma;
682
683	struct huge_bootmem_page {
684	struct list_head list;
685	struct hstate *hstate;
686	unsigned long flags;
687	struct cma *cma;
688	};
689
690	#define HUGE_BOOTMEM_HVO 0x0001
691	#define HUGE_BOOTMEM_ZONES_VALID 0x0002
692	#define HUGE_BOOTMEM_CMA 0x0004
693
694	bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
695
696	int isolate_or_dissolve_huge_folio(struct folio folio, struct* list_head *list);
697	int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
698	void wait_for_freed_hugetlb_folios(void);
699	struct folio alloc_hugetlb_folio(struct* vm_area_struct *vma,
700	unsigned long addr, bool cow_from_owner);
701	struct folio alloc_hugetlb_folio_nodemask(struct* hstate h, int* preferred_nid,
702	nodemask_t *nmask, gfp_t gfp_mask,
703	bool allow_alloc_fallback);
704	struct folio alloc_hugetlb_folio_reserve(struct* hstate h, int* preferred_nid,
705	nodemask_t *nmask, gfp_t gfp_mask);
706
707	int hugetlb_add_to_page_cache(struct folio folio, struct* address_space *mapping,
708	pgoff_t idx);
709	void restore_reserve_on_error(struct hstate h, struct* vm_area_struct *vma,
710	unsigned long address, struct folio *folio);
711
712	/ arch callback /
713	int __init __alloc_bootmem_huge_page(struct hstate h, int* nid);
714	int __init alloc_bootmem_huge_page(struct hstate h, int* nid);
715	bool __init hugetlb_node_alloc_supported(void);
716
717	void __init hugetlb_add_hstate(unsigned order);
718	bool __init arch_hugetlb_valid_size(unsigned long size);
719	struct hstate size_to_hstate(unsigned* long size);
720
721	#ifndef HUGE_MAX_HSTATE
722	#define HUGE_MAX_HSTATE 1
723	#endif
724
725	extern struct hstate hstates[HUGE_MAX_HSTATE];
726	extern unsigned int default_hstate_idx;
727
728	#define default_hstate (hstates[default_hstate_idx])
729
730	static inline struct hugepage_subpool subpool_inode(struct* inode *inode)
731	{
732	return HUGETLBFS_SB(sb: inode->i_sb)->spool;
733	}
734
735	static inline struct hugepage_subpool hugetlb_folio_subpool(struct* folio *folio)
736	{
737	return folio->_hugetlb_subpool;
738	}
739
740	static inline void hugetlb_set_folio_subpool(struct folio *folio,
741	struct hugepage_subpool *subpool)
742	{
743	folio->_hugetlb_subpool = subpool;
744	}
745
/* hstate of the inode behind file @f. */
static inline struct hstate *hstate_file(struct file *f)
{
	return hstate_inode(file_inode(f));
}
750
751	static inline struct hstate hstate_sizelog(int* page_size_log)
752	{
753	if (!page_size_log)
754	return &default_hstate;
755
756	if (page_size_log < BITS_PER_LONG)
757	return size_to_hstate(size: `1UL` << page_size_log);
758
759	return NULL;
760	}
761
762	static inline struct hstate hstate_vma(struct* vm_area_struct *vma)
763	{
764	return hstate_file(f: vma->vm_file);
765	}
766
767	static inline unsigned long huge_page_size(const struct hstate *h)
768	{
769	return (unsigned long)PAGE_SIZE << h->order;
770	}
771
/* Per-vma page size queries (implemented elsewhere). */
unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);
775
776	static inline unsigned long huge_page_mask(struct hstate *h)
777	{
778	return h->mask;
779	}
780
781	static inline unsigned int huge_page_order(struct hstate *h)
782	{
783	return h->order;
784	}
785
786	static inline unsigned huge_page_shift(struct hstate *h)
787	{
788	return h->order + PAGE_SHIFT;
789	}
790
791	static inline bool order_is_gigantic(unsigned int order)
792	{
793	return order > MAX_PAGE_ORDER;
794	}
795
796	static inline bool hstate_is_gigantic(struct hstate *h)
797	{
798	return order_is_gigantic(order: huge_page_order(h));
799	}
800
801	static inline unsigned int pages_per_huge_page(const struct hstate *h)
802	{
803	return `1` << h->order;
804	}
805
/* Huge page size expressed in 512-byte units. */
static inline unsigned int blocks_per_huge_page(struct hstate *h)
{
	return huge_page_size(h) / 512;
}
810
811	static inline struct folio filemap_lock_hugetlb_folio(struct* hstate *h,
812	struct address_space *mapping, pgoff_t idx)
813	{
814	return filemap_lock_folio(mapping, index: idx << huge_page_order(h));
815	}
816
817	#include <asm/hugetlb.h>
818
#ifndef is_hugepage_only_range
/*
 * Generic fallback, used only when is_hugepage_only_range is not already
 * defined as a macro at this point: always returns 0.
 */
static inline int is_hugepage_only_range(struct mm_struct *mm,
					unsigned long addr, unsigned long len)
{
	return 0;
}
#define is_hugepage_only_range is_hugepage_only_range
#endif
827
#ifndef arch_clear_hugetlb_flags
/* Generic no-op, used when arch_clear_hugetlb_flags is not already defined. */
static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
}
#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
#endif
832
833	#ifndef arch_make_huge_pte
834	static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
835	vm_flags_t flags)
836	{
837	return pte_mkhuge(pte: entry);
838	}
839	#endif
840
#ifndef arch_has_huge_bootmem_alloc
/*
 * Architectures that do their own bootmem allocation cannot use early CMA
 * allocation. This generic fallback answers false.
 */
static inline bool arch_has_huge_bootmem_alloc(void)
{
	const bool arch_allocates = false;

	return arch_allocates;
}
#endif
851
/*
 * hstate matching @folio's size. VM_BUG_ON_FOLIO is given the condition
 * that @folio is not a hugetlb folio.
 */
static inline struct hstate *folio_hstate(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	return size_to_hstate(folio_size(folio));
}
857
858	static inline unsigned hstate_index_to_shift(unsigned index)
859	{
860	return hstates[index].order + PAGE_SHIFT;
861	}
862
863	static inline int hstate_index(struct hstate *h)
864	{
865	return h - hstates;
866	}
867
/* Dissolve helpers for one folio or a pfn range (implemented elsewhere). */
int dissolve_free_hugetlb_folio(struct folio *folio);
int dissolve_free_hugetlb_folios(unsigned long start_pfn, unsigned long end_pfn);
871
#ifdef CONFIG_MEMORY_FAILURE
void folio_clear_hugetlb_hwpoison(struct folio *folio);
#else
/* !CONFIG_MEMORY_FAILURE: nothing to clear. */
static inline void folio_clear_hugetlb_hwpoison(struct folio *folio) { }
#endif
879
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
#ifndef arch_hugetlb_migration_supported
/*
 * Generic check: true when the huge page shift equals PMD_SHIFT, PUD_SHIFT
 * or PGDIR_SHIFT.
 */
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	if ((huge_page_shift(h) == PMD_SHIFT) ||
		(huge_page_shift(h) == PUD_SHIFT) ||
			(huge_page_shift(h) == PGDIR_SHIFT))
		return true;
	else
		return false;
}
#endif
#else
/* !CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION: always false. */
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	return false;
}
#endif
898
899	static inline bool hugepage_migration_supported(struct hstate *h)
900	{
901	return arch_hugetlb_migration_supported(h);
902	}
903
904	/*
905	* Movability check is different as compared to migration check.
906	* It determines whether or not a huge page should be placed on
907	* movable zone or not. Movability of any huge page should be
908	* required only if huge page size is supported for migration.
909	* There won't be any reason for the huge page to be movable if
910	* it is not migratable to start with. Also the size of the huge
911	* page should be large enough to be placed under a movable zone
912	* and still feasible enough to be migratable. Just the presence
913	* in movable zone does not make the migration feasible.
914	*
915	* So even though large huge page sizes like the gigantic ones
916	* are migratable they should not be movable because its not
917	* feasible to migrate them from movable zone.
918	*/
919	static inline bool hugepage_movable_supported(struct hstate *h)
920	{
921	if (!hugepage_migration_supported(h))
922	return false;
923
924	if (hstate_is_gigantic(h))
925	return false;
926	return true;
927	}
928
929	/ Movability of hugepages depends on migration support. /
930	static inline gfp_t htlb_alloc_mask(struct hstate *h)
931	{
932	gfp_t gfp = __GFP_COMP \| __GFP_NOWARN;
933
934	gfp \|= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
935
936	return gfp;
937	}
938
939	static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
940	{
941	gfp_t modified_mask = htlb_alloc_mask(h);
942
943	/ Some callers might want to enforce node /
944	modified_mask \|= (gfp_mask & __GFP_THISNODE);
945
946	modified_mask \|= (gfp_mask & __GFP_NOWARN);
947
948	return modified_mask;
949	}
950
951	static inline bool htlb_allow_alloc_fallback(int reason)
952	{
953	bool allowed_fallback = false;
954
955	/*
956	* Note: the memory offline, memory failure and migration syscalls will
957	* be allowed to fallback to other nodes due to lack of a better chioce,
958	* that might break the per-node hugetlb pool. While other cases will
959	* set the __GFP_THISNODE to avoid breaking the per-node hugetlb pool.
960	*/
961	switch (reason) {
962	case MR_MEMORY_HOTPLUG:
963	case MR_MEMORY_FAILURE:
964	case MR_SYSCALL:
965	case MR_MEMPOLICY_MBIND:
966	allowed_fallback = true;
967	break;
968	default:
969	break;
970	}
971
972	return allowed_fallback;
973	}
974
975	static inline spinlock_t huge_pte_lockptr(struct* hstate *h,
976	struct mm_struct mm, pte_t pte)
977	{
978	const unsigned long size = huge_page_size(h);
979
980	VM_WARN_ON(size == PAGE_SIZE);
981
982	/*
983	* hugetlb must use the exact same PT locks as core-mm page table
984	* walkers would. When modifying a PTE table, hugetlb must take the
985	* PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
986	* PT lock etc.
987	*
988	* The expectation is that any hugetlb folio smaller than a PMD is
989	* always mapped into a single PTE table and that any hugetlb folio
990	* smaller than a PUD (but at least as big as a PMD) is always mapped
991	* into a single PMD table.
992	*
993	* If that does not hold for an architecture, then that architecture
994	* must disable split PT locks such that all *_lockptr() functions
995	* will give us the same result: the per-MM PT lock.
996	*
997	* Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
998	* PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
999	* and core-mm would use pmd_lockptr(). However, in such configurations
1000	* split PMD locks are disabled -- they don't make sense on a single
1001	* PGDIR page table -- and the end result is the same.
1002	*/
1003	if (size >= PUD_SIZE)
1004	return pud_lockptr(mm, pud: (pud_t *) pte);
1005	else if (size >= PMD_SIZE \|\| IS_ENABLED(CONFIG_HIGHPTE))
1006	return pmd_lockptr(mm, pmd: (pmd_t *) pte);
1007	/ pte_alloc_huge() only applies with !CONFIG_HIGHPTE /
1008	return ptep_lockptr(mm, pte);
1009	}
1010
#ifndef hugepages_supported
/*
 * Some platforms decide whether they support huge pages at boot
 * time. Some of them, such as powerpc, set HPAGE_SHIFT to 0
 * when there is no such support.
 */
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif
1019
/* Declared here; the definition is outside this header. */
void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);
1021
1022	static inline void hugetlb_count_init(struct mm_struct *mm)
1023	{
1024	atomic_long_set(v: &mm->hugetlb_usage, i: `0`);
1025	}
1026
1027	static inline void hugetlb_count_add(long l, struct mm_struct *mm)
1028	{
1029	atomic_long_add(i: l, v: &mm->hugetlb_usage);
1030	}
1031
1032	static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
1033	{
1034	atomic_long_sub(i: l, v: &mm->hugetlb_usage);
1035	}
1036
1037	#ifndef huge_ptep_modify_prot_start
1038	#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
1039	static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
1040	unsigned long addr, pte_t *ptep)
1041	{
1042	unsigned long psize = huge_page_size(h: hstate_vma(vma));
1043
1044	return huge_ptep_get_and_clear(mm: vma->vm_mm, addr, ptep, sz: psize);
1045	}
1046	#endif
1047
1048	#ifndef huge_ptep_modify_prot_commit
1049	#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
1050	static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
1051	unsigned long addr, pte_t *ptep,
1052	pte_t old_pte, pte_t pte)
1053	{
1054	unsigned long psize = huge_page_size(h: hstate_vma(vma));
1055
1056	set_huge_pte_at(mm: vma->vm_mm, addr, ptep, pte, sz: psize);
1057	}
1058	#endif
1059
#ifdef CONFIG_NUMA
/* Per-node registration hooks; only declared when CONFIG_NUMA is set. */
void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif
1064
/*
 * Check if a given raw @page in a hugepage is HWPOISON.
 */
bool is_raw_hwpoison_page_in_hugepage(struct page *page);
1069
1070	static inline unsigned long huge_page_mask_align(struct file *file)
1071	{
1072	return PAGE_MASK & ~huge_page_mask(h: hstate_file(f: file));
1073	}
1074
1075	#else /* CONFIG_HUGETLB_PAGE */
/* Empty placeholder so that struct hstate is a complete type without hugetlb. */
struct hstate {};
1077
/* !CONFIG_HUGETLB_PAGE stub: always returns 0. */
static inline unsigned long huge_page_mask_align(struct file *file)
{
	return 0;
}
1082
/* !CONFIG_HUGETLB_PAGE stub: there is no subpool, always returns NULL. */
static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return NULL;
}
1087
1088	static inline struct folio filemap_lock_hugetlb_folio(struct* hstate *h,
1089	struct address_space *mapping, pgoff_t idx)
1090	{
1091	return NULL;
1092	}
1093
/* !CONFIG_HUGETLB_PAGE stub: always fails with -ENOMEM. */
static inline int isolate_or_dissolve_huge_folio(struct folio *folio,
						 struct list_head *list)
{
	return -ENOMEM;
}
1099
/* !CONFIG_HUGETLB_PAGE stub: nothing to replace, always returns 0. */
static inline int replace_free_hugepage_folios(unsigned long start_pfn,
					       unsigned long end_pfn)
{
	return 0;
}
1105
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void wait_for_freed_hugetlb_folios(void)
{
}
1109
/* !CONFIG_HUGETLB_PAGE stub: never allocates, always returns NULL. */
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
						unsigned long addr,
						bool cow_from_owner)
{
	return NULL;
}
1116
1117	static inline struct folio *
1118	alloc_hugetlb_folio_reserve(struct hstate h, int* preferred_nid,
1119	nodemask_t *nmask, gfp_t gfp_mask)
1120	{
1121	return NULL;
1122	}
1123
1124	static inline struct folio *
1125	alloc_hugetlb_folio_nodemask(struct hstate h, int* preferred_nid,
1126	nodemask_t *nmask, gfp_t gfp_mask,
1127	bool allow_alloc_fallback)
1128	{
1129	return NULL;
1130	}
1131
/* !CONFIG_HUGETLB_PAGE stub: always returns 0. */
static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
	return 0;
}
1136
/* !CONFIG_HUGETLB_PAGE stub: no hstate exists for @f, always returns NULL. */
static inline struct hstate *hstate_file(struct file *f)
{
	return NULL;
}
1141
/* !CONFIG_HUGETLB_PAGE stub: always returns NULL regardless of @page_size_log. */
static inline struct hstate *hstate_sizelog(int page_size_log)
{
	return NULL;
}
1146
/* !CONFIG_HUGETLB_PAGE stub: no hstate exists for @vma, always returns NULL. */
static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return NULL;
}
1151
/* !CONFIG_HUGETLB_PAGE stub: no hstate exists for @folio, always returns NULL. */
static inline struct hstate *folio_hstate(struct folio *folio)
{
	return NULL;
}
1156
/* !CONFIG_HUGETLB_PAGE stub: always returns NULL regardless of @size. */
static inline struct hstate *size_to_hstate(unsigned long size)
{
	return NULL;
}
1161
1162	static inline unsigned long huge_page_size(struct hstate *h)
1163	{
1164	return PAGE_SIZE;
1165	}
1166
1167	static inline unsigned long huge_page_mask(struct hstate *h)
1168	{
1169	return PAGE_MASK;
1170	}
1171
1172	static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
1173	{
1174	return PAGE_SIZE;
1175	}
1176
1177	static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
1178	{
1179	return PAGE_SIZE;
1180	}
1181
/* !CONFIG_HUGETLB_PAGE stub: always reports order 0. */
static inline unsigned int huge_page_order(struct hstate *h)
{
	return 0;
}
1186
1187	static inline unsigned int huge_page_shift(struct hstate *h)
1188	{
1189	return PAGE_SHIFT;
1190	}
1191
/* !CONFIG_HUGETLB_PAGE stub: always returns false. */
static inline bool hstate_is_gigantic(struct hstate *h)
{
	return false;
}
1196
/* !CONFIG_HUGETLB_PAGE stub: always reports 1 page. */
static inline unsigned int pages_per_huge_page(struct hstate *h)
{
	return 1;
}
1201
/* !CONFIG_HUGETLB_PAGE stub: always returns 0 regardless of @index. */
static inline unsigned hstate_index_to_shift(unsigned index)
{
	return 0;
}
1206
/* !CONFIG_HUGETLB_PAGE stub: always returns index 0. */
static inline int hstate_index(struct hstate *h)
{
	return 0;
}
1211
/* !CONFIG_HUGETLB_PAGE stub: nothing to dissolve, always returns 0. */
static inline int dissolve_free_hugetlb_folio(struct folio *folio)
{
	return 0;
}
1216
/* !CONFIG_HUGETLB_PAGE stub: nothing to dissolve, always returns 0. */
static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn,
					       unsigned long end_pfn)
{
	return 0;
}
1222
/* !CONFIG_HUGETLB_PAGE stub: always returns false. */
static inline bool hugepage_migration_supported(struct hstate *h)
{
	return false;
}
1227
/* !CONFIG_HUGETLB_PAGE stub: always returns false. */
static inline bool hugepage_movable_supported(struct hstate *h)
{
	return false;
}
1232
1233	static inline gfp_t htlb_alloc_mask(struct hstate *h)
1234	{
1235	return `0`;
1236	}
1237
1238	static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
1239	{
1240	return `0`;
1241	}
1242
/* !CONFIG_HUGETLB_PAGE stub: always returns false regardless of @reason. */
static inline bool htlb_allow_alloc_fallback(int reason)
{
	return false;
}
1247
1248	static inline spinlock_t huge_pte_lockptr(struct* hstate *h,
1249	struct mm_struct mm, pte_t pte)
1250	{
1251	return &mm->page_table_lock;
1252	}
1253
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_count_init(struct mm_struct *mm)
{
}
1257
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
{
}
1261
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}
1265
1266	static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
1267	unsigned long addr, pte_t *ptep)
1268	{
1269	#ifdef CONFIG_MMU
1270	return ptep_get(ptep);
1271	#else
1272	return *ptep;
1273	#endif
1274	}
1275
1276	static inline void set_huge_pte_at(struct mm_struct mm, unsigned* long addr,
1277	pte_t ptep, pte_t pte, unsigned* long sz)
1278	{
1279	}
1280
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_register_node(struct node *node)
{
}
1284
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_unregister_node(struct node *node)
{
}
1288
/* !CONFIG_HUGETLB_PAGE stub: always returns false. */
static inline bool hugetlbfs_pagecache_present(
	struct hstate *h, struct vm_area_struct *vma, unsigned long address)
{
	return false;
}
1294
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_bootmem_alloc(void)
{
}
1298
/* !CONFIG_HUGETLB_PAGE stub: always returns false. */
static inline bool hugetlb_bootmem_allocated(void)
{
	return false;
}
1303	#endif /* CONFIG_HUGETLB_PAGE */
1304
1305	static inline spinlock_t huge_pte_lock(struct* hstate *h,
1306	struct mm_struct mm, pte_t pte)
1307	{
1308	spinlock_t *ptl;
1309
1310	ptl = huge_pte_lockptr(h, mm, pte);
1311	spin_lock(lock: ptl);
1312	return ptl;
1313	}
1314
1315	#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
1316	extern void __init hugetlb_cma_reserve(int order);
1317	#else
1318	static inline __init void hugetlb_cma_reserve(int order)
1319	{
1320	}
1321	#endif
1322
1323	#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
1324	static inline bool hugetlb_pmd_shared(pte_t *pte)
1325	{
1326	return page_count(virt_to_page(pte)) > `1`;
1327	}
1328	#else
1329	static inline bool hugetlb_pmd_shared(pte_t *pte)
1330	{
1331	return false;
1332	}
1333	#endif
1334
/* Declared here; the definition is outside this header. */
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
1336
#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
/*
 * ARCHes with special requirements for evicting HUGETLB backing TLB entries can
 * implement this. The default simply maps to flush_tlb_range().
 */
#define flush_hugetlb_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#endif
1344
1345	static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
1346	{
1347	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
1348	}
1349
/* Declared here; the definition is outside this header. */
bool __vma_private_lock(struct vm_area_struct *vma);
1351
1352	/*
1353	* Safe version of huge_pte_offset() to check the locks. See comments
1354	* above huge_pte_offset().
1355	*/
1356	static inline pte_t *
1357	hugetlb_walk(struct vm_area_struct vma, unsigned* long addr, unsigned long sz)
1358	{
1359	#if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP)
1360	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
1361
1362	/*
1363	* If pmd sharing possible, locking needed to safely walk the
1364	* hugetlb pgtables. More information can be found at the comment
1365	* above huge_pte_offset() in the same file.
1366	*
1367	* NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
1368	*/
1369	if (__vma_shareable_lock(vma))
1370	WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
1371	!lockdep_is_held(
1372	&vma->vm_file->f_mapping->i_mmap_rwsem));
1373	#endif
1374	return huge_pte_offset(mm: vma->vm_mm, addr, sz);
1375	}
1376
1377	#endif /* _LINUX_HUGETLB_H */
1378

Browse the source code of Linux/include/linux/hugetlb.h