// SPDX-License-Identifier: GPL-2.0
/*
 * Memory Migration functionality - linux/mm/migrate.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter
 */

#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/hugetlb.h>
#include <linux/gfp.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
#include <linux/ptrace.h>
#include <linux/memory.h>
#include <linux/sched/sysctl.h>
#include <linux/memory-tiers.h>
#include <linux/pagewalk.h>

#include <asm/tlbflush.h>

#include <trace/events/migrate.h>

#include "internal.h"
#include "swap.h"

static const struct movable_operations *offline_movable_ops;
static const struct movable_operations *zsmalloc_movable_ops;

int set_movable_ops(const struct movable_operations *ops, enum pagetype type)
{
	/*
	 * We only allow for selected types and don't handle concurrent
	 * registration attempts yet.
	 */
	switch (type) {
	case PGTY_offline:
		if (offline_movable_ops && ops)
			return -EBUSY;
		offline_movable_ops = ops;
		break;
	case PGTY_zsmalloc:
		if (zsmalloc_movable_ops && ops)
			return -EBUSY;
		zsmalloc_movable_ops = ops;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(set_movable_ops);

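/*
 * Illustrative registration sketch (not code from this file; the callback
 * names are hypothetical): a driver that makes its PGTY_offline pages
 * movable could wire up its movable_operations like so:
 *
 *	static const struct movable_operations my_mops = {
 *		.isolate_page	= my_isolate_page,
 *		.migrate_page	= my_migrate_page,
 *		.putback_page	= my_putback_page,
 *	};
 *
 *	if (set_movable_ops(&my_mops, PGTY_offline))
 *		return -EBUSY;	// already registered by someone else
 *	...
 *	set_movable_ops(NULL, PGTY_offline);	// unregister on teardown
 */
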
static const struct movable_operations *page_movable_ops(struct page *page)
{
	VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(page), page);

	/*
	 * If we enable page migration for a page of a certain type by marking
	 * it as movable, the page type must be sticky until the page gets freed
	 * back to the buddy.
	 */
	if (PageOffline(page))
		/* Only balloon compaction sets PageOffline pages movable. */
		return offline_movable_ops;
	if (PageZsmalloc(page))
		return zsmalloc_movable_ops;

	return NULL;
}

/**
 * isolate_movable_ops_page - isolate a movable_ops page for migration
 * @page: The page.
 * @mode: The isolation mode.
 *
 * Try to isolate a movable_ops page for migration. Will fail if the page is
 * not a movable_ops page, if the page is already isolated for migration,
 * or if the page was just released by its owner.
 *
 * Once isolated, the page cannot get freed until it is either putback
 * or migrated.
 *
 * Returns true if isolation succeeded, otherwise false.
 */
bool isolate_movable_ops_page(struct page *page, isolate_mode_t mode)
{
	/*
	 * TODO: these pages will not be folios in the future. All
	 * folio dependencies will have to be removed.
	 */
	struct folio *folio = folio_get_nontail_page(page);
	const struct movable_operations *mops;

	/*
	 * Avoid burning cycles with pages that are yet under __free_pages(),
	 * or just got freed under us.
	 *
	 * In case we 'win' a race for a movable page being freed under us and
	 * raise its refcount, preventing __free_pages() from doing its job,
	 * the folio_put() at the end of this block will take care of
	 * releasing the page, thus avoiding a nasty leak.
	 */
	if (!folio)
		goto out;

	/*
	 * Check for movable_ops pages before taking the page lock, because
	 * we use non-atomic bitops on newly allocated page flags, so
	 * unconditionally grabbing the lock would ruin the page owner's setup.
	 *
	 * Note that once a page has movable_ops, it will stay that way
	 * until the page is freed.
	 */
	if (unlikely(!page_has_movable_ops(page)))
		goto out_putfolio;

	/*
	 * As movable pages are not isolated from LRU lists, concurrent
	 * compaction threads can race against page migration functions
	 * as well as against the release of a page.
	 *
	 * In order to avoid having an already isolated movable page
	 * being (wrongly) re-isolated while it is under migration,
	 * or to avoid attempting to isolate pages being released,
	 * let's be sure we have the page lock
	 * before proceeding with the movable page isolation steps.
	 */
	if (unlikely(!folio_trylock(folio)))
		goto out_putfolio;

	VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(page), page);
	if (PageMovableOpsIsolated(page))
		goto out_no_isolated;

	mops = page_movable_ops(page);
	if (WARN_ON_ONCE(!mops))
		goto out_no_isolated;

	if (!mops->isolate_page(page, mode))
		goto out_no_isolated;

	/* Driver shouldn't use the isolated flag */
	VM_WARN_ON_ONCE_PAGE(PageMovableOpsIsolated(page), page);
	SetPageMovableOpsIsolated(page);
	folio_unlock(folio);

	return true;

out_no_isolated:
	folio_unlock(folio);
out_putfolio:
	folio_put(folio);
out:
	return false;
}

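/*
 * Illustrative usage sketch (not code from this file): a compaction-style
 * caller pairs isolation with either migration or a later putback, e.g.
 *
 *	if (isolate_movable_ops_page(page, ISOLATE_UNEVICTABLE)) {
 *		// the page cannot get freed until migrated or putback
 *		...
 *	}
 */
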
/**
 * putback_movable_ops_page - putback an isolated movable_ops page
 * @page: The isolated page.
 *
 * Putback an isolated movable_ops page.
 *
 * After the page was putback, it might get freed instantly.
 */
static void putback_movable_ops_page(struct page *page)
{
	/*
	 * TODO: these pages will not be folios in the future. All
	 * folio dependencies will have to be removed.
	 */
	struct folio *folio = page_folio(page);

	VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(page), page);
	VM_WARN_ON_ONCE_PAGE(!PageMovableOpsIsolated(page), page);
	folio_lock(folio);
	page_movable_ops(page)->putback_page(page);
	ClearPageMovableOpsIsolated(page);
	folio_unlock(folio);
	folio_put(folio);
}

/**
 * migrate_movable_ops_page - migrate an isolated movable_ops page
 * @dst: The destination page.
 * @src: The source page.
 * @mode: The migration mode.
 *
 * Migrate an isolated movable_ops page.
 *
 * If the src page was already released by its owner, the src page is
 * un-isolated (putback) and migration succeeds; the migration core will be the
 * owner of both pages.
 *
 * If the src page was not released by its owner and the migration was
 * successful, the owner of the src page and the dst page are swapped and
 * the src page is un-isolated.
 *
 * If migration fails, the ownership stays unmodified and the src page
 * remains isolated: migration may be retried later or the page can be putback.
 *
 * TODO: migration core will treat both pages as folios and lock them before
 * this call to unlock them after this call. Further, the folio refcounts on
 * src and dst are also released by migration core. These pages will not be
 * folios in the future, so that must be reworked.
 *
 * Returns 0 on success, otherwise a negative error code.
 */
static int migrate_movable_ops_page(struct page *dst, struct page *src,
		enum migrate_mode mode)
{
	int rc;

	VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(src), src);
	VM_WARN_ON_ONCE_PAGE(!PageMovableOpsIsolated(src), src);
	rc = page_movable_ops(src)->migrate_page(dst, src, mode);
	if (!rc)
		ClearPageMovableOpsIsolated(src);
	return rc;
}

/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 *
 * This function shall be used whenever the isolated pageset has been
 * built from LRU, balloon or hugetlb pages. See isolate_migratepages_range()
 * and folio_isolate_hugetlb().
 */
void putback_movable_pages(struct list_head *l)
{
	struct folio *folio;
	struct folio *folio2;

	list_for_each_entry_safe(folio, folio2, l, lru) {
		if (unlikely(folio_test_hugetlb(folio))) {
			folio_putback_hugetlb(folio);
			continue;
		}
		list_del(&folio->lru);
		if (unlikely(page_has_movable_ops(&folio->page))) {
			putback_movable_ops_page(&folio->page);
		} else {
			node_stat_mod_folio(folio, NR_ISOLATED_ANON +
					folio_is_file_lru(folio), -folio_nr_pages(folio));
			folio_putback_lru(folio);
		}
	}
}

/* Must be called with an elevated refcount on the non-hugetlb folio */
bool isolate_folio_to_list(struct folio *folio, struct list_head *list)
{
	if (folio_test_hugetlb(folio))
		return folio_isolate_hugetlb(folio, list);

	if (page_has_movable_ops(&folio->page)) {
		if (!isolate_movable_ops_page(&folio->page,
					      ISOLATE_UNEVICTABLE))
			return false;
	} else {
		if (!folio_isolate_lru(folio))
			return false;
		node_stat_add_folio(folio, NR_ISOLATED_ANON +
				folio_is_file_lru(folio));
	}
	list_add(&folio->lru, list);
	return true;
}

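/*
 * Illustrative pairing sketch (not code from this file): callers typically
 * build up a private list with isolate_folio_to_list() and, when migration
 * of that list fails or is abandoned, return the leftovers via
 * putback_movable_pages():
 *
 *	LIST_HEAD(pagelist);
 *
 *	if (isolate_folio_to_list(folio, &pagelist)) {
 *		...	// e.g. hand &pagelist to migrate_pages()
 *		putback_movable_pages(&pagelist);	// on failure
 *	}
 */
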
static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
		struct folio *folio, pte_t old_pte, unsigned long idx)
{
	struct page *page = folio_page(folio, idx);
	pte_t newpte;

	if (PageCompound(page))
		return false;
	VM_BUG_ON_PAGE(!PageAnon(page), page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(pte_present(old_pte), page);

	if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
	    mm_forbids_zeropage(pvmw->vma->vm_mm))
		return false;

	/*
	 * The pmd entry mapping the old thp was flushed and the pte mapping
	 * this subpage has been made non-present. If the subpage is only
	 * zero-filled then map it to the shared zeropage.
	 */
	if (!pages_identical(page, ZERO_PAGE(0)))
		return false;

	newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
				       pvmw->vma->vm_page_prot));

	if (pte_swp_soft_dirty(old_pte))
		newpte = pte_mksoft_dirty(newpte);
	if (pte_swp_uffd_wp(old_pte))
		newpte = pte_mkuffd_wp(newpte);

	set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);

	dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
	return true;
}

struct rmap_walk_arg {
	struct folio *folio;
	bool map_unused_to_zeropage;
};

/*
 * Restore a potential migration pte to a working pte entry
 */
static bool remove_migration_pte(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	struct rmap_walk_arg *rmap_walk_arg = arg;
	DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr, PVMW_SYNC | PVMW_MIGRATION);

	while (page_vma_mapped_walk(&pvmw)) {
		rmap_t rmap_flags = RMAP_NONE;
		pte_t old_pte;
		pte_t pte;
		swp_entry_t entry;
		struct page *new;
		unsigned long idx = 0;

		/* pgoff is invalid for ksm pages, but they are never large */
		if (folio_test_large(folio) && !folio_test_hugetlb(folio))
			idx = linear_page_index(vma, pvmw.address) - pvmw.pgoff;
		new = folio_page(folio, idx);

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte) {
			VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) ||
					!folio_test_pmd_mappable(folio), folio);
			remove_migration_pmd(&pvmw, new);
			continue;
		}
#endif
		old_pte = ptep_get(pvmw.pte);
		if (rmap_walk_arg->map_unused_to_zeropage &&
		    try_to_map_unused_to_zeropage(&pvmw, folio, old_pte, idx))
			continue;

		folio_get(folio);
		pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));

		entry = pte_to_swp_entry(old_pte);
		if (!is_migration_entry_young(entry))
			pte = pte_mkold(pte);
		if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
			pte = pte_mkdirty(pte);
		if (pte_swp_soft_dirty(old_pte))
			pte = pte_mksoft_dirty(pte);
		else
			pte = pte_clear_soft_dirty(pte);

		if (is_writable_migration_entry(entry))
			pte = pte_mkwrite(pte, vma);
		else if (pte_swp_uffd_wp(old_pte))
			pte = pte_mkuffd_wp(pte);

		if (folio_test_anon(folio) && !is_readable_migration_entry(entry))
			rmap_flags |= RMAP_EXCLUSIVE;

		if (unlikely(is_device_private_page(new))) {
			if (pte_write(pte))
				entry = make_writable_device_private_entry(
							page_to_pfn(new));
			else
				entry = make_readable_device_private_entry(
							page_to_pfn(new));
			pte = swp_entry_to_pte(entry);
			if (pte_swp_soft_dirty(old_pte))
				pte = pte_swp_mksoft_dirty(pte);
			if (pte_swp_uffd_wp(old_pte))
				pte = pte_swp_mkuffd_wp(pte);
		}

#ifdef CONFIG_HUGETLB_PAGE
		if (folio_test_hugetlb(folio)) {
			struct hstate *h = hstate_vma(vma);
			unsigned int shift = huge_page_shift(h);
			unsigned long psize = huge_page_size(h);

			pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
			if (folio_test_anon(folio))
				hugetlb_add_anon_rmap(folio, vma, pvmw.address,
						      rmap_flags);
			else
				hugetlb_add_file_rmap(folio);
			set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte,
					psize);
		} else
#endif
		{
			if (folio_test_anon(folio))
				folio_add_anon_rmap_pte(folio, new, vma,
							pvmw.address, rmap_flags);
			else
				folio_add_file_rmap_pte(folio, new, vma);
			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
		}
		if (READ_ONCE(vma->vm_flags) & VM_LOCKED)
			mlock_drain_local();

		trace_remove_migration_pte(pvmw.address, pte_val(pte),
					   compound_order(new));

		/* No need to invalidate - it was non-present before */
		update_mmu_cache(vma, pvmw.address, pvmw.pte);
	}

	return true;
}

/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
void remove_migration_ptes(struct folio *src, struct folio *dst, int flags)
{
	struct rmap_walk_arg rmap_walk_arg = {
		.folio = src,
		.map_unused_to_zeropage = flags & RMP_USE_SHARED_ZEROPAGE,
	};

	struct rmap_walk_control rwc = {
		.rmap_one = remove_migration_pte,
		.arg = &rmap_walk_arg,
	};

	VM_BUG_ON_FOLIO((flags & RMP_USE_SHARED_ZEROPAGE) && (src != dst), src);

	if (flags & RMP_LOCKED)
		rmap_walk_locked(dst, &rwc);
	else
		rmap_walk(dst, &rwc);
}

/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */
void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
			  unsigned long address)
{
	spinlock_t *ptl;
	pte_t *ptep;
	pte_t pte;
	swp_entry_t entry;

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	if (!ptep)
		return;

	pte = ptep_get(ptep);
	pte_unmap(ptep);

	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry))
		goto out;

	migration_entry_wait_on_locked(entry, ptl);
	return;
out:
	spin_unlock(ptl);
}

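/*
 * Illustrative caller sketch (an assumption about the fault path, not code
 * from this file): a fault that finds a migration entry instead of a
 * present pte waits roughly like this before the fault is retried:
 *
 *	entry = pte_to_swp_entry(vmf->orig_pte);
 *	if (is_migration_entry(entry))
 *		migration_entry_wait(vma->vm_mm, vmf->pmd, vmf->address);
 */
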
#ifdef CONFIG_HUGETLB_PAGE
/*
 * The vma read lock must be held upon entry. Holding that lock prevents either
 * the pte or the ptl from being freed.
 *
 * This function will release the vma lock before returning.
 */
void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), vma->vm_mm, ptep);
	pte_t pte;

	hugetlb_vma_assert_locked(vma);
	spin_lock(ptl);
	pte = huge_ptep_get(vma->vm_mm, addr, ptep);

	if (unlikely(!is_hugetlb_entry_migration(pte))) {
		spin_unlock(ptl);
		hugetlb_vma_unlock_read(vma);
	} else {
		/*
		 * If migration entry existed, safe to release vma lock
		 * here because the pgtable page won't be freed without the
		 * pgtable lock released. See comment right above pgtable
		 * lock release in migration_entry_wait_on_locked().
		 */
		hugetlb_vma_unlock_read(vma);
		migration_entry_wait_on_locked(pte_to_swp_entry(pte), ptl);
	}
}
#endif

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl;

	ptl = pmd_lock(mm, pmd);
	if (!is_pmd_migration_entry(*pmd))
		goto unlock;
	migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), ptl);
	return;
unlock:
	spin_unlock(ptl);
}
#endif

/*
 * Replace the folio in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous folios without a mapping
 * 2 for folios with a mapping
 * 3 for folios with a mapping and the private flag set.
 */
static int __folio_migrate_mapping(struct address_space *mapping,
		struct folio *newfolio, struct folio *folio, int expected_count)
{
	XA_STATE(xas, &mapping->i_pages, folio_index(folio));
	struct swap_cluster_info *ci = NULL;
	struct zone *oldzone, *newzone;
	int dirty;
	long nr = folio_nr_pages(folio);

	if (!mapping) {
		/* Take off deferred split queue while frozen and memcg set */
		if (folio_test_large(folio) &&
		    folio_test_large_rmappable(folio)) {
			if (!folio_ref_freeze(folio, expected_count))
				return -EAGAIN;
			folio_unqueue_deferred_split(folio);
			folio_ref_unfreeze(folio, expected_count);
		}

		/* No turning back from here */
		newfolio->index = folio->index;
		newfolio->mapping = folio->mapping;
		if (folio_test_anon(folio) && folio_test_large(folio))
			mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
		if (folio_test_swapbacked(folio))
			__folio_set_swapbacked(newfolio);

		return 0;
	}

	oldzone = folio_zone(folio);
	newzone = folio_zone(newfolio);

	if (folio_test_swapcache(folio))
		ci = swap_cluster_get_and_lock_irq(folio);
	else
		xas_lock_irq(&xas);

	if (!folio_ref_freeze(folio, expected_count)) {
		if (ci)
			swap_cluster_unlock_irq(ci);
		else
			xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	/* Take off deferred split queue while frozen and memcg set */
	folio_unqueue_deferred_split(folio);

	/*
	 * Now we know that no one else is looking at the folio:
	 * no turning back from here.
	 */
	newfolio->index = folio->index;
	newfolio->mapping = folio->mapping;
	if (folio_test_anon(folio) && folio_test_large(folio))
		mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
	folio_ref_add(newfolio, nr); /* add cache reference */
	if (folio_test_swapbacked(folio))
		__folio_set_swapbacked(newfolio);
	if (folio_test_swapcache(folio)) {
		folio_set_swapcache(newfolio);
		newfolio->private = folio_get_private(folio);
	}

	/* Move dirty while folio refs frozen and newfolio not yet exposed */
	dirty = folio_test_dirty(folio);
	if (dirty) {
		folio_clear_dirty(folio);
		folio_set_dirty(newfolio);
	}

	if (folio_test_swapcache(folio))
		__swap_cache_replace_folio(ci, folio, newfolio);
	else
		xas_store(&xas, newfolio);

	/*
	 * Drop cache reference from old folio by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
	folio_ref_unfreeze(folio, expected_count - nr);

	/* Leave irq disabled to prevent preemption while updating stats */
	if (ci)
		swap_cluster_unlock(ci);
	else
		xas_unlock(&xas);

	/*
	 * If moved to a different zone then also account
	 * the folio for that zone. Other VM counters will be
	 * taken care of when we establish references to the
	 * new folio and drop references to the old folio.
	 *
	 * Note that anonymous folios are accounted for
	 * via NR_FILE_PAGES and NR_ANON_MAPPED if they
	 * are mapped to swap space.
	 */
	if (newzone != oldzone) {
		struct lruvec *old_lruvec, *new_lruvec;
		struct mem_cgroup *memcg;

		memcg = folio_memcg(folio);
		old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat);
		new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat);

		__mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr);
		__mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr);
		if (folio_test_swapbacked(folio) && !folio_test_swapcache(folio)) {
			__mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
			__mod_lruvec_state(new_lruvec, NR_SHMEM, nr);

			if (folio_test_pmd_mappable(folio)) {
				__mod_lruvec_state(old_lruvec, NR_SHMEM_THPS, -nr);
				__mod_lruvec_state(new_lruvec, NR_SHMEM_THPS, nr);
			}
		}
#ifdef CONFIG_SWAP
		if (folio_test_swapcache(folio)) {
			__mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr);
			__mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr);
		}
#endif
		if (dirty && mapping_can_writeback(mapping)) {
			__mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr);
			__mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr);
			__mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr);
			__mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr);
		}
	}
	local_irq_enable();

	return 0;
}

int folio_migrate_mapping(struct address_space *mapping,
		struct folio *newfolio, struct folio *folio, int extra_count)
{
	int expected_count = folio_expected_ref_count(folio) + extra_count + 1;

	if (folio_ref_count(folio) != expected_count)
		return -EAGAIN;

	return __folio_migrate_mapping(mapping, newfolio, folio, expected_count);
}
EXPORT_SYMBOL(folio_migrate_mapping);

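/*
 * Worked example of the refcount arithmetic above (a sketch with made-up
 * numbers): for an order-0 pagecache folio that is in the page cache (+1)
 * and has the private flag set (+1), folio_expected_ref_count() yields 2,
 * so with extra_count == 0 the caller's own reference makes expected_count
 * 3. Any transient extra reference held elsewhere fails the check with
 * -EAGAIN and migration is retried later.
 */
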
/*
 * The expected number of remaining references is the same as that
 * of folio_migrate_mapping().
 */
int migrate_huge_page_move_mapping(struct address_space *mapping,
		struct folio *dst, struct folio *src)
{
	XA_STATE(xas, &mapping->i_pages, folio_index(src));
	int rc, expected_count = folio_expected_ref_count(src) + 1;

	if (folio_ref_count(src) != expected_count)
		return -EAGAIN;

	rc = folio_mc_copy(dst, src);
	if (unlikely(rc))
		return rc;

	xas_lock_irq(&xas);
	if (!folio_ref_freeze(src, expected_count)) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	dst->index = src->index;
	dst->mapping = src->mapping;

	folio_ref_add(dst, folio_nr_pages(dst));

	xas_store(&xas, dst);

	folio_ref_unfreeze(src, expected_count - folio_nr_pages(src));

	xas_unlock_irq(&xas);

	return 0;
}

/*
 * Copy the flags and some other ancillary information
 */
void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
{
	int cpupid;

	if (folio_test_referenced(folio))
		folio_set_referenced(newfolio);
	if (folio_test_uptodate(folio))
		folio_mark_uptodate(newfolio);
	if (folio_test_clear_active(folio)) {
		VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio);
		folio_set_active(newfolio);
	} else if (folio_test_clear_unevictable(folio))
		folio_set_unevictable(newfolio);
	if (folio_test_workingset(folio))
		folio_set_workingset(newfolio);
	if (folio_test_checked(folio))
		folio_set_checked(newfolio);
	/*
	 * PG_anon_exclusive (-> PG_mappedtodisk) is always migrated via
	 * migration entries. We can still have PG_anon_exclusive set on
	 * effectively unmapped and unreferenced first sub-pages of an
	 * anonymous THP: we can simply copy it here via PG_mappedtodisk.
	 */
	if (folio_test_mappedtodisk(folio))
		folio_set_mappedtodisk(newfolio);

	/* Move dirty on pages not done by folio_migrate_mapping() */
	if (folio_test_dirty(folio))
		folio_set_dirty(newfolio);

	if (folio_test_young(folio))
		folio_set_young(newfolio);
	if (folio_test_idle(folio))
		folio_set_idle(newfolio);

	folio_migrate_refs(newfolio, folio);
	/*
	 * Copy NUMA information to the new page, to prevent over-eager
	 * future migrations of this same page.
	 */
	cpupid = folio_xchg_last_cpupid(folio, -1);
	/*
	 * For memory tiering mode, when migrating between slow and fast
	 * memory nodes, reset cpupid, because it is used to record
	 * page access time in slow memory nodes.
	 */
	if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) {
		bool f_toptier = node_is_toptier(folio_nid(folio));
		bool t_toptier = node_is_toptier(folio_nid(newfolio));

		if (f_toptier != t_toptier)
			cpupid = -1;
	}
	folio_xchg_last_cpupid(newfolio, cpupid);

	folio_migrate_ksm(newfolio, folio);
	/*
	 * Please do not reorder this without considering how mm/ksm.c's
	 * ksm_get_folio() depends upon ksm_migrate_page() and the
	 * swapcache flag.
	 */
	if (folio_test_swapcache(folio))
		folio_clear_swapcache(folio);
	folio_clear_private(folio);

	/* page->private contains hugetlb specific flags */
	if (!folio_test_hugetlb(folio))
		folio->private = NULL;

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (folio_test_writeback(newfolio))
		folio_end_writeback(newfolio);

	/*
	 * PG_readahead shares the same bit with PG_reclaim. The above
	 * end_page_writeback() may clear PG_readahead mistakenly, so set the
	 * bit after that.
	 */
	if (folio_test_readahead(folio))
		folio_set_readahead(newfolio);

	folio_copy_owner(newfolio, folio);
	pgalloc_tag_swap(newfolio, folio);

	mem_cgroup_migrate(folio, newfolio);
}
EXPORT_SYMBOL(folio_migrate_flags);

/************************************************************
 * Migration functions
 ***********************************************************/

static int __migrate_folio(struct address_space *mapping, struct folio *dst,
		struct folio *src, void *src_private,
		enum migrate_mode mode)
{
	int rc, expected_count = folio_expected_ref_count(src) + 1;

	/* Check whether src does not have extra refs before we do more work */
	if (folio_ref_count(src) != expected_count)
		return -EAGAIN;

	rc = folio_mc_copy(dst, src);
	if (unlikely(rc))
		return rc;

	rc = __folio_migrate_mapping(mapping, dst, src, expected_count);
	if (rc)
		return rc;

	if (src_private)
		folio_attach_private(dst, folio_detach_private(src));

	folio_migrate_flags(dst, src);
	return 0;
}

/**
 * migrate_folio() - Simple folio migration.
 * @mapping: The address_space containing the folio.
 * @dst: The folio to migrate the data to.
 * @src: The folio containing the current data.
 * @mode: How to migrate the page.
 *
 * Common logic to directly migrate a single LRU folio suitable for
 * folios that do not have private data.
 *
 * Folios are locked upon entry and exit.
 */
int migrate_folio(struct address_space *mapping, struct folio *dst,
		struct folio *src, enum migrate_mode mode)
{
	BUG_ON(folio_test_writeback(src));	/* Writeback must be complete */
	return __migrate_folio(mapping, dst, src, NULL, mode);
}
EXPORT_SYMBOL(migrate_folio);

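/*
 * Illustrative wiring sketch (not code from this file): filesystems opt in
 * by pointing their address_space_operations at one of the helpers here,
 * e.g. for mappings without private data:
 *
 *	static const struct address_space_operations my_aops = {
 *		...
 *		.migrate_folio	= migrate_folio,
 *	};
 *
 * Mappings whose folios carry data at folio->private would instead use
 * filemap_migrate_folio() or the buffer_head variants below.
 */
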
#ifdef CONFIG_BUFFER_HEAD
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
		enum migrate_mode mode)
{
	struct buffer_head *bh = head;
	struct buffer_head *failed_bh;

	do {
		if (!trylock_buffer(bh)) {
			if (mode == MIGRATE_ASYNC)
				goto unlock;
			if (mode == MIGRATE_SYNC_LIGHT && !buffer_uptodate(bh))
				goto unlock;
			lock_buffer(bh);
		}

		bh = bh->b_this_page;
	} while (bh != head);

	return true;

unlock:
	/* We failed to lock the buffer and cannot stall. */
	failed_bh = bh;
	bh = head;
	while (bh != failed_bh) {
		unlock_buffer(bh);
		bh = bh->b_this_page;
	}

	return false;
}

static int __buffer_migrate_folio(struct address_space *mapping,
		struct folio *dst, struct folio *src, enum migrate_mode mode,
		bool check_refs)
{
	struct buffer_head *bh, *head;
	int rc;
	int expected_count;

	head = folio_buffers(src);
	if (!head)
		return migrate_folio(mapping, dst, src, mode);

	/* Check whether page does not have extra refs before we do more work */
	expected_count = folio_expected_ref_count(src) + 1;
	if (folio_ref_count(src) != expected_count)
		return -EAGAIN;

	if (!buffer_migrate_lock_buffers(head, mode))
		return -EAGAIN;

	if (check_refs) {
		bool busy, migrating;
		bool invalidated = false;

		migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state);
		VM_WARN_ON_ONCE(migrating);
recheck_buffers:
		busy = false;
		spin_lock(&mapping->i_private_lock);
		bh = head;
		do {
			if (atomic_read(&bh->b_count)) {
				busy = true;
				break;
			}
			bh = bh->b_this_page;
		} while (bh != head);
		spin_unlock(&mapping->i_private_lock);
		if (busy) {
			if (invalidated) {
				rc = -EAGAIN;
				goto unlock_buffers;
			}
			invalidate_bh_lrus();
			invalidated = true;
			goto recheck_buffers;
		}
	}

	rc = filemap_migrate_folio(mapping, dst, src, mode);
	if (rc)
		goto unlock_buffers;

	bh = head;
	do {
		folio_set_bh(bh, dst, bh_offset(bh));
		bh = bh->b_this_page;
	} while (bh != head);

unlock_buffers:
	if (check_refs)
		clear_bit_unlock(BH_Migrate, &head->b_state);
	bh = head;
	do {
		unlock_buffer(bh);
		bh = bh->b_this_page;
	} while (bh != head);

	return rc;
}

/**
 * buffer_migrate_folio() - Migration function for folios with buffers.
 * @mapping: The address space containing @src.
 * @dst: The folio to migrate to.
 * @src: The folio to migrate from.
 * @mode: How to migrate the folio.
 *
 * This function can only be used if the underlying filesystem guarantees
 * that no other references to @src exist. For example attached buffer
 * heads are accessed only under the folio lock. If your filesystem cannot
 * provide this guarantee, buffer_migrate_folio_norefs() may be more
 * appropriate.
 *
 * Return: 0 on success or a negative errno on failure.
 */
int buffer_migrate_folio(struct address_space *mapping,
		struct folio *dst, struct folio *src, enum migrate_mode mode)
{
	return __buffer_migrate_folio(mapping, dst, src, mode, false);
}
EXPORT_SYMBOL(buffer_migrate_folio);

/**
 * buffer_migrate_folio_norefs() - Migration function for folios with buffers.
 * @mapping: The address space containing @src.
 * @dst: The folio to migrate to.
 * @src: The folio to migrate from.
 * @mode: How to migrate the folio.
 *
 * Like buffer_migrate_folio() except that this variant is more careful
 * and checks that there are also no buffer head references. This function
 * is the right one for mappings where buffer heads are directly looked
 * up and referenced (such as block device mappings).
 *
 * Return: 0 on success or a negative errno on failure.
 */
int buffer_migrate_folio_norefs(struct address_space *mapping,
		struct folio *dst, struct folio *src, enum migrate_mode mode)
{
	return __buffer_migrate_folio(mapping, dst, src, mode, true);
}
EXPORT_SYMBOL_GPL(buffer_migrate_folio_norefs);
#endif /* CONFIG_BUFFER_HEAD */

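/*
 * Illustrative wiring sketch for the buffer_head variants (not code from
 * this file): a mapping where buffer heads are looked up and referenced
 * directly, such as a block device mapping, would pick the careful one:
 *
 *	static const struct address_space_operations my_blkdev_aops = {
 *		...
 *		.migrate_folio	= buffer_migrate_folio_norefs,
 *	};
 */
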
int filemap_migrate_folio(struct address_space *mapping,
		struct folio *dst, struct folio *src, enum migrate_mode mode)
{
	return __migrate_folio(mapping, dst, src, folio_get_private(src), mode);
}
EXPORT_SYMBOL_GPL(filemap_migrate_folio);

/*
 * Default handling if a filesystem does not provide a migration function.
 */
static int fallback_migrate_folio(struct address_space *mapping,
		struct folio *dst, struct folio *src, enum migrate_mode mode)
{
	WARN_ONCE(mapping->a_ops->writepages,
		  "%ps does not implement migrate_folio\n",
		  mapping->a_ops);
	if (folio_test_dirty(src))
		return -EBUSY;

	/*
	 * Filesystem may have private data at folio->private that we
	 * can't migrate automatically.
	 */
	if (!filemap_release_folio(src, GFP_KERNEL))
		return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY;

	return migrate_folio(mapping, dst, src, mode);
}

/*
 * Move a src folio to a newly allocated dst folio.
 *
 * The src and dst folios are locked and the src folio was unmapped from
 * the page tables.
 *
 * On success, the src folio was replaced by the dst folio.
 *
 * Return value:
 *   < 0 - error code
 *     0 - success
 */
static int move_to_new_folio(struct folio *dst, struct folio *src,
		enum migrate_mode mode)
{
	struct address_space *mapping = folio_mapping(src);
	int rc = -EAGAIN;

	VM_BUG_ON_FOLIO(!folio_test_locked(src), src);
	VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst);

	if (!mapping)
		rc = migrate_folio(mapping, dst, src, mode);
	else if (mapping_inaccessible(mapping))
		rc = -EOPNOTSUPP;
	else if (mapping->a_ops->migrate_folio)
		/*
		 * Most folios have a mapping and most filesystems
		 * provide a migrate_folio callback. Anonymous folios
		 * are part of swap space which also has its own
		 * migrate_folio callback. This is the most common path
		 * for page migration.
		 */
		rc = mapping->a_ops->migrate_folio(mapping, dst, src,
				mode);
	else
		rc = fallback_migrate_folio(mapping, dst, src, mode);

	if (!rc) {
		/*
		 * For pagecache folios, src->mapping must be cleared before src
		 * is freed. Anonymous folios must stay anonymous until freed.
		 */
		if (!folio_test_anon(src))
			src->mapping = NULL;

		if (likely(!folio_is_zone_device(dst)))
			flush_dcache_folio(dst);
	}
	return rc;
}

/*
 * To record some information during migration, we use unused private
 * field of struct folio of the newly allocated destination folio.
 * This is safe because nobody is using it except us.
 */
enum {
	PAGE_WAS_MAPPED = BIT(0),
	PAGE_WAS_MLOCKED = BIT(1),
	PAGE_OLD_STATES = PAGE_WAS_MAPPED | PAGE_WAS_MLOCKED,
};

static void __migrate_folio_record(struct folio *dst,
				   int old_page_state,
				   struct anon_vma *anon_vma)
{
	dst->private = (void *)anon_vma + old_page_state;
}

static void __migrate_folio_extract(struct folio *dst,
				    int *old_page_state,
				    struct anon_vma **anon_vmap)
{
	unsigned long private = (unsigned long)dst->private;

	*anon_vmap = (struct anon_vma *)(private & ~PAGE_OLD_STATES);
	*old_page_state = private & PAGE_OLD_STATES;
	dst->private = NULL;
}

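/*
 * Packing sketch for the two helpers above (illustrative only): they rely
 * on anon_vma pointers being aligned such that the low bits are free to
 * carry PAGE_OLD_STATES:
 *
 *	__migrate_folio_record(dst, PAGE_WAS_MAPPED, anon_vma);
 *	...
 *	__migrate_folio_extract(dst, &old_page_state, &anon_vma);
 *	// anon_vma is back, old_page_state == PAGE_WAS_MAPPED
 */
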
/* Restore the source folio to the original state upon failure */
static void migrate_folio_undo_src(struct folio *src,
				   int page_was_mapped,
				   struct anon_vma *anon_vma,
				   bool locked,
				   struct list_head *ret)
{
	if (page_was_mapped)
		remove_migration_ptes(src, src, 0);
	/* Drop an anon_vma reference if we took one */
	if (anon_vma)
		put_anon_vma(anon_vma);
	if (locked)
		folio_unlock(src);
	if (ret)
		list_move_tail(&src->lru, ret);
}

/* Restore the destination folio to the original state upon failure */
static void migrate_folio_undo_dst(struct folio *dst, bool locked,
		free_folio_t put_new_folio, unsigned long private)
{
	if (locked)
		folio_unlock(dst);
	if (put_new_folio)
		put_new_folio(dst, private);
	else
		folio_put(dst);
}

/* Cleanup src folio upon migration success */
static void migrate_folio_done(struct folio *src,
			       enum migrate_reason reason)
{
	if (likely(!page_has_movable_ops(&src->page)) && reason != MR_DEMOTION)
		mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
				    folio_is_file_lru(src), -folio_nr_pages(src));

	if (reason != MR_MEMORY_FAILURE)
		/* We release the page in page_handle_poison. */
		folio_put(src);
}

/* Obtain the lock on page, remove all ptes. */
static int migrate_folio_unmap(new_folio_t get_new_folio,
		free_folio_t put_new_folio, unsigned long private,
		struct folio *src, struct folio **dstp, enum migrate_mode mode,
		struct list_head *ret)
{
	struct folio *dst;
	int rc = -EAGAIN;
	int old_page_state = 0;
	struct anon_vma *anon_vma = NULL;
	bool locked = false;
	bool dst_locked = false;

	dst = get_new_folio(src, private);
	if (!dst)
		return -ENOMEM;
	*dstp = dst;

	dst->private = NULL;

	if (!folio_trylock(src)) {
		if (mode == MIGRATE_ASYNC)
			goto out;

		/*
		 * It's not safe for direct compaction to call lock_page.
		 * For example, during page readahead pages are added locked
		 * to the LRU. Later, when the IO completes the pages are
		 * marked uptodate and unlocked. However, the queueing
		 * could be merging multiple pages for one bio (e.g.
		 * mpage_readahead). If an allocation happens for the
		 * second or third page, the process can end up locking
		 * the same page twice and deadlocking. Rather than
		 * trying to be clever about what pages can be locked,
		 * avoid the use of lock_page for direct compaction
		 * altogether.
		 */
		if (current->flags & PF_MEMALLOC)
			goto out;

		/*
		 * In "light" mode, we can wait for transient locks (eg
		 * inserting a page into the page table), but it's not
		 * worth waiting for I/O.
		 */
		if (mode == MIGRATE_SYNC_LIGHT && !folio_test_uptodate(src))
			goto out;

		folio_lock(src);
	}
	locked = true;
	if (folio_test_mlocked(src))
		old_page_state |= PAGE_WAS_MLOCKED;

	if (folio_test_writeback(src)) {
		/*
		 * Only in the case of a full synchronous migration is it
		 * necessary to wait for PageWriteback. In the async case,
		 * the retry loop is too short and in the sync-light case,
		 * the overhead of stalling is too much
		 */
		switch (mode) {
		case MIGRATE_SYNC:
			break;
		default:
			rc = -EBUSY;
			goto out;
		}
		folio_wait_writeback(src);
	}

	/*
	 * By try_to_migrate(), src->mapcount goes down to 0 here. In this case,
	 * we cannot notice that anon_vma is freed while we migrate a page.
	 * This get_anon_vma() delays freeing the anon_vma pointer until the end
	 * of migration. File cache pages are no problem because of page_lock().
	 * File caches may use write_page() or lock_page() in migration, so we
	 * only need to care about anon pages here.
	 *
	 * Only folio_get_anon_vma() understands the subtleties of
	 * getting a hold on an anon_vma from outside one of its mms.
	 * But if we cannot get anon_vma, then we won't need it anyway,
	 * because that implies that the anon page is no longer mapped
	 * (and cannot be remapped so long as we hold the page lock).
	 */
	if (folio_test_anon(src) && !folio_test_ksm(src))
		anon_vma = folio_get_anon_vma(src);

	/*
	 * Block others from accessing the new page when we get around to
	 * establishing additional references. We are usually the only one
	 * holding a reference to dst at this point. We used to have a BUG
	 * here if folio_trylock(dst) fails, but would like to allow for
	 * cases where there might be a race with the previous use of dst.
	 * This is much like races on refcount of oldpage: just don't BUG().
	 */
	if (unlikely(!folio_trylock(dst)))
		goto out;
	dst_locked = true;

	if (unlikely(page_has_movable_ops(&src->page))) {
		__migrate_folio_record(dst, old_page_state, anon_vma);
		return 0;
	}

	/*
	 * Corner case handling:
	 * 1. When a new swap-cache page is read into, it is added to the LRU
	 * and treated as swapcache but it has no rmap yet.
	 * Calling try_to_unmap() against a src->mapping==NULL page will
	 * trigger a BUG. So handle it here.
	 * 2. An orphaned page (see truncate_cleanup_page) might have
	 * fs-private metadata. The page can be picked up due to memory
	 * offlining. Everywhere else except page reclaim, the page is
	 * invisible to the vm, so the page can not be migrated. So try to
	 * free the metadata, so the page can be freed.
	 */
	if (!src->mapping) {
		if (folio_test_private(src)) {
			try_to_free_buffers(src);
			goto out;
		}
	} else if (folio_mapped(src)) {
		/* Establish migration ptes */
		VM_BUG_ON_FOLIO(folio_test_anon(src) &&
				!folio_test_ksm(src) && !anon_vma, src);
		try_to_migrate(src, mode == MIGRATE_ASYNC ? TTU_BATCH_FLUSH : 0);
		old_page_state |= PAGE_WAS_MAPPED;
	}

	if (!folio_mapped(src)) {
		__migrate_folio_record(dst, old_page_state, anon_vma);
		return 0;
	}

out:
	/*
	 * A folio that has not been unmapped will be restored to the
	 * right list unless we want to retry.
	 */
	if (rc == -EAGAIN)
		ret = NULL;

	migrate_folio_undo_src(src, old_page_state & PAGE_WAS_MAPPED,
			       anon_vma, locked, ret);
	migrate_folio_undo_dst(dst, dst_locked, put_new_folio, private);

	return rc;
}

/* Migrate the folio to the newly allocated folio in dst. */
static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
			      struct folio *src, struct folio *dst,
			      enum migrate_mode mode, enum migrate_reason reason,
			      struct list_head *ret)
{
	int rc;
	int old_page_state = 0;
	struct anon_vma *anon_vma = NULL;
	struct list_head *prev;

	__migrate_folio_extract(dst, &old_page_state, &anon_vma);
	prev = dst->lru.prev;
	list_del(&dst->lru);

	if (unlikely(page_has_movable_ops(&src->page))) {
		rc = migrate_movable_ops_page(&dst->page, &src->page, mode);
		if (rc)
			goto out;
		goto out_unlock_both;
	}

	rc = move_to_new_folio(dst, src, mode);
	if (rc)
		goto out;

	/*
	 * When successful, push dst to LRU immediately: so that if it
	 * turns out to be an mlocked page, remove_migration_ptes() will
	 * automatically build up the correct dst->mlock_count for it.
	 *
	 * We would like to do something similar for the old page, when
	 * unsuccessful, and other cases when a page has been temporarily
	 * isolated from the unevictable LRU: but this case is the easiest.
	 */
	folio_add_lru(dst);
	if (old_page_state & PAGE_WAS_MLOCKED)
		lru_add_drain();

	if (old_page_state & PAGE_WAS_MAPPED)
		remove_migration_ptes(src, dst, 0);

out_unlock_both:
	folio_unlock(dst);
	folio_set_owner_migrate_reason(dst, reason);
	/*
	 * If migration is successful, decrease the refcount of dst,
	 * which will not free the page because the new page owner has
	 * increased the refcount.
	 */
	folio_put(dst);

	/*
	 * A folio that has been migrated has all references removed
	 * and will be freed.
	 */
	list_del(&src->lru);
	/* Drop an anon_vma reference if we took one */
	if (anon_vma)
		put_anon_vma(anon_vma);
	folio_unlock(src);
	migrate_folio_done(src, reason);

	return rc;
out:
	/*
	 * A folio that has not been migrated will be restored to the
	 * right list unless we want to retry.
	 */
	if (rc == -EAGAIN) {
		list_add(&dst->lru, prev);
		__migrate_folio_record(dst, old_page_state, anon_vma);
		return rc;
	}

	migrate_folio_undo_src(src, old_page_state & PAGE_WAS_MAPPED,
			       anon_vma, true, ret);
	migrate_folio_undo_dst(dst, true, put_new_folio, private);

	return rc;
}

/*
 * Counterpart of unmap_and_move_page() for hugepage migration.
 *
 * This function doesn't wait for the completion of hugepage I/O
 * because there is no race between I/O and migration for hugepage.
 * Note that currently hugepage I/O occurs only in direct I/O
 * where no lock is held and PG_writeback is irrelevant,
 * and the writeback status of all subpages is counted in the reference
 * count of the head page (i.e. if all subpages of a 2MB hugepage are
 * under direct I/O, the reference of the head page is 512 and a bit more.)
 * This means that when we try to migrate a hugepage whose subpages are
 * doing direct I/O, some references remain after try_to_unmap() and
 * hugepage migration fails without data corruption.
 *
 * There is also no race when direct I/O is issued on the page under migration,
 * because then pte is replaced with migration swap entry and direct I/O code
 * will wait in the page fault for migration to complete.
 */
| 1442 | static int unmap_and_move_huge_page(new_folio_t get_new_folio, |
| 1443 | free_folio_t put_new_folio, unsigned long private, |
| 1444 | struct folio *src, int force, enum migrate_mode mode, |
| 1445 | int reason, struct list_head *ret) |
| 1446 | { |
| 1447 | struct folio *dst; |
| 1448 | int rc = -EAGAIN; |
| 1449 | int page_was_mapped = 0; |
| 1450 | struct anon_vma *anon_vma = NULL; |
| 1451 | struct address_space *mapping = NULL; |
| 1452 | |
| 1453 | if (folio_ref_count(folio: src) == 1) { |
| 1454 | /* page was freed from under us. So we are done. */ |
| 1455 | folio_putback_hugetlb(folio: src); |
| 1456 | return 0; |
| 1457 | } |
| 1458 | |
| 1459 | dst = get_new_folio(src, private); |
| 1460 | if (!dst) |
| 1461 | return -ENOMEM; |
| 1462 | |
| 1463 | if (!folio_trylock(folio: src)) { |
| 1464 | if (!force) |
| 1465 | goto out; |
| 1466 | switch (mode) { |
| 1467 | case MIGRATE_SYNC: |
| 1468 | break; |
| 1469 | default: |
| 1470 | goto out; |
| 1471 | } |
| 1472 | folio_lock(folio: src); |
| 1473 | } |
| 1474 | |
| 1475 | /* |
| 1476 | * Check for pages which are in the process of being freed. Without |
| 1477 | * folio_mapping() set, hugetlbfs specific move page routine will not |
| 1478 | * be called and we could leak usage counts for subpools. |
| 1479 | */ |
| 1480 | if (hugetlb_folio_subpool(folio: src) && !folio_mapping(folio: src)) { |
| 1481 | rc = -EBUSY; |
| 1482 | goto out_unlock; |
| 1483 | } |
| 1484 | |
| 1485 | if (folio_test_anon(folio: src)) |
| 1486 | anon_vma = folio_get_anon_vma(folio: src); |
| 1487 | |
| 1488 | if (unlikely(!folio_trylock(dst))) |
| 1489 | goto put_anon; |
| 1490 | |
	if (folio_mapped(src)) {
		enum ttu_flags ttu = 0;

		if (!folio_test_anon(src)) {
			/*
			 * In shared mappings, try_to_unmap could potentially
			 * call huge_pmd_unshare. Because of this, take
			 * semaphore in write mode here and set TTU_RMAP_LOCKED
			 * to let lower levels know we have taken the lock.
			 */
			mapping = hugetlb_folio_mapping_lock_write(src);
			if (unlikely(!mapping))
				goto unlock_put_anon;

			ttu = TTU_RMAP_LOCKED;
		}

		try_to_migrate(src, ttu);
		page_was_mapped = 1;

		if (ttu & TTU_RMAP_LOCKED)
			i_mmap_unlock_write(mapping);
	}

	if (!folio_mapped(src))
		rc = move_to_new_folio(dst, src, mode);

	if (page_was_mapped)
		remove_migration_ptes(src, !rc ? dst : src, 0);
| 1520 | |
| 1521 | unlock_put_anon: |
	folio_unlock(dst);
| 1523 | |
| 1524 | put_anon: |
| 1525 | if (anon_vma) |
| 1526 | put_anon_vma(anon_vma); |
| 1527 | |
	if (!rc) {
		move_hugetlb_state(src, dst, reason);
		put_new_folio = NULL;
	}
| 1532 | |
| 1533 | out_unlock: |
	folio_unlock(src);
out:
	if (!rc)
		folio_putback_hugetlb(src);
	else if (rc != -EAGAIN)
		list_move_tail(&src->lru, ret);
| 1540 | |
| 1541 | /* |
| 1542 | * If migration was not successful and there's a freeing callback, |
| 1543 | * return the folio to that special allocator. Otherwise, simply drop |
| 1544 | * our additional reference. |
| 1545 | */ |
| 1546 | if (put_new_folio) |
| 1547 | put_new_folio(dst, private); |
| 1548 | else |
		folio_put(dst);
| 1550 | |
| 1551 | return rc; |
| 1552 | } |
| 1553 | |
| 1554 | static inline int try_split_folio(struct folio *folio, struct list_head *split_folios, |
| 1555 | enum migrate_mode mode) |
| 1556 | { |
| 1557 | int rc; |
| 1558 | |
| 1559 | if (mode == MIGRATE_ASYNC) { |
| 1560 | if (!folio_trylock(folio)) |
| 1561 | return -EAGAIN; |
| 1562 | } else { |
| 1563 | folio_lock(folio); |
| 1564 | } |
	rc = split_folio_to_list(folio, split_folios);
	folio_unlock(folio);
	if (!rc)
		list_move_tail(&folio->lru, split_folios);
| 1569 | |
| 1570 | return rc; |
| 1571 | } |
| 1572 | |
| 1573 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| 1574 | #define NR_MAX_BATCHED_MIGRATION HPAGE_PMD_NR |
| 1575 | #else |
| 1576 | #define NR_MAX_BATCHED_MIGRATION 512 |
| 1577 | #endif |
| 1578 | #define NR_MAX_MIGRATE_PAGES_RETRY 10 |
| 1579 | #define NR_MAX_MIGRATE_ASYNC_RETRY 3 |
| 1580 | #define NR_MAX_MIGRATE_SYNC_RETRY \ |
| 1581 | (NR_MAX_MIGRATE_PAGES_RETRY - NR_MAX_MIGRATE_ASYNC_RETRY) |
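/*
 * Illustrative retry budget with the defaults above: a synchronous
 * migrate_pages() call first tries batched MIGRATE_ASYNC migration with
 * up to NR_MAX_MIGRATE_ASYNC_RETRY (3) passes, then falls back to
 * one-by-one synchronous migration with up to NR_MAX_MIGRATE_SYNC_RETRY
 * (10 - 3 = 7) passes, so the combined effort stays within
 * NR_MAX_MIGRATE_PAGES_RETRY (see migrate_pages_sync() below).
 */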
| 1582 | |
| 1583 | struct migrate_pages_stats { |
| 1584 | int nr_succeeded; /* Normal and large folios migrated successfully, in |
| 1585 | units of base pages */ |
| 1586 | int nr_failed_pages; /* Normal and large folios failed to be migrated, in |
| 1587 | units of base pages. Untried folios aren't counted */ |
| 1588 | int nr_thp_succeeded; /* THP migrated successfully */ |
| 1589 | int nr_thp_failed; /* THP failed to be migrated */ |
| 1590 | int nr_thp_split; /* THP split before migrating */ |
| 1591 | int nr_split; /* Large folio (include THP) split before migrating */ |
| 1592 | }; |
| 1593 | |
| 1594 | /* |
| 1595 | * Returns the number of hugetlb folios that were not migrated, or an error code |
| 1596 | * after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no hugetlb folios are movable |
| 1597 | * any more because the list has become empty or no retryable hugetlb folios |
| 1598 | * exist any more. It is caller's responsibility to call putback_movable_pages() |
| 1599 | * only if ret != 0. |
| 1600 | */ |
| 1601 | static int migrate_hugetlbs(struct list_head *from, new_folio_t get_new_folio, |
| 1602 | free_folio_t put_new_folio, unsigned long private, |
| 1603 | enum migrate_mode mode, int reason, |
| 1604 | struct migrate_pages_stats *stats, |
| 1605 | struct list_head *ret_folios) |
| 1606 | { |
| 1607 | int retry = 1; |
| 1608 | int nr_failed = 0; |
| 1609 | int nr_retry_pages = 0; |
| 1610 | int pass = 0; |
| 1611 | struct folio *folio, *folio2; |
| 1612 | int rc, nr_pages; |
| 1613 | |
| 1614 | for (pass = 0; pass < NR_MAX_MIGRATE_PAGES_RETRY && retry; pass++) { |
| 1615 | retry = 0; |
| 1616 | nr_retry_pages = 0; |
| 1617 | |
| 1618 | list_for_each_entry_safe(folio, folio2, from, lru) { |
| 1619 | if (!folio_test_hugetlb(folio)) |
| 1620 | continue; |
| 1621 | |
| 1622 | nr_pages = folio_nr_pages(folio); |
| 1623 | |
| 1624 | cond_resched(); |
| 1625 | |
| 1626 | /* |
| 1627 | * Migratability of hugepages depends on architectures and |
| 1628 | * their size. This check is necessary because some callers |
| 1629 | * of hugepage migration like soft offline and memory |
| 1630 | * hotremove don't walk through page tables or check whether |
| 1631 | * the hugepage is pmd-based or not before kicking migration. |
| 1632 | */ |
| 1633 | if (!hugepage_migration_supported(h: folio_hstate(folio))) { |
| 1634 | nr_failed++; |
| 1635 | stats->nr_failed_pages += nr_pages; |
| 1636 | list_move_tail(list: &folio->lru, head: ret_folios); |
| 1637 | continue; |
| 1638 | } |
| 1639 | |
| 1640 | rc = unmap_and_move_huge_page(get_new_folio, |
| 1641 | put_new_folio, private, |
| 1642 | src: folio, force: pass > 2, mode, |
| 1643 | reason, ret: ret_folios); |
| 1644 | /* |
| 1645 | * The rules are: |
| 1646 | * 0: hugetlb folio will be put back |
| 1647 | * -EAGAIN: stay on the from list |
| 1648 | * -ENOMEM: stay on the from list |
| 1649 | * Other errno: put on ret_folios list |
| 1650 | */ |
			switch (rc) {
| 1652 | case -ENOMEM: |
| 1653 | /* |
| 1654 | * When memory is low, don't bother to try to migrate |
| 1655 | * other folios, just exit. |
| 1656 | */ |
| 1657 | stats->nr_failed_pages += nr_pages + nr_retry_pages; |
| 1658 | return -ENOMEM; |
| 1659 | case -EAGAIN: |
| 1660 | retry++; |
| 1661 | nr_retry_pages += nr_pages; |
| 1662 | break; |
| 1663 | case 0: |
| 1664 | stats->nr_succeeded += nr_pages; |
| 1665 | break; |
| 1666 | default: |
| 1667 | /* |
| 1668 | * Permanent failure (-EBUSY, etc.): |
| 1669 | * unlike -EAGAIN case, the failed folio is |
| 1670 | * removed from migration folio list and not |
| 1671 | * retried in the next outer loop. |
| 1672 | */ |
| 1673 | nr_failed++; |
| 1674 | stats->nr_failed_pages += nr_pages; |
| 1675 | break; |
| 1676 | } |
| 1677 | } |
| 1678 | } |
| 1679 | /* |
| 1680 | * nr_failed is number of hugetlb folios failed to be migrated. After |
| 1681 | * NR_MAX_MIGRATE_PAGES_RETRY attempts, give up and count retried hugetlb |
| 1682 | * folios as failed. |
| 1683 | */ |
| 1684 | nr_failed += retry; |
| 1685 | stats->nr_failed_pages += nr_retry_pages; |
| 1686 | |
| 1687 | return nr_failed; |
| 1688 | } |
| 1689 | |
| 1690 | static void migrate_folios_move(struct list_head *src_folios, |
| 1691 | struct list_head *dst_folios, |
| 1692 | free_folio_t put_new_folio, unsigned long private, |
| 1693 | enum migrate_mode mode, int reason, |
| 1694 | struct list_head *ret_folios, |
| 1695 | struct migrate_pages_stats *stats, |
| 1696 | int *retry, int *thp_retry, int *nr_failed, |
| 1697 | int *nr_retry_pages) |
| 1698 | { |
| 1699 | struct folio *folio, *folio2, *dst, *dst2; |
| 1700 | bool is_thp; |
| 1701 | int nr_pages; |
| 1702 | int rc; |
| 1703 | |
| 1704 | dst = list_first_entry(dst_folios, struct folio, lru); |
| 1705 | dst2 = list_next_entry(dst, lru); |
| 1706 | list_for_each_entry_safe(folio, folio2, src_folios, lru) { |
| 1707 | is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio); |
| 1708 | nr_pages = folio_nr_pages(folio); |
| 1709 | |
| 1710 | cond_resched(); |
| 1711 | |
		rc = migrate_folio_move(put_new_folio, private,
					folio, dst, mode,
					reason, ret_folios);
| 1715 | /* |
| 1716 | * The rules are: |
| 1717 | * 0: folio will be freed |
| 1718 | * -EAGAIN: stay on the unmap_folios list |
| 1719 | * Other errno: put on ret_folios list |
| 1720 | */ |
| 1721 | switch (rc) { |
| 1722 | case -EAGAIN: |
| 1723 | *retry += 1; |
| 1724 | *thp_retry += is_thp; |
| 1725 | *nr_retry_pages += nr_pages; |
| 1726 | break; |
| 1727 | case 0: |
| 1728 | stats->nr_succeeded += nr_pages; |
| 1729 | stats->nr_thp_succeeded += is_thp; |
| 1730 | break; |
| 1731 | default: |
| 1732 | *nr_failed += 1; |
| 1733 | stats->nr_thp_failed += is_thp; |
| 1734 | stats->nr_failed_pages += nr_pages; |
| 1735 | break; |
| 1736 | } |
| 1737 | dst = dst2; |
| 1738 | dst2 = list_next_entry(dst, lru); |
| 1739 | } |
| 1740 | } |
| 1741 | |
| 1742 | static void migrate_folios_undo(struct list_head *src_folios, |
| 1743 | struct list_head *dst_folios, |
| 1744 | free_folio_t put_new_folio, unsigned long private, |
| 1745 | struct list_head *ret_folios) |
| 1746 | { |
| 1747 | struct folio *folio, *folio2, *dst, *dst2; |
| 1748 | |
| 1749 | dst = list_first_entry(dst_folios, struct folio, lru); |
| 1750 | dst2 = list_next_entry(dst, lru); |
| 1751 | list_for_each_entry_safe(folio, folio2, src_folios, lru) { |
| 1752 | int old_page_state = 0; |
| 1753 | struct anon_vma *anon_vma = NULL; |
| 1754 | |
		__migrate_folio_extract(dst, &old_page_state, &anon_vma);
		migrate_folio_undo_src(folio, old_page_state & PAGE_WAS_MAPPED,
				       anon_vma, true, ret_folios);
		list_del(&dst->lru);
		migrate_folio_undo_dst(dst, true, put_new_folio, private);
| 1760 | dst = dst2; |
| 1761 | dst2 = list_next_entry(dst, lru); |
| 1762 | } |
| 1763 | } |
| 1764 | |
| 1765 | /* |
| 1766 | * migrate_pages_batch() first unmaps folios in the from list as many as |
| 1767 | * possible, then move the unmapped folios. |
| 1768 | * |
| 1769 | * We only batch migration if mode == MIGRATE_ASYNC to avoid to wait a |
| 1770 | * lock or bit when we have locked more than one folio. Which may cause |
| 1771 | * deadlock (e.g., for loop device). So, if mode != MIGRATE_ASYNC, the |
| 1772 | * length of the from list must be <= 1. |
| 1773 | */ |
| 1774 | static int migrate_pages_batch(struct list_head *from, |
| 1775 | new_folio_t get_new_folio, free_folio_t put_new_folio, |
| 1776 | unsigned long private, enum migrate_mode mode, int reason, |
| 1777 | struct list_head *ret_folios, struct list_head *split_folios, |
| 1778 | struct migrate_pages_stats *stats, int nr_pass) |
| 1779 | { |
| 1780 | int retry = 1; |
| 1781 | int thp_retry = 1; |
| 1782 | int nr_failed = 0; |
| 1783 | int nr_retry_pages = 0; |
| 1784 | int pass = 0; |
| 1785 | bool is_thp = false; |
| 1786 | bool is_large = false; |
| 1787 | struct folio *folio, *folio2, *dst = NULL; |
| 1788 | int rc, rc_saved = 0, nr_pages; |
| 1789 | LIST_HEAD(unmap_folios); |
| 1790 | LIST_HEAD(dst_folios); |
| 1791 | bool nosplit = (reason == MR_NUMA_MISPLACED); |
| 1792 | |
| 1793 | VM_WARN_ON_ONCE(mode != MIGRATE_ASYNC && |
| 1794 | !list_empty(from) && !list_is_singular(from)); |
| 1795 | |
| 1796 | for (pass = 0; pass < nr_pass && retry; pass++) { |
| 1797 | retry = 0; |
| 1798 | thp_retry = 0; |
| 1799 | nr_retry_pages = 0; |
| 1800 | |
| 1801 | list_for_each_entry_safe(folio, folio2, from, lru) { |
| 1802 | is_large = folio_test_large(folio); |
| 1803 | is_thp = folio_test_pmd_mappable(folio); |
| 1804 | nr_pages = folio_nr_pages(folio); |
| 1805 | |
| 1806 | cond_resched(); |
| 1807 | |
| 1808 | /* |
| 1809 | * The rare folio on the deferred split list should |
| 1810 | * be split now. It should not count as a failure: |
| 1811 | * but increment nr_failed because, without doing so, |
| 1812 | * migrate_pages() may report success with (split but |
| 1813 | * unmigrated) pages still on its fromlist; whereas it |
| 1814 | * always reports success when its fromlist is empty. |
| 1815 | * stats->nr_thp_failed should be increased too, |
| 1816 | * otherwise stats inconsistency will happen when |
| 1817 | * migrate_pages_batch is called via migrate_pages() |
| 1818 | * with MIGRATE_SYNC and MIGRATE_ASYNC. |
| 1819 | * |
| 1820 | * Only check it without removing it from the list. |
| 1821 | * Since the folio can be on deferred_split_scan() |
| 1822 | * local list and removing it can cause the local list |
| 1823 | * corruption. Folio split process below can handle it |
| 1824 | * with the help of folio_ref_freeze(). |
| 1825 | * |
| 1826 | * nr_pages > 2 is needed to avoid checking order-1 |
| 1827 | * page cache folios. They exist, in contrast to |
| 1828 | * non-existent order-1 anonymous folios, and do not |
| 1829 | * use _deferred_list. |
| 1830 | */ |
| 1831 | if (nr_pages > 2 && |
| 1832 | !list_empty(head: &folio->_deferred_list) && |
| 1833 | folio_test_partially_mapped(folio)) { |
| 1834 | if (!try_split_folio(folio, split_folios, mode)) { |
| 1835 | nr_failed++; |
| 1836 | stats->nr_thp_failed += is_thp; |
| 1837 | stats->nr_thp_split += is_thp; |
| 1838 | stats->nr_split++; |
| 1839 | continue; |
| 1840 | } |
| 1841 | } |
| 1842 | |
| 1843 | /* |
| 1844 | * Large folio migration might be unsupported or |
| 1845 | * the allocation might be failed so we should retry |
| 1846 | * on the same folio with the large folio split |
| 1847 | * to normal folios. |
| 1848 | * |
| 1849 | * Split folios are put in split_folios, and |
| 1850 | * we will migrate them after the rest of the |
| 1851 | * list is processed. |
| 1852 | */ |
| 1853 | if (!thp_migration_supported() && is_thp) { |
| 1854 | nr_failed++; |
| 1855 | stats->nr_thp_failed++; |
| 1856 | if (!try_split_folio(folio, split_folios, mode)) { |
| 1857 | stats->nr_thp_split++; |
| 1858 | stats->nr_split++; |
| 1859 | continue; |
| 1860 | } |
| 1861 | stats->nr_failed_pages += nr_pages; |
| 1862 | list_move_tail(list: &folio->lru, head: ret_folios); |
| 1863 | continue; |
| 1864 | } |
| 1865 | |
| 1866 | /* |
| 1867 | * If we are holding the last folio reference, the folio |
| 1868 | * was freed from under us, so just drop our reference. |
| 1869 | */ |
			if (likely(!page_has_movable_ops(&folio->page)) &&
			    folio_ref_count(folio) == 1) {
				folio_clear_active(folio);
				folio_clear_unevictable(folio);
				list_del(&folio->lru);
				migrate_folio_done(folio, reason);
				stats->nr_succeeded += nr_pages;
				stats->nr_thp_succeeded += is_thp;
				continue;
			}

			rc = migrate_folio_unmap(get_new_folio, put_new_folio,
					private, folio, &dst, mode, ret_folios);
| 1883 | /* |
| 1884 | * The rules are: |
| 1885 | * 0: folio will be put on unmap_folios list, |
| 1886 | * dst folio put on dst_folios list |
| 1887 | * -EAGAIN: stay on the from list |
| 1888 | * -ENOMEM: stay on the from list |
| 1889 | * Other errno: put on ret_folios list |
| 1890 | */ |
			switch (rc) {
| 1892 | case -ENOMEM: |
| 1893 | /* |
| 1894 | * When memory is low, don't bother to try to migrate |
| 1895 | * other folios, move unmapped folios, then exit. |
| 1896 | */ |
| 1897 | nr_failed++; |
| 1898 | stats->nr_thp_failed += is_thp; |
| 1899 | /* Large folio NUMA faulting doesn't split to retry. */ |
| 1900 | if (is_large && !nosplit) { |
| 1901 | int ret = try_split_folio(folio, split_folios, mode); |
| 1902 | |
| 1903 | if (!ret) { |
| 1904 | stats->nr_thp_split += is_thp; |
| 1905 | stats->nr_split++; |
| 1906 | break; |
| 1907 | } else if (reason == MR_LONGTERM_PIN && |
| 1908 | ret == -EAGAIN) { |
| 1909 | /* |
| 1910 | * Try again to split large folio to |
| 1911 | * mitigate the failure of longterm pinning. |
| 1912 | */ |
| 1913 | retry++; |
| 1914 | thp_retry += is_thp; |
| 1915 | nr_retry_pages += nr_pages; |
| 1916 | /* Undo duplicated failure counting. */ |
| 1917 | nr_failed--; |
| 1918 | stats->nr_thp_failed -= is_thp; |
| 1919 | break; |
| 1920 | } |
| 1921 | } |
| 1922 | |
				stats->nr_failed_pages += nr_pages + nr_retry_pages;
				/* nr_failed isn't updated here since it is not used afterwards */
				stats->nr_thp_failed += thp_retry;
				rc_saved = rc;
				if (list_empty(&unmap_folios))
					goto out;
				else
					goto move;
| 1931 | case -EAGAIN: |
| 1932 | retry++; |
| 1933 | thp_retry += is_thp; |
| 1934 | nr_retry_pages += nr_pages; |
| 1935 | break; |
| 1936 | case 0: |
				list_move_tail(&folio->lru, &unmap_folios);
				list_add_tail(&dst->lru, &dst_folios);
| 1939 | break; |
| 1940 | default: |
| 1941 | /* |
| 1942 | * Permanent failure (-EBUSY, etc.): |
| 1943 | * unlike -EAGAIN case, the failed folio is |
| 1944 | * removed from migration folio list and not |
| 1945 | * retried in the next outer loop. |
| 1946 | */ |
| 1947 | nr_failed++; |
| 1948 | stats->nr_thp_failed += is_thp; |
| 1949 | stats->nr_failed_pages += nr_pages; |
| 1950 | break; |
| 1951 | } |
| 1952 | } |
| 1953 | } |
| 1954 | nr_failed += retry; |
| 1955 | stats->nr_thp_failed += thp_retry; |
| 1956 | stats->nr_failed_pages += nr_retry_pages; |
| 1957 | move: |
| 1958 | /* Flush TLBs for all unmapped folios */ |
| 1959 | try_to_unmap_flush(); |
| 1960 | |
| 1961 | retry = 1; |
| 1962 | for (pass = 0; pass < nr_pass && retry; pass++) { |
| 1963 | retry = 0; |
| 1964 | thp_retry = 0; |
| 1965 | nr_retry_pages = 0; |
| 1966 | |
| 1967 | /* Move the unmapped folios */ |
		migrate_folios_move(&unmap_folios, &dst_folios,
				put_new_folio, private, mode, reason,
				ret_folios, stats, &retry, &thp_retry,
				&nr_failed, &nr_retry_pages);
| 1972 | } |
| 1973 | nr_failed += retry; |
| 1974 | stats->nr_thp_failed += thp_retry; |
| 1975 | stats->nr_failed_pages += nr_retry_pages; |
| 1976 | |
| 1977 | rc = rc_saved ? : nr_failed; |
| 1978 | out: |
| 1979 | /* Cleanup remaining folios */ |
	migrate_folios_undo(&unmap_folios, &dst_folios,
			put_new_folio, private, ret_folios);
| 1982 | |
| 1983 | return rc; |
| 1984 | } |
| 1985 | |
| 1986 | static int migrate_pages_sync(struct list_head *from, new_folio_t get_new_folio, |
| 1987 | free_folio_t put_new_folio, unsigned long private, |
| 1988 | enum migrate_mode mode, int reason, |
| 1989 | struct list_head *ret_folios, struct list_head *split_folios, |
| 1990 | struct migrate_pages_stats *stats) |
| 1991 | { |
| 1992 | int rc, nr_failed = 0; |
| 1993 | LIST_HEAD(folios); |
| 1994 | struct migrate_pages_stats astats; |
| 1995 | |
	memset(&astats, 0, sizeof(astats));
	/* Try to migrate in batch with MIGRATE_ASYNC mode first */
	rc = migrate_pages_batch(from, get_new_folio, put_new_folio, private, MIGRATE_ASYNC,
				 reason, &folios, split_folios, &astats,
				 NR_MAX_MIGRATE_ASYNC_RETRY);
| 2001 | stats->nr_succeeded += astats.nr_succeeded; |
| 2002 | stats->nr_thp_succeeded += astats.nr_thp_succeeded; |
| 2003 | stats->nr_thp_split += astats.nr_thp_split; |
| 2004 | stats->nr_split += astats.nr_split; |
| 2005 | if (rc < 0) { |
| 2006 | stats->nr_failed_pages += astats.nr_failed_pages; |
| 2007 | stats->nr_thp_failed += astats.nr_thp_failed; |
		list_splice_tail(&folios, ret_folios);
| 2009 | return rc; |
| 2010 | } |
| 2011 | stats->nr_thp_failed += astats.nr_thp_split; |
| 2012 | /* |
| 2013 | * Do not count rc, as pages will be retried below. |
| 2014 | * Count nr_split only, since it includes nr_thp_split. |
| 2015 | */ |
| 2016 | nr_failed += astats.nr_split; |
| 2017 | /* |
| 2018 | * Fall back to migrate all failed folios one by one synchronously. All |
| 2019 | * failed folios except split THPs will be retried, so their failure |
| 2020 | * isn't counted |
| 2021 | */ |
| 2022 | list_splice_tail_init(list: &folios, head: from); |
| 2023 | while (!list_empty(head: from)) { |
| 2024 | list_move(list: from->next, head: &folios); |
| 2025 | rc = migrate_pages_batch(from: &folios, get_new_folio, put_new_folio, |
| 2026 | private, mode, reason, ret_folios, |
| 2027 | split_folios, stats, NR_MAX_MIGRATE_SYNC_RETRY); |
| 2028 | list_splice_tail_init(list: &folios, head: ret_folios); |
| 2029 | if (rc < 0) |
| 2030 | return rc; |
| 2031 | nr_failed += rc; |
| 2032 | } |
| 2033 | |
| 2034 | return nr_failed; |
| 2035 | } |
| 2036 | |
| 2037 | /* |
| 2038 | * migrate_pages - migrate the folios specified in a list, to the free folios |
| 2039 | * supplied as the target for the page migration |
| 2040 | * |
| 2041 | * @from: The list of folios to be migrated. |
| 2042 | * @get_new_folio: The function used to allocate free folios to be used |
| 2043 | * as the target of the folio migration. |
| 2044 | * @put_new_folio: The function used to free target folios if migration |
| 2045 | * fails, or NULL if no special handling is necessary. |
| 2046 | * @private: Private data to be passed on to get_new_folio() |
| 2047 | * @mode: The migration mode that specifies the constraints for |
| 2048 | * folio migration, if any. |
| 2049 | * @reason: The reason for folio migration. |
| 2050 | * @ret_succeeded: Set to the number of folios migrated successfully if |
| 2051 | * the caller passes a non-NULL pointer. |
| 2052 | * |
| 2053 | * The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios |
| 2054 | * are movable any more because the list has become empty or no retryable folios |
| 2055 | * exist any more. It is caller's responsibility to call putback_movable_pages() |
| 2056 | * only if ret != 0. |
| 2057 | * |
| 2058 | * Returns the number of {normal folio, large folio, hugetlb} that were not |
| 2059 | * migrated, or an error code. The number of large folio splits will be |
| 2060 | * considered as the number of non-migrated large folio, no matter how many |
| 2061 | * split folios of the large folio are migrated successfully. |
| 2062 | */ |
| 2063 | int migrate_pages(struct list_head *from, new_folio_t get_new_folio, |
| 2064 | free_folio_t put_new_folio, unsigned long private, |
| 2065 | enum migrate_mode mode, int reason, unsigned int *ret_succeeded) |
| 2066 | { |
| 2067 | int rc, rc_gather; |
| 2068 | int nr_pages; |
| 2069 | struct folio *folio, *folio2; |
| 2070 | LIST_HEAD(folios); |
| 2071 | LIST_HEAD(ret_folios); |
| 2072 | LIST_HEAD(split_folios); |
| 2073 | struct migrate_pages_stats stats; |
| 2074 | |
| 2075 | trace_mm_migrate_pages_start(mode, reason); |
| 2076 | |
	memset(&stats, 0, sizeof(stats));

	rc_gather = migrate_hugetlbs(from, get_new_folio, put_new_folio, private,
				     mode, reason, &stats, &ret_folios);
| 2081 | if (rc_gather < 0) |
| 2082 | goto out; |
| 2083 | |
| 2084 | again: |
| 2085 | nr_pages = 0; |
| 2086 | list_for_each_entry_safe(folio, folio2, from, lru) { |
| 2087 | /* Retried hugetlb folios will be kept in list */ |
| 2088 | if (folio_test_hugetlb(folio)) { |
			list_move_tail(&folio->lru, &ret_folios);
| 2090 | continue; |
| 2091 | } |
| 2092 | |
| 2093 | nr_pages += folio_nr_pages(folio); |
| 2094 | if (nr_pages >= NR_MAX_BATCHED_MIGRATION) |
| 2095 | break; |
| 2096 | } |
	if (nr_pages >= NR_MAX_BATCHED_MIGRATION)
		list_cut_before(&folios, from, &folio2->lru);
	else
		list_splice_init(from, &folios);
	if (mode == MIGRATE_ASYNC)
		rc = migrate_pages_batch(&folios, get_new_folio, put_new_folio,
				private, mode, reason, &ret_folios,
				&split_folios, &stats,
				NR_MAX_MIGRATE_PAGES_RETRY);
	else
		rc = migrate_pages_sync(&folios, get_new_folio, put_new_folio,
				private, mode, reason, &ret_folios,
				&split_folios, &stats);
	list_splice_tail_init(&folios, &ret_folios);
	if (rc < 0) {
		rc_gather = rc;
		list_splice_tail(&split_folios, &ret_folios);
		goto out;
	}
	if (!list_empty(&split_folios)) {
		/*
		 * Failure isn't counted since all split folios of a large
		 * folio are counted as one failure already. And, we only try
		 * to migrate with minimal effort, force MIGRATE_ASYNC mode
		 * and retry once.
		 */
		migrate_pages_batch(&split_folios, get_new_folio,
				put_new_folio, private, MIGRATE_ASYNC, reason,
				&ret_folios, NULL, &stats, 1);
		list_splice_tail_init(&split_folios, &ret_folios);
	}
| 2127 | rc_gather += rc; |
	if (!list_empty(from))
| 2129 | goto again; |
| 2130 | out: |
| 2131 | /* |
| 2132 | * Put the permanent failure folio back to migration list, they |
| 2133 | * will be put back to the right list by the caller. |
| 2134 | */ |
| 2135 | list_splice(list: &ret_folios, head: from); |
| 2136 | |
| 2137 | /* |
| 2138 | * Return 0 in case all split folios of fail-to-migrate large folios |
| 2139 | * are migrated successfully. |
| 2140 | */ |
	if (list_empty(from))
| 2142 | rc_gather = 0; |
| 2143 | |
	count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded);
	count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages);
	count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded);
	count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed);
	count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split);
	trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages,
			       stats.nr_thp_succeeded, stats.nr_thp_failed,
			       stats.nr_thp_split, stats.nr_split, mode,
			       reason);
| 2153 | |
| 2154 | if (ret_succeeded) |
| 2155 | *ret_succeeded = stats.nr_succeeded; |
| 2156 | |
| 2157 | return rc_gather; |
| 2158 | } |
| 2159 | |
| 2160 | struct folio *alloc_migration_target(struct folio *src, unsigned long private) |
| 2161 | { |
| 2162 | struct migration_target_control *mtc; |
| 2163 | gfp_t gfp_mask; |
| 2164 | unsigned int order = 0; |
| 2165 | int nid; |
| 2166 | int zidx; |
| 2167 | |
| 2168 | mtc = (struct migration_target_control *)private; |
| 2169 | gfp_mask = mtc->gfp_mask; |
| 2170 | nid = mtc->nid; |
	if (nid == NUMA_NO_NODE)
		nid = folio_nid(src);

	if (folio_test_hugetlb(src)) {
		struct hstate *h = folio_hstate(src);

		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
		return alloc_hugetlb_folio_nodemask(h, nid,
				mtc->nmask, gfp_mask,
				htlb_allow_alloc_fallback(mtc->reason));
	}

	if (folio_test_large(src)) {
		/*
		 * clear __GFP_RECLAIM to make the migration callback
		 * consistent with regular THP allocations.
		 */
		gfp_mask &= ~__GFP_RECLAIM;
		gfp_mask |= GFP_TRANSHUGE;
		order = folio_order(src);
	}
	zidx = zone_idx(folio_zone(src));
	if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
		gfp_mask |= __GFP_HIGHMEM;
| 2195 | |
| 2196 | return __folio_alloc(gfp_mask, order, nid, mtc->nmask); |
| 2197 | } |
| 2198 | |
| 2199 | #ifdef CONFIG_NUMA |
| 2200 | |
| 2201 | static int store_status(int __user *status, int start, int value, int nr) |
| 2202 | { |
| 2203 | while (nr-- > 0) { |
| 2204 | if (put_user(value, status + start)) |
| 2205 | return -EFAULT; |
| 2206 | start++; |
| 2207 | } |
| 2208 | |
| 2209 | return 0; |
| 2210 | } |
| 2211 | |
| 2212 | static int do_move_pages_to_node(struct list_head *pagelist, int node) |
| 2213 | { |
| 2214 | int err; |
| 2215 | struct migration_target_control mtc = { |
| 2216 | .nid = node, |
| 2217 | .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, |
| 2218 | .reason = MR_SYSCALL, |
| 2219 | }; |
| 2220 | |
	err = migrate_pages(pagelist, alloc_migration_target, NULL,
			    (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
	if (err)
		putback_movable_pages(pagelist);
| 2225 | return err; |
| 2226 | } |
| 2227 | |
| 2228 | static int __add_folio_for_migration(struct folio *folio, int node, |
| 2229 | struct list_head *pagelist, bool migrate_all) |
| 2230 | { |
| 2231 | if (is_zero_folio(folio) || is_huge_zero_folio(folio)) |
| 2232 | return -EFAULT; |
| 2233 | |
| 2234 | if (folio_is_zone_device(folio)) |
| 2235 | return -ENOENT; |
| 2236 | |
| 2237 | if (folio_nid(folio) == node) |
| 2238 | return 0; |
| 2239 | |
| 2240 | if (folio_maybe_mapped_shared(folio) && !migrate_all) |
| 2241 | return -EACCES; |
| 2242 | |
| 2243 | if (folio_test_hugetlb(folio)) { |
		if (folio_isolate_hugetlb(folio, pagelist))
			return 1;
	} else if (folio_isolate_lru(folio)) {
		list_add_tail(&folio->lru, pagelist);
		node_stat_mod_folio(folio,
				    NR_ISOLATED_ANON + folio_is_file_lru(folio),
				    folio_nr_pages(folio));
| 2251 | return 1; |
| 2252 | } |
| 2253 | return -EBUSY; |
| 2254 | } |
| 2255 | |
| 2256 | /* |
| 2257 | * Resolves the given address to a struct folio, isolates it from the LRU and |
| 2258 | * puts it to the given pagelist. |
| 2259 | * Returns: |
| 2260 | * errno - if the folio cannot be found/isolated |
| 2261 | * 0 - when it doesn't have to be migrated because it is already on the |
| 2262 | * target node |
| 2263 | * 1 - when it has been queued |
| 2264 | */ |
| 2265 | static int add_folio_for_migration(struct mm_struct *mm, const void __user *p, |
| 2266 | int node, struct list_head *pagelist, bool migrate_all) |
| 2267 | { |
| 2268 | struct vm_area_struct *vma; |
| 2269 | struct folio_walk fw; |
| 2270 | struct folio *folio; |
| 2271 | unsigned long addr; |
| 2272 | int err = -EFAULT; |
| 2273 | |
| 2274 | mmap_read_lock(mm); |
| 2275 | addr = (unsigned long)untagged_addr_remote(mm, p); |
| 2276 | |
| 2277 | vma = vma_lookup(mm, addr); |
| 2278 | if (vma && vma_migratable(vma)) { |
		folio = folio_walk_start(&fw, vma, addr, FW_ZEROPAGE);
| 2280 | if (folio) { |
| 2281 | err = __add_folio_for_migration(folio, node, pagelist, |
| 2282 | migrate_all); |
| 2283 | folio_walk_end(&fw, vma); |
| 2284 | } else { |
| 2285 | err = -ENOENT; |
| 2286 | } |
| 2287 | } |
| 2288 | mmap_read_unlock(mm); |
| 2289 | return err; |
| 2290 | } |
| 2291 | |
| 2292 | static int move_pages_and_store_status(int node, |
| 2293 | struct list_head *pagelist, int __user *status, |
| 2294 | int start, int i, unsigned long nr_pages) |
| 2295 | { |
| 2296 | int err; |
| 2297 | |
	if (list_empty(pagelist))
| 2299 | return 0; |
| 2300 | |
| 2301 | err = do_move_pages_to_node(pagelist, node); |
| 2302 | if (err) { |
| 2303 | /* |
| 2304 | * Positive err means the number of failed |
| 2305 | * pages to migrate. Since we are going to |
| 2306 | * abort and return the number of non-migrated |
| 2307 | * pages, so need to include the rest of the |
| 2308 | * nr_pages that have not been attempted as |
| 2309 | * well. |
| 2310 | */ |
| 2311 | if (err > 0) |
| 2312 | err += nr_pages - i; |
| 2313 | return err; |
| 2314 | } |
| 2315 | return store_status(status, start, value: node, nr: i - start); |
| 2316 | } |
| 2317 | |
| 2318 | /* |
| 2319 | * Migrate an array of page address onto an array of nodes and fill |
| 2320 | * the corresponding array of status. |
| 2321 | */ |
| 2322 | static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, |
| 2323 | unsigned long nr_pages, |
| 2324 | const void __user * __user *pages, |
| 2325 | const int __user *nodes, |
| 2326 | int __user *status, int flags) |
| 2327 | { |
| 2328 | compat_uptr_t __user *compat_pages = (void __user *)pages; |
| 2329 | int current_node = NUMA_NO_NODE; |
| 2330 | LIST_HEAD(pagelist); |
| 2331 | int start, i; |
| 2332 | int err = 0, err1; |
| 2333 | |
| 2334 | lru_cache_disable(); |
| 2335 | |
| 2336 | for (i = start = 0; i < nr_pages; i++) { |
| 2337 | const void __user *p; |
| 2338 | int node; |
| 2339 | |
| 2340 | err = -EFAULT; |
| 2341 | if (in_compat_syscall()) { |
| 2342 | compat_uptr_t cp; |
| 2343 | |
| 2344 | if (get_user(cp, compat_pages + i)) |
| 2345 | goto out_flush; |
| 2346 | |
			p = compat_ptr(cp);
| 2348 | } else { |
| 2349 | if (get_user(p, pages + i)) |
| 2350 | goto out_flush; |
| 2351 | } |
| 2352 | if (get_user(node, nodes + i)) |
| 2353 | goto out_flush; |
| 2354 | |
| 2355 | err = -ENODEV; |
| 2356 | if (node < 0 || node >= MAX_NUMNODES) |
| 2357 | goto out_flush; |
		if (!node_state(node, N_MEMORY))
| 2359 | goto out_flush; |
| 2360 | |
| 2361 | err = -EACCES; |
| 2362 | if (!node_isset(node, task_nodes)) |
| 2363 | goto out_flush; |
| 2364 | |
| 2365 | if (current_node == NUMA_NO_NODE) { |
| 2366 | current_node = node; |
| 2367 | start = i; |
| 2368 | } else if (node != current_node) { |
			err = move_pages_and_store_status(current_node,
					&pagelist, status, start, i, nr_pages);
| 2371 | if (err) |
| 2372 | goto out; |
| 2373 | start = i; |
| 2374 | current_node = node; |
| 2375 | } |
| 2376 | |
| 2377 | /* |
| 2378 | * Errors in the page lookup or isolation are not fatal and we simply |
| 2379 | * report them via status |
| 2380 | */ |
| 2381 | err = add_folio_for_migration(mm, p, node: current_node, pagelist: &pagelist, |
| 2382 | migrate_all: flags & MPOL_MF_MOVE_ALL); |
| 2383 | |
| 2384 | if (err > 0) { |
| 2385 | /* The page is successfully queued for migration */ |
| 2386 | continue; |
| 2387 | } |
| 2388 | |
| 2389 | /* |
| 2390 | * If the page is already on the target node (!err), store the |
| 2391 | * node, otherwise, store the err. |
| 2392 | */ |
		err = store_status(status, i, err ? : current_node, 1);
| 2394 | if (err) |
| 2395 | goto out_flush; |
| 2396 | |
		err = move_pages_and_store_status(current_node, &pagelist,
				status, start, i, nr_pages);
| 2399 | if (err) { |
| 2400 | /* We have accounted for page i */ |
| 2401 | if (err > 0) |
| 2402 | err--; |
| 2403 | goto out; |
| 2404 | } |
| 2405 | current_node = NUMA_NO_NODE; |
| 2406 | } |
| 2407 | out_flush: |
| 2408 | /* Make sure we do not overwrite the existing error */ |
	err1 = move_pages_and_store_status(current_node, &pagelist,
			status, start, i, nr_pages);
| 2411 | if (err >= 0) |
| 2412 | err = err1; |
| 2413 | out: |
| 2414 | lru_cache_enable(); |
| 2415 | return err; |
| 2416 | } |
| 2417 | |
| 2418 | /* |
| 2419 | * Determine the nodes of an array of pages and store it in an array of status. |
| 2420 | */ |
| 2421 | static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, |
| 2422 | const void __user **pages, int *status) |
| 2423 | { |
| 2424 | unsigned long i; |
| 2425 | |
| 2426 | mmap_read_lock(mm); |
| 2427 | |
| 2428 | for (i = 0; i < nr_pages; i++) { |
| 2429 | unsigned long addr = (unsigned long)(*pages); |
| 2430 | struct vm_area_struct *vma; |
| 2431 | struct folio_walk fw; |
| 2432 | struct folio *folio; |
| 2433 | int err = -EFAULT; |
| 2434 | |
| 2435 | vma = vma_lookup(mm, addr); |
| 2436 | if (!vma) |
| 2437 | goto set_status; |
| 2438 | |
		folio = folio_walk_start(&fw, vma, addr, FW_ZEROPAGE);
| 2440 | if (folio) { |
| 2441 | if (is_zero_folio(folio) || is_huge_zero_folio(folio)) |
| 2442 | err = -EFAULT; |
| 2443 | else if (folio_is_zone_device(folio)) |
| 2444 | err = -ENOENT; |
| 2445 | else |
| 2446 | err = folio_nid(folio); |
| 2447 | folio_walk_end(&fw, vma); |
| 2448 | } else { |
| 2449 | err = -ENOENT; |
| 2450 | } |
| 2451 | set_status: |
| 2452 | *status = err; |
| 2453 | |
| 2454 | pages++; |
| 2455 | status++; |
| 2456 | } |
| 2457 | |
| 2458 | mmap_read_unlock(mm); |
| 2459 | } |
| 2460 | |
| 2461 | static int get_compat_pages_array(const void __user *chunk_pages[], |
| 2462 | const void __user * __user *pages, |
| 2463 | unsigned long chunk_offset, |
| 2464 | unsigned long chunk_nr) |
| 2465 | { |
| 2466 | compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages; |
| 2467 | compat_uptr_t p; |
| 2468 | int i; |
| 2469 | |
| 2470 | for (i = 0; i < chunk_nr; i++) { |
| 2471 | if (get_user(p, pages32 + chunk_offset + i)) |
| 2472 | return -EFAULT; |
		chunk_pages[i] = compat_ptr(p);
| 2474 | } |
| 2475 | |
| 2476 | return 0; |
| 2477 | } |
| 2478 | |
| 2479 | /* |
| 2480 | * Determine the nodes of a user array of pages and store it in |
| 2481 | * a user array of status. |
| 2482 | */ |
| 2483 | static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, |
| 2484 | const void __user * __user *pages, |
| 2485 | int __user *status) |
| 2486 | { |
| 2487 | #define DO_PAGES_STAT_CHUNK_NR 16UL |
| 2488 | const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; |
| 2489 | int chunk_status[DO_PAGES_STAT_CHUNK_NR]; |
| 2490 | unsigned long chunk_offset = 0; |
| 2491 | |
| 2492 | while (nr_pages) { |
| 2493 | unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR); |
| 2494 | |
| 2495 | if (in_compat_syscall()) { |
| 2496 | if (get_compat_pages_array(chunk_pages, pages, |
| 2497 | chunk_offset, chunk_nr)) |
| 2498 | break; |
| 2499 | } else { |
			if (copy_from_user(chunk_pages, pages + chunk_offset,
					   chunk_nr * sizeof(*chunk_pages)))
| 2502 | break; |
| 2503 | } |
| 2504 | |
		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
| 2506 | |
		if (copy_to_user(status + chunk_offset, chunk_status,
				 chunk_nr * sizeof(*status)))
| 2509 | break; |
| 2510 | |
| 2511 | chunk_offset += chunk_nr; |
| 2512 | nr_pages -= chunk_nr; |
| 2513 | } |
| 2514 | return nr_pages ? -EFAULT : 0; |
| 2515 | } |
| 2516 | |
| 2517 | static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes) |
| 2518 | { |
| 2519 | struct task_struct *task; |
| 2520 | struct mm_struct *mm; |
| 2521 | |
| 2522 | /* |
| 2523 | * There is no need to check if current process has the right to modify |
| 2524 | * the specified process when they are same. |
| 2525 | */ |
| 2526 | if (!pid) { |
| 2527 | mmget(current->mm); |
| 2528 | *mem_nodes = cpuset_mems_allowed(current); |
| 2529 | return current->mm; |
| 2530 | } |
| 2531 | |
| 2532 | task = find_get_task_by_vpid(nr: pid); |
| 2533 | if (!task) { |
| 2534 | return ERR_PTR(error: -ESRCH); |
| 2535 | } |
| 2536 | |
| 2537 | /* |
| 2538 | * Check if this process has the right to modify the specified |
| 2539 | * process. Use the regular "ptrace_may_access()" checks. |
| 2540 | */ |
	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
		mm = ERR_PTR(-EPERM);
		goto out;
	}

	mm = ERR_PTR(security_task_movememory(task));
	if (IS_ERR(mm))
		goto out;
	*mem_nodes = cpuset_mems_allowed(task);
	mm = get_task_mm(task);
out:
	put_task_struct(task);
	if (!mm)
		mm = ERR_PTR(-EINVAL);
| 2555 | return mm; |
| 2556 | } |
| 2557 | |
| 2558 | /* |
| 2559 | * Move a list of pages in the address space of the currently executing |
| 2560 | * process. |
| 2561 | */ |
| 2562 | static int kernel_move_pages(pid_t pid, unsigned long nr_pages, |
| 2563 | const void __user * __user *pages, |
| 2564 | const int __user *nodes, |
| 2565 | int __user *status, int flags) |
| 2566 | { |
| 2567 | struct mm_struct *mm; |
| 2568 | int err; |
| 2569 | nodemask_t task_nodes; |
| 2570 | |
| 2571 | /* Check flags */ |
| 2572 | if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) |
| 2573 | return -EINVAL; |
| 2574 | |
| 2575 | if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) |
| 2576 | return -EPERM; |
| 2577 | |
	mm = find_mm_struct(pid, &task_nodes);
	if (IS_ERR(mm))
		return PTR_ERR(mm);
| 2581 | |
| 2582 | if (nodes) |
| 2583 | err = do_pages_move(mm, task_nodes, nr_pages, pages, |
| 2584 | nodes, status, flags); |
| 2585 | else |
| 2586 | err = do_pages_stat(mm, nr_pages, pages, status); |
| 2587 | |
| 2588 | mmput(mm); |
| 2589 | return err; |
| 2590 | } |
| 2591 | |
| 2592 | SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, |
| 2593 | const void __user * __user *, pages, |
| 2594 | const int __user *, nodes, |
| 2595 | int __user *, status, int, flags) |
| 2596 | { |
| 2597 | return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags); |
| 2598 | } |
| 2599 | |
| 2600 | #ifdef CONFIG_NUMA_BALANCING |
| 2601 | /* |
| 2602 | * Returns true if this is a safe migration target node for misplaced NUMA |
| 2603 | * pages. Currently it only checks the watermarks which is crude. |
| 2604 | */ |
| 2605 | static bool migrate_balanced_pgdat(struct pglist_data *pgdat, |
| 2606 | unsigned long nr_migrate_pages) |
| 2607 | { |
| 2608 | int z; |
| 2609 | |
| 2610 | for (z = pgdat->nr_zones - 1; z >= 0; z--) { |
| 2611 | struct zone *zone = pgdat->node_zones + z; |
| 2612 | |
| 2613 | if (!managed_zone(zone)) |
| 2614 | continue; |
| 2615 | |
| 2616 | /* Avoid waking kswapd by allocating pages_to_migrate pages. */ |
| 2617 | if (!zone_watermark_ok(zone, 0, |
| 2618 | high_wmark_pages(zone) + |
| 2619 | nr_migrate_pages, |
| 2620 | ZONE_MOVABLE, ALLOC_CMA)) |
| 2621 | continue; |
| 2622 | return true; |
| 2623 | } |
| 2624 | return false; |
| 2625 | } |
| 2626 | |
| 2627 | static struct folio *alloc_misplaced_dst_folio(struct folio *src, |
| 2628 | unsigned long data) |
| 2629 | { |
| 2630 | int nid = (int) data; |
| 2631 | int order = folio_order(src); |
| 2632 | gfp_t gfp = __GFP_THISNODE; |
| 2633 | |
| 2634 | if (order > 0) |
| 2635 | gfp |= GFP_TRANSHUGE_LIGHT; |
| 2636 | else { |
| 2637 | gfp |= GFP_HIGHUSER_MOVABLE | __GFP_NOMEMALLOC | __GFP_NORETRY | |
| 2638 | __GFP_NOWARN; |
| 2639 | gfp &= ~__GFP_RECLAIM; |
| 2640 | } |
| 2641 | return __folio_alloc_node(gfp, order, nid); |
| 2642 | } |
| 2643 | |
| 2644 | /* |
| 2645 | * Prepare for calling migrate_misplaced_folio() by isolating the folio if |
| 2646 | * permitted. Must be called with the PTL still held. |
| 2647 | */ |
| 2648 | int migrate_misplaced_folio_prepare(struct folio *folio, |
| 2649 | struct vm_area_struct *vma, int node) |
| 2650 | { |
| 2651 | int nr_pages = folio_nr_pages(folio); |
| 2652 | pg_data_t *pgdat = NODE_DATA(node); |
| 2653 | |
| 2654 | if (folio_is_file_lru(folio)) { |
| 2655 | /* |
| 2656 | * Do not migrate file folios that are mapped in multiple |
| 2657 | * processes with execute permissions as they are probably |
| 2658 | * shared libraries. |
| 2659 | * |
| 2660 | * See folio_maybe_mapped_shared() on possible imprecision |
| 2661 | * when we cannot easily detect if a folio is shared. |
| 2662 | */ |
| 2663 | if ((vma->vm_flags & VM_EXEC) && folio_maybe_mapped_shared(folio)) |
| 2664 | return -EACCES; |
| 2665 | |
| 2666 | /* |
| 2667 | * Do not migrate dirty folios as not all filesystems can move |
| 2668 | * dirty folios in MIGRATE_ASYNC mode which is a waste of |
| 2669 | * cycles. |
| 2670 | */ |
| 2671 | if (folio_test_dirty(folio)) |
| 2672 | return -EAGAIN; |
| 2673 | } |
| 2674 | |
| 2675 | /* Avoid migrating to a node that is nearly full */ |
| 2676 | if (!migrate_balanced_pgdat(pgdat, nr_pages)) { |
| 2677 | int z; |
| 2678 | |
| 2679 | if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)) |
| 2680 | return -EAGAIN; |
| 2681 | for (z = pgdat->nr_zones - 1; z >= 0; z--) { |
| 2682 | if (managed_zone(pgdat->node_zones + z)) |
| 2683 | break; |
| 2684 | } |
| 2685 | |
| 2686 | /* |
| 2687 | * If there are no managed zones, it should not proceed |
| 2688 | * further. |
| 2689 | */ |
| 2690 | if (z < 0) |
| 2691 | return -EAGAIN; |
| 2692 | |
| 2693 | wakeup_kswapd(pgdat->node_zones + z, 0, |
| 2694 | folio_order(folio), ZONE_MOVABLE); |
| 2695 | return -EAGAIN; |
| 2696 | } |
| 2697 | |
| 2698 | if (!folio_isolate_lru(folio)) |
| 2699 | return -EAGAIN; |
| 2700 | |
| 2701 | node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), |
| 2702 | nr_pages); |
| 2703 | return 0; |
| 2704 | } |
| 2705 | |
| 2706 | /* |
| 2707 | * Attempt to migrate a misplaced folio to the specified destination |
| 2708 | * node. Caller is expected to have isolated the folio by calling |
| 2709 | * migrate_misplaced_folio_prepare(), which will result in an |
| 2710 | * elevated reference count on the folio. This function will un-isolate the |
| 2711 | * folio, dereferencing the folio before returning. |
| 2712 | */ |
| 2713 | int migrate_misplaced_folio(struct folio *folio, int node) |
| 2714 | { |
| 2715 | pg_data_t *pgdat = NODE_DATA(node); |
| 2716 | int nr_remaining; |
| 2717 | unsigned int nr_succeeded; |
| 2718 | LIST_HEAD(migratepages); |
| 2719 | struct mem_cgroup *memcg = get_mem_cgroup_from_folio(folio); |
| 2720 | struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); |
| 2721 | |
| 2722 | list_add(&folio->lru, &migratepages); |
| 2723 | nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_folio, |
| 2724 | NULL, node, MIGRATE_ASYNC, |
| 2725 | MR_NUMA_MISPLACED, &nr_succeeded); |
| 2726 | if (nr_remaining && !list_empty(&migratepages)) |
| 2727 | putback_movable_pages(&migratepages); |
| 2728 | if (nr_succeeded) { |
| 2729 | count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded); |
| 2730 | count_memcg_events(memcg, NUMA_PAGE_MIGRATE, nr_succeeded); |
| 2731 | if ((sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) |
| 2732 | && !node_is_toptier(folio_nid(folio)) |
| 2733 | && node_is_toptier(node)) |
| 2734 | mod_lruvec_state(lruvec, PGPROMOTE_SUCCESS, nr_succeeded); |
| 2735 | } |
| 2736 | mem_cgroup_put(memcg); |
| 2737 | BUG_ON(!list_empty(&migratepages)); |
| 2738 | return nr_remaining ? -EAGAIN : 0; |
| 2739 | } |
| 2740 | #endif /* CONFIG_NUMA_BALANCING */ |
| 2741 | #endif /* CONFIG_NUMA */ |
| 2742 | |