/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MM_SWAP_H
#define _MM_SWAP_H

#include <linux/atomic.h> /* for atomic_long_t */
struct mempolicy;
struct swap_iocb;

extern int page_cluster;

#ifdef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER	HPAGE_PMD_NR
#define swap_entry_order(order)	(order)
#else
#define SWAPFILE_CLUSTER	256
#define swap_entry_order(order)	0
#endif

extern struct swap_info_struct *swap_info[];

/*
 * We use this to track usage of a cluster. A cluster is a block of swap
 * disk space SWAPFILE_CLUSTER pages long, naturally aligned on disk. All
 * free clusters are organized into a list; we fetch an entry from the
 * list to get a free cluster.
 *
 * The flags field determines whether a cluster is free. This is
 * protected by the cluster lock.
 */
struct swap_cluster_info {
	spinlock_t lock;	/*
				 * Protect swap_cluster_info fields
				 * other than list, and swap_info_struct->swap_map
				 * elements corresponding to the swap cluster.
				 */
	u16 count;
	u8 flags;
	u8 order;
	atomic_long_t __rcu *table;	/* Swap table entries, see mm/swap_table.h */
	struct list_head list;
};

/* All on-list clusters must have a non-zero flag. */
enum swap_cluster_flags {
	CLUSTER_FLAG_NONE = 0, /* For temporary off-list clusters */
	CLUSTER_FLAG_FREE,
	CLUSTER_FLAG_NONFULL,
	CLUSTER_FLAG_FRAG,
	/* Clusters with flags above are allocatable */
	CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
	CLUSTER_FLAG_FULL,
	CLUSTER_FLAG_DISCARD,
	CLUSTER_FLAG_MAX,
};
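
/*
 * Illustrative sketch (an editorial example, not part of the original
 * header): per the flag ordering above, a caller already holding
 * ci->lock could test whether a cluster may still serve allocations.
 * The helper name is hypothetical.
 *
 *	static inline bool cluster_is_allocatable(struct swap_cluster_info *ci)
 *	{
 *		return ci->flags >= CLUSTER_FLAG_FREE &&
 *		       ci->flags <= CLUSTER_FLAG_USABLE;
 *	}
 */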

#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */

static inline unsigned int swp_cluster_offset(swp_entry_t entry)
{
	return swp_offset(entry) % SWAPFILE_CLUSTER;
}

/*
 * Callers of all helpers below must ensure the entry, type, or offset is
 * valid, and protect the swap device with reference count or locks.
 */
static inline struct swap_info_struct *__swap_type_to_info(int type)
{
	struct swap_info_struct *si;

	si = READ_ONCE(swap_info[type]); /* rcu_dereference() */
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	return si;
}

static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	return __swap_type_to_info(swp_type(entry));
}

static inline struct swap_cluster_info *__swap_offset_to_cluster(
		struct swap_info_struct *si, pgoff_t offset)
{
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	VM_WARN_ON_ONCE(offset >= si->max);
	return &si->cluster_info[offset / SWAPFILE_CLUSTER];
}

static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry)
{
	return __swap_offset_to_cluster(__swap_entry_to_info(entry),
					swp_offset(entry));
}

static __always_inline struct swap_cluster_info *__swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset, bool irq)
{
	struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset);

	/*
	 * Nothing modifies the swap cache in an IRQ context. All access to
	 * the swap cache is wrapped by swap_cache_* helpers, and swap cache
	 * writeback is handled outside of IRQs. Swapin or swapout never
	 * occurs in an IRQ context, and neither does in-place split or
	 * replace.
	 *
	 * Besides, modifying the swap cache requires synchronization with
	 * swap_map, which has never been IRQ safe.
	 */
	VM_WARN_ON_ONCE(!in_task());
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	if (irq)
		spin_lock_irq(&ci->lock);
	else
		spin_lock(&ci->lock);
	return ci;
}

/**
 * swap_cluster_lock - Lock and return the swap cluster of given offset.
 * @si: swap device the cluster belongs to.
 * @offset: the swap entry offset, pointing to a valid slot.
 *
 * Context: The caller must ensure the offset is in the valid range and
 * protect the swap device with reference count or locks.
 */
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset)
{
	return __swap_cluster_lock(si, offset, false);
}

static inline struct swap_cluster_info *__swap_cluster_get_and_lock(
		const struct folio *folio, bool irq)
{
	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
	VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
	return __swap_cluster_lock(__swap_entry_to_info(folio->swap),
				   swp_offset(folio->swap), irq);
}

/**
 * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries.
 * @folio: The folio.
 *
 * This locks and returns the swap cluster that contains a folio's swap
 * entries. The swap entries of a folio are always in one single cluster.
 * The folio has to be locked so its swap entries won't change and the
 * cluster won't be freed.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock(
		const struct folio *folio)
{
	return __swap_cluster_get_and_lock(folio, false);
}

/**
 * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries.
 * @folio: The folio.
 *
 * Same as swap_cluster_get_and_lock() but also disables IRQs.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
		const struct folio *folio)
{
	return __swap_cluster_get_and_lock(folio, true);
}

static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
	spin_unlock(&ci->lock);
}

static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
	spin_unlock_irq(&ci->lock);
}
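
/*
 * Illustrative usage (an editorial sketch, not part of the original
 * header): the lock/unlock helpers above always pair. For a raw offset
 * on a device the caller has stabilized:
 *
 *	ci = swap_cluster_lock(si, offset);
 *	... examine or update cluster state, e.g. ci->count ...
 *	swap_cluster_unlock(ci);
 *
 * For a folio locked in the swap cache, swap_cluster_get_and_lock()
 * derives the device and offset from folio->swap instead.
 */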

/* linux/mm/page_io.c */
int sio_pool_init(void);
struct swap_iocb;
void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
void __swap_read_unplug(struct swap_iocb *plug);
static inline void swap_read_unplug(struct swap_iocb *plug)
{
	if (unlikely(plug))
		__swap_read_unplug(plug);
}
void swap_write_unplug(struct swap_iocb *sio);
int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);

/* linux/mm/swap_state.c */
extern struct address_space swap_space __ro_after_init;
static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	return &swap_space;
}

/*
 * Return the swap device position of the swap entry.
 */
static inline loff_t swap_dev_pos(swp_entry_t entry)
{
	return ((loff_t)swp_offset(entry)) << PAGE_SHIFT;
}
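
/*
 * For example (editorial note), with 4 KiB pages (PAGE_SHIFT == 12) the
 * entry at swap offset 3 maps to byte position 3 << 12 == 12288 on the
 * swap device.
 */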

/**
 * folio_matches_swap_entry - Check if a folio matches a given swap entry.
 * @folio: The folio.
 * @entry: The swap entry to check against.
 *
 * Context: The caller should have the folio locked to ensure it's stable
 * and nothing will move it in or out of the swap cache.
 * Return: true or false.
 */
static inline bool folio_matches_swap_entry(const struct folio *folio,
					    swp_entry_t entry)
{
	swp_entry_t folio_entry = folio->swap;
	long nr_pages = folio_nr_pages(folio);

	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
	if (!folio_test_swapcache(folio))
		return false;
	VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio);
	return folio_entry.val == round_down(entry.val, nr_pages);
}
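
/*
 * Illustrative usage (an editorial sketch, not part of the original
 * header): after a racy swap cache lookup, lock the folio and recheck
 * that it still backs the entry before trusting its contents:
 *
 *	folio = swap_cache_get_folio(entry);
 *	if (folio) {
 *		folio_lock(folio);
 *		if (!folio_matches_swap_entry(folio, entry))
 *			... the folio was reused, unlock and retry ...
 *	}
 */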

/*
 * All swap cache helpers below require the caller to ensure the swap entries
 * used are valid and stabilize the device in any of the following ways:
 * - Hold a reference by get_swap_device(): this ensures a single entry is
 *   valid and increases the swap device's refcount.
 * - Lock a folio in the swap cache: this ensures the folio's swap entries
 *   are valid and pinned, and also implies a reference to the device.
 * - Lock anything referencing the swap entry: e.g. the PTL that protects
 *   swap entries in the page table, similar to locking a swap cache folio.
 * - See the comment of get_swap_device() for more complex usage.
 */
struct folio *swap_cache_get_folio(swp_entry_t entry);
void *swap_cache_get_shadow(swp_entry_t entry);
void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow);
void swap_cache_del_folio(struct folio *folio);
/* The helpers below require the caller to lock and pass in the swap cluster. */
void __swap_cache_del_folio(struct swap_cluster_info *ci,
			    struct folio *folio, swp_entry_t entry, void *shadow);
void __swap_cache_replace_folio(struct swap_cluster_info *ci,
				struct folio *old, struct folio *new);
void __swap_cache_clear_shadow(swp_entry_t entry, int nr_ents);
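
/*
 * Illustrative sketch (editorial, not part of the original header) of
 * the first stabilization scheme described above, pinning the device
 * with get_swap_device() around a cache lookup:
 *
 *	si = get_swap_device(entry);
 *	if (si) {
 *		folio = swap_cache_get_folio(entry);
 *		put_swap_device(si);
 *	}
 */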

void show_swap_cache_info(void);
void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
		struct vm_area_struct *vma, unsigned long addr,
		struct swap_iocb **plug);
struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
		struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated,
		bool skip_if_exists);
struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
		struct mempolicy *mpol, pgoff_t ilx);
struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
		struct vm_fault *vmf);
void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
		unsigned long addr);

static inline unsigned int folio_swap_flags(struct folio *folio)
{
	return __swap_entry_to_info(folio->swap)->flags;
}

/*
 * Return the count of contiguous swap entries that share the same
 * zeromap status as the starting entry. If is_zeromap is not NULL,
 * it is set to the zeromap status of the starting entry.
 */
static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *is_zeromap)
{
	struct swap_info_struct *sis = __swap_entry_to_info(entry);
	unsigned long start = swp_offset(entry);
	unsigned long end = start + max_nr;
	bool first_bit;

	first_bit = test_bit(start, sis->zeromap);
	if (is_zeromap)
		*is_zeromap = first_bit;

	if (max_nr <= 1)
		return max_nr;
	if (first_bit)
		return find_next_zero_bit(sis->zeromap, end, start) - start;
	else
		return find_next_bit(sis->zeromap, end, start) - start;
}
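
/*
 * Illustrative usage (an editorial sketch, not part of the original
 * header): during swapin, size a batch by its zeromap status so that
 * zero-filled entries are never read from the device:
 *
 *	nr = swap_zeromap_batch(entry, max_nr, &is_zero);
 *	if (is_zero)
 *		... fill nr pages with zeros ...
 *	else
 *		... read nr pages from the swap device ...
 */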

static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	struct swap_info_struct *si = __swap_entry_to_info(entry);
	pgoff_t offset = swp_offset(entry);
	int i;

	/*
	 * While allocating a large folio and doing an mTHP swapin, we need to
	 * ensure that none of the entries is cached; otherwise the mTHP folio
	 * would conflict with a folio already in the swap cache.
	 */
	for (i = 0; i < max_nr; i++) {
		if (si->swap_map[offset + i] & SWAP_HAS_CACHE)
			return i;
	}

	return i;
}
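
/*
 * Illustrative usage (an editorial sketch, not part of the original
 * header): before an mTHP swapin, shrink the batch to the longest run
 * of entries with no swap cache conflict, falling back when shorter:
 *
 *	nr = non_swapcache_batch(entry, nr_pages);
 *	if (nr < nr_pages)
 *		... fall back to a smaller folio order ...
 */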

#else /* CONFIG_SWAP */
struct swap_iocb;
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, pgoff_t offset)
{
	return NULL;
}

static inline struct swap_cluster_info *swap_cluster_get_and_lock(
		const struct folio *folio)
{
	return NULL;
}

static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
		const struct folio *folio)
{
	return NULL;
}

static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
}

static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
}

static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	return NULL;
}

static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
}
static inline void swap_write_unplug(struct swap_iocb *sio)
{
}

static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	return NULL;
}

static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry)
{
	return false;
}

static inline void show_swap_cache_info(void)
{
}

static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
		gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
{
	return NULL;
}

static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
		struct vm_fault *vmf)
{
	return NULL;
}

static inline void swap_update_readahead(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr)
{
}

static inline int swap_writeout(struct folio *folio,
		struct swap_iocb **swap_plug)
{
	return 0;
}

static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
{
}

static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
{
	return NULL;
}

static inline void *swap_cache_get_shadow(swp_entry_t entry)
{
	return NULL;
}

static inline void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow)
{
}

static inline void swap_cache_del_folio(struct folio *folio)
{
}

static inline void __swap_cache_del_folio(struct swap_cluster_info *ci,
		struct folio *folio, swp_entry_t entry, void *shadow)
{
}

static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci,
		struct folio *old, struct folio *new)
{
}

static inline unsigned int folio_swap_flags(struct folio *folio)
{
	return 0;
}

static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *is_zeromap)
{
	return 0;
}

static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	return 0;
}
#endif /* CONFIG_SWAP */

/**
 * folio_index - File index of a folio.
 * @folio: The folio.
 *
 * For a folio which is either in the page cache or the swap cache,
 * return its index within the address_space it belongs to. If you know
 * the folio is definitely in the page cache, you can look at the folio's
 * index directly.
 *
 * Return: The index (offset in units of pages) of a folio in its file.
 */
static inline pgoff_t folio_index(struct folio *folio)
{
#ifdef CONFIG_SWAP
	if (unlikely(folio_test_swapcache(folio)))
		return swp_offset(folio->swap);
#endif
	return folio->index;
}

#endif /* _MM_SWAP_H */