| 1 | // SPDX-License-Identifier: GPL-2.0 | 
|---|
| 2 | #include <linux/memblock.h> | 
|---|
| 3 | #include <linux/compiler.h> | 
|---|
| 4 | #include <linux/fs.h> | 
|---|
| 5 | #include <linux/init.h> | 
|---|
| 6 | #include <linux/ksm.h> | 
|---|
| 7 | #include <linux/mm.h> | 
|---|
| 8 | #include <linux/mmzone.h> | 
|---|
| 9 | #include <linux/huge_mm.h> | 
|---|
| 10 | #include <linux/proc_fs.h> | 
|---|
| 11 | #include <linux/seq_file.h> | 
|---|
| 12 | #include <linux/hugetlb.h> | 
|---|
| 13 | #include <linux/memremap.h> | 
|---|
| 14 | #include <linux/memcontrol.h> | 
|---|
| 15 | #include <linux/mmu_notifier.h> | 
|---|
| 16 | #include <linux/page_idle.h> | 
|---|
| 17 | #include <linux/kernel-page-flags.h> | 
|---|
| 18 | #include <linux/uaccess.h> | 
|---|
| 19 | #include "internal.h" | 
|---|
| 20 |  | 
|---|
/*
 * Geometry of one entry in the /proc/kpage* arrays: every entry is a u64.
 * KPMSIZE is the entry size in bytes, KPMMASK the mask used to test that
 * an offset or length is entry-aligned, KPMBITS the entry size in bits.
 */
#define KPMSIZE (sizeof(u64))
#define KPMMASK (KPMSIZE - 1)
#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
|---|
| 24 |  | 
|---|
/* Selects which per-page value kpage_read() emits for each PFN. */
enum kpage_operation {
	KPAGE_FLAGS = 0,	/* value of stable_page_flags() */
	KPAGE_COUNT,		/* value of get_kpage_count() */
	KPAGE_CGROUP,		/* value of page_cgroup_ino() */
};
|---|
| 30 |  | 
|---|
| 31 | static inline unsigned long get_max_dump_pfn(void) | 
|---|
| 32 | { | 
|---|
| 33 | #ifdef CONFIG_SPARSEMEM | 
|---|
| 34 | /* | 
|---|
| 35 | * The memmap of early sections is completely populated and marked | 
|---|
| 36 | * online even if max_pfn does not fall on a section boundary - | 
|---|
| 37 | * pfn_to_online_page() will succeed on all pages. Allow inspecting | 
|---|
| 38 | * these memmaps. | 
|---|
| 39 | */ | 
|---|
| 40 | return round_up(max_pfn, PAGES_PER_SECTION); | 
|---|
| 41 | #else | 
|---|
| 42 | return max_pfn; | 
|---|
| 43 | #endif | 
|---|
| 44 | } | 
|---|
| 45 |  | 
|---|
| 46 | static u64 get_kpage_count(const struct page *page) | 
|---|
| 47 | { | 
|---|
| 48 | struct page_snapshot ps; | 
|---|
| 49 | u64 ret; | 
|---|
| 50 |  | 
|---|
| 51 | snapshot_page(ps: &ps, page); | 
|---|
| 52 |  | 
|---|
| 53 | if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) | 
|---|
| 54 | ret = folio_precise_page_mapcount(folio: &ps.folio_snapshot, | 
|---|
| 55 | page: &ps.page_snapshot); | 
|---|
| 56 | else | 
|---|
| 57 | ret = folio_average_page_mapcount(folio: &ps.folio_snapshot); | 
|---|
| 58 |  | 
|---|
| 59 | return ret; | 
|---|
| 60 | } | 
|---|
| 61 |  | 
|---|
| 62 | static ssize_t kpage_read(struct file *file, char __user *buf, | 
|---|
| 63 | size_t count, loff_t *ppos, | 
|---|
| 64 | enum kpage_operation op) | 
|---|
| 65 | { | 
|---|
| 66 | const unsigned long max_dump_pfn = get_max_dump_pfn(); | 
|---|
| 67 | u64 __user *out = (u64 __user *)buf; | 
|---|
| 68 | struct page *page; | 
|---|
| 69 | unsigned long src = *ppos; | 
|---|
| 70 | unsigned long pfn; | 
|---|
| 71 | ssize_t ret = 0; | 
|---|
| 72 | u64 info; | 
|---|
| 73 |  | 
|---|
| 74 | pfn = src / KPMSIZE; | 
|---|
| 75 | if (src & KPMMASK || count & KPMMASK) | 
|---|
| 76 | return -EINVAL; | 
|---|
| 77 | if (src >= max_dump_pfn * KPMSIZE) | 
|---|
| 78 | return 0; | 
|---|
| 79 | count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); | 
|---|
| 80 |  | 
|---|
| 81 | while (count > 0) { | 
|---|
| 82 | /* | 
|---|
| 83 | * TODO: ZONE_DEVICE support requires to identify | 
|---|
| 84 | * memmaps that were actually initialized. | 
|---|
| 85 | */ | 
|---|
| 86 | page = pfn_to_online_page(pfn); | 
|---|
| 87 |  | 
|---|
| 88 | if (page) { | 
|---|
| 89 | switch (op) { | 
|---|
| 90 | case KPAGE_FLAGS: | 
|---|
| 91 | info = stable_page_flags(page); | 
|---|
| 92 | break; | 
|---|
| 93 | case KPAGE_COUNT: | 
|---|
| 94 | info = get_kpage_count(page); | 
|---|
| 95 | break; | 
|---|
| 96 | case KPAGE_CGROUP: | 
|---|
| 97 | info = page_cgroup_ino(page); | 
|---|
| 98 | break; | 
|---|
| 99 | default: | 
|---|
| 100 | info = 0; | 
|---|
| 101 | break; | 
|---|
| 102 | } | 
|---|
| 103 | } else | 
|---|
| 104 | info = 0; | 
|---|
| 105 |  | 
|---|
| 106 | if (put_user(info, out)) { | 
|---|
| 107 | ret = -EFAULT; | 
|---|
| 108 | break; | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | pfn++; | 
|---|
| 112 | out++; | 
|---|
| 113 | count -= KPMSIZE; | 
|---|
| 114 |  | 
|---|
| 115 | cond_resched(); | 
|---|
| 116 | } | 
|---|
| 117 |  | 
|---|
| 118 | *ppos += (char __user *)out - buf; | 
|---|
| 119 | if (!ret) | 
|---|
| 120 | ret = (char __user *)out - buf; | 
|---|
| 121 | return ret; | 
|---|
| 122 | } | 
|---|
| 123 |  | 
|---|
| 124 | /* /proc/kpagecount - an array exposing page mapcounts | 
|---|
| 125 | * | 
|---|
| 126 | * Each entry is a u64 representing the corresponding | 
|---|
| 127 | * physical page mapcount. | 
|---|
| 128 | */ | 
|---|
| 129 | static ssize_t kpagecount_read(struct file *file, char __user *buf, | 
|---|
| 130 | size_t count, loff_t *ppos) | 
|---|
| 131 | { | 
|---|
| 132 | return kpage_read(file, buf, count, ppos, op: KPAGE_COUNT); | 
|---|
| 133 | } | 
|---|
| 134 |  | 
|---|
| 135 | static const struct proc_ops kpagecount_proc_ops = { | 
|---|
| 136 | .proc_flags	= PROC_ENTRY_PERMANENT, | 
|---|
| 137 | .proc_lseek	= mem_lseek, | 
|---|
| 138 | .proc_read	= kpagecount_read, | 
|---|
| 139 | }; | 
|---|
| 140 |  | 
|---|
| 141 |  | 
|---|
/*
 * Extract bit @kbit from @kflags and return it moved to bit position @ubit;
 * every other bit of the result is zero.
 */
static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
	const u64 bit = (kflags >> kbit) & 1;

	return bit << ubit;
}
|---|
| 146 |  | 
|---|
| 147 | u64 stable_page_flags(const struct page *page) | 
|---|
| 148 | { | 
|---|
| 149 | const struct folio *folio; | 
|---|
| 150 | struct page_snapshot ps; | 
|---|
| 151 | unsigned long k; | 
|---|
| 152 | unsigned long mapping; | 
|---|
| 153 | bool is_anon; | 
|---|
| 154 | u64 u = 0; | 
|---|
| 155 |  | 
|---|
| 156 | /* | 
|---|
| 157 | * pseudo flag: KPF_NOPAGE | 
|---|
| 158 | * it differentiates a memory hole from a page with no flags | 
|---|
| 159 | */ | 
|---|
| 160 | if (!page) | 
|---|
| 161 | return 1 << KPF_NOPAGE; | 
|---|
| 162 |  | 
|---|
| 163 | snapshot_page(ps: &ps, page); | 
|---|
| 164 | folio = &ps.folio_snapshot; | 
|---|
| 165 |  | 
|---|
| 166 | k = folio->flags.f; | 
|---|
| 167 | mapping = (unsigned long)folio->mapping; | 
|---|
| 168 | is_anon = mapping & FOLIO_MAPPING_ANON; | 
|---|
| 169 |  | 
|---|
| 170 | /* | 
|---|
| 171 | * pseudo flags for the well known (anonymous) memory mapped pages | 
|---|
| 172 | */ | 
|---|
| 173 | if (folio_mapped(folio)) | 
|---|
| 174 | u |= 1 << KPF_MMAP; | 
|---|
| 175 | if (is_anon) { | 
|---|
| 176 | u |= 1 << KPF_ANON; | 
|---|
| 177 | if (mapping & FOLIO_MAPPING_KSM) | 
|---|
| 178 | u |= 1 << KPF_KSM; | 
|---|
| 179 | } | 
|---|
| 180 |  | 
|---|
| 181 | /* | 
|---|
| 182 | * compound pages: export both head/tail info | 
|---|
| 183 | * they together define a compound page's start/end pos and order | 
|---|
| 184 | */ | 
|---|
| 185 | if (ps.idx == 0) | 
|---|
| 186 | u |= kpf_copy_bit(kflags: k, KPF_COMPOUND_HEAD, kbit: PG_head); | 
|---|
| 187 | else | 
|---|
| 188 | u |= 1 << KPF_COMPOUND_TAIL; | 
|---|
| 189 | if (folio_test_hugetlb(folio)) | 
|---|
| 190 | u |= 1 << KPF_HUGE; | 
|---|
| 191 | else if (folio_test_large(folio) && | 
|---|
| 192 | folio_test_large_rmappable(folio)) { | 
|---|
| 193 | /* Note: we indicate any THPs here, not just PMD-sized ones */ | 
|---|
| 194 | u |= 1 << KPF_THP; | 
|---|
| 195 | } else if (is_huge_zero_pfn(pfn: ps.pfn)) { | 
|---|
| 196 | u |= 1 << KPF_ZERO_PAGE; | 
|---|
| 197 | u |= 1 << KPF_THP; | 
|---|
| 198 | } else if (is_zero_pfn(pfn: ps.pfn)) { | 
|---|
| 199 | u |= 1 << KPF_ZERO_PAGE; | 
|---|
| 200 | } | 
|---|
| 201 |  | 
|---|
| 202 | if (ps.flags & PAGE_SNAPSHOT_PG_BUDDY) | 
|---|
| 203 | u |= 1 << KPF_BUDDY; | 
|---|
| 204 |  | 
|---|
| 205 | if (folio_test_offline(folio)) | 
|---|
| 206 | u |= 1 << KPF_OFFLINE; | 
|---|
| 207 | if (folio_test_pgtable(folio)) | 
|---|
| 208 | u |= 1 << KPF_PGTABLE; | 
|---|
| 209 | if (folio_test_slab(folio)) | 
|---|
| 210 | u |= 1 << KPF_SLAB; | 
|---|
| 211 |  | 
|---|
| 212 | #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) | 
|---|
| 213 | u |= kpf_copy_bit(k, KPF_IDLE,          PG_idle); | 
|---|
| 214 | #else | 
|---|
| 215 | if (ps.flags & PAGE_SNAPSHOT_PG_IDLE) | 
|---|
| 216 | u |= 1 << KPF_IDLE; | 
|---|
| 217 | #endif | 
|---|
| 218 |  | 
|---|
| 219 | u |= kpf_copy_bit(kflags: k, KPF_LOCKED,	kbit: PG_locked); | 
|---|
| 220 | u |= kpf_copy_bit(kflags: k, KPF_DIRTY,		kbit: PG_dirty); | 
|---|
| 221 | u |= kpf_copy_bit(kflags: k, KPF_UPTODATE,	kbit: PG_uptodate); | 
|---|
| 222 | u |= kpf_copy_bit(kflags: k, KPF_WRITEBACK,	kbit: PG_writeback); | 
|---|
| 223 |  | 
|---|
| 224 | u |= kpf_copy_bit(kflags: k, KPF_LRU,		kbit: PG_lru); | 
|---|
| 225 | u |= kpf_copy_bit(kflags: k, KPF_REFERENCED,	kbit: PG_referenced); | 
|---|
| 226 | u |= kpf_copy_bit(kflags: k, KPF_ACTIVE,	kbit: PG_active); | 
|---|
| 227 | u |= kpf_copy_bit(kflags: k, KPF_RECLAIM,	kbit: PG_reclaim); | 
|---|
| 228 |  | 
|---|
| 229 | #define SWAPCACHE ((1 << PG_swapbacked) | (1 << PG_swapcache)) | 
|---|
| 230 | if ((k & SWAPCACHE) == SWAPCACHE) | 
|---|
| 231 | u |= 1 << KPF_SWAPCACHE; | 
|---|
| 232 | u |= kpf_copy_bit(kflags: k, KPF_SWAPBACKED,	kbit: PG_swapbacked); | 
|---|
| 233 |  | 
|---|
| 234 | u |= kpf_copy_bit(kflags: k, KPF_UNEVICTABLE,	kbit: PG_unevictable); | 
|---|
| 235 | u |= kpf_copy_bit(kflags: k, KPF_MLOCKED,	kbit: PG_mlocked); | 
|---|
| 236 |  | 
|---|
| 237 | #ifdef CONFIG_MEMORY_FAILURE | 
|---|
| 238 | if (u & (1 << KPF_HUGE)) | 
|---|
| 239 | u |= kpf_copy_bit(k, KPF_HWPOISON,	PG_hwpoison); | 
|---|
| 240 | else | 
|---|
| 241 | u |= kpf_copy_bit(ps.page_snapshot.flags.f, KPF_HWPOISON, PG_hwpoison); | 
|---|
| 242 | #endif | 
|---|
| 243 |  | 
|---|
| 244 | u |= kpf_copy_bit(kflags: k, KPF_RESERVED,	kbit: PG_reserved); | 
|---|
| 245 | u |= kpf_copy_bit(kflags: k, KPF_OWNER_2,	kbit: PG_owner_2); | 
|---|
| 246 | u |= kpf_copy_bit(kflags: k, KPF_PRIVATE,	kbit: PG_private); | 
|---|
| 247 | u |= kpf_copy_bit(kflags: k, KPF_PRIVATE_2,	kbit: PG_private_2); | 
|---|
| 248 | u |= kpf_copy_bit(kflags: k, KPF_OWNER_PRIVATE,	kbit: PG_owner_priv_1); | 
|---|
| 249 | u |= kpf_copy_bit(kflags: k, KPF_ARCH,		kbit: PG_arch_1); | 
|---|
| 250 | #ifdef CONFIG_ARCH_USES_PG_ARCH_2 | 
|---|
| 251 | u |= kpf_copy_bit(kflags: k, KPF_ARCH_2,	kbit: PG_arch_2); | 
|---|
| 252 | #endif | 
|---|
| 253 | #ifdef CONFIG_ARCH_USES_PG_ARCH_3 | 
|---|
| 254 | u |= kpf_copy_bit(k, KPF_ARCH_3,	PG_arch_3); | 
|---|
| 255 | #endif | 
|---|
| 256 |  | 
|---|
| 257 | return u; | 
|---|
| 258 | } | 
|---|
| 259 | EXPORT_SYMBOL_GPL(stable_page_flags); | 
|---|
| 260 |  | 
|---|
| 261 | /* /proc/kpageflags - an array exposing page flags | 
|---|
| 262 | * | 
|---|
| 263 | * Each entry is a u64 representing the corresponding | 
|---|
| 264 | * physical page flags. | 
|---|
| 265 | */ | 
|---|
| 266 | static ssize_t kpageflags_read(struct file *file, char __user *buf, | 
|---|
| 267 | size_t count, loff_t *ppos) | 
|---|
| 268 | { | 
|---|
| 269 | return kpage_read(file, buf, count, ppos, op: KPAGE_FLAGS); | 
|---|
| 270 | } | 
|---|
| 271 |  | 
|---|
| 272 | static const struct proc_ops kpageflags_proc_ops = { | 
|---|
| 273 | .proc_flags	= PROC_ENTRY_PERMANENT, | 
|---|
| 274 | .proc_lseek	= mem_lseek, | 
|---|
| 275 | .proc_read	= kpageflags_read, | 
|---|
| 276 | }; | 
|---|
| 277 |  | 
|---|
#ifdef CONFIG_MEMCG
/*
 * /proc/kpagecgroup - an array indexed by PFN
 *
 * Each entry is a u64 holding the page_cgroup_ino() value of the
 * corresponding physical page. The work is done by kpage_read().
 */
static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	const enum kpage_operation what = KPAGE_CGROUP;

	return kpage_read(file, buf, count, ppos, what);
}

/* procfs operations for /proc/kpagecgroup: seekable and read-only. */
static const struct proc_ops kpagecgroup_proc_ops = {
	.proc_read	= kpagecgroup_read,
	.proc_lseek	= mem_lseek,
	.proc_flags	= PROC_ENTRY_PERMANENT,
};
#endif /* CONFIG_MEMCG */
|---|
| 290 |  | 
|---|
| 291 | static int __init proc_page_init(void) | 
|---|
| 292 | { | 
|---|
| 293 | proc_create(name: "kpagecount", S_IRUSR, NULL, proc_ops: &kpagecount_proc_ops); | 
|---|
| 294 | proc_create(name: "kpageflags", S_IRUSR, NULL, proc_ops: &kpageflags_proc_ops); | 
|---|
| 295 | #ifdef CONFIG_MEMCG | 
|---|
| 296 | proc_create( "kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops); | 
|---|
| 297 | #endif | 
|---|
| 298 | return 0; | 
|---|
| 299 | } | 
|---|
| 300 | fs_initcall(proc_page_init); | 
|---|
| 301 |  | 
|---|