1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/* Internal procfs definitions
3 *
4 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8#include <linux/proc_fs.h>
9#include <linux/proc_ns.h>
10#include <linux/refcount.h>
11#include <linux/spinlock.h>
12#include <linux/atomic.h>
13#include <linux/binfmts.h>
14#include <linux/sched/coredump.h>
15#include <linux/sched/task.h>
16#include <linux/mm.h>
17
18struct ctl_table_header;
19struct mempolicy;
20
21/*
22 * This is not completely implemented yet. The idea is to
23 * create an in-memory tree (like the actual /proc filesystem
24 * tree) of these proc_dir_entries, so that we can dynamically
25 * add new files to /proc.
26 *
27 * parent/subdir are used for the directory structure (every /proc file has a
28 * parent, but "subdir" is empty for all non-directory entries).
29 * subdir_node is used to build the rb tree "subdir" of the parent.
30 */
struct proc_dir_entry {
	/*
	 * number of callers into module in progress;
	 * negative -> it's going away RSN
	 */
	atomic_t in_use;
	/* reference count; taken by pde_get(), dropped by pde_put() */
	refcount_t refcnt;
	struct list_head pde_openers;	/* who did ->open, but not ->release */
	/* protects ->pde_openers and all struct pde_opener instances */
	spinlock_t pde_unload_lock;
	struct completion *pde_unload_completion;
	const struct inode_operations *proc_iops;
	union {
		const struct proc_ops *proc_ops;		/* regular files */
		const struct file_operations *proc_dir_ops;	/* directories */
	};
	/* seq_file backing: either full seq_operations or a single show() */
	union {
		const struct seq_operations *seq_ops;
		int (*single_show)(struct seq_file *, void *);
	};
	proc_write_t write;
	void *data;			/* creator's private data */
	unsigned int state_size;
	unsigned int low_ino;		/* presumably the entry's inode number -- TODO confirm */
	nlink_t nlink;
	kuid_t uid;
	kgid_t gid;
	loff_t size;
	struct proc_dir_entry *parent;
	struct rb_root subdir;		/* children (see comment above struct) */
	struct rb_node subdir_node;	/* our node in parent->subdir */
	char *name;			/* presumably points at inline_name when short enough */
	umode_t mode;
	u8 flags;			/* PROC_ENTRY_* bits */
	u8 namelen;
	char inline_name[];		/* flexible array; sized via SIZEOF_PDE_INLINE_NAME */
} __randomize_layout;
68
/*
 * Round the proc_dir_entry allocation (struct + inline name storage) up to
 * the next convenient size bucket; yields 0 if the bare struct ever exceeds
 * 512 bytes (presumably caught by a build-time check elsewhere -- TODO confirm).
 */
#define SIZEOF_PDE ( \
	sizeof(struct proc_dir_entry) < 128 ? 128 : \
	sizeof(struct proc_dir_entry) < 192 ? 192 : \
	sizeof(struct proc_dir_entry) < 256 ? 256 : \
	sizeof(struct proc_dir_entry) < 512 ? 512 : \
	0)
/* Bytes left over in the bucket for the inline_name[] flexible array. */
#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry))
76
77static inline bool pde_is_permanent(const struct proc_dir_entry *pde)
78{
79 return pde->flags & PROC_ENTRY_PERMANENT;
80}
81
82static inline void pde_make_permanent(struct proc_dir_entry *pde)
83{
84 pde->flags |= PROC_ENTRY_PERMANENT;
85}
86
87static inline bool pde_has_proc_read_iter(const struct proc_dir_entry *pde)
88{
89 return pde->flags & PROC_ENTRY_proc_read_iter;
90}
91
92static inline bool pde_has_proc_compat_ioctl(const struct proc_dir_entry *pde)
93{
94#ifdef CONFIG_COMPAT
95 return pde->flags & PROC_ENTRY_proc_compat_ioctl;
96#else
97 return false;
98#endif
99}
100
101static inline bool pde_has_proc_lseek(const struct proc_dir_entry *pde)
102{
103 return pde->flags & PROC_ENTRY_proc_lseek;
104}
105
106extern struct kmem_cache *proc_dir_entry_cache;
107void pde_free(struct proc_dir_entry *pde);
108
/* Per-inode operation hook for pid-based /proc entries; which member is
 * live depends on the entry type. */
union proc_op {
	int (*proc_get_link)(struct dentry *, struct path *);
	int (*proc_show)(struct seq_file *m,
		struct pid_namespace *ns, struct pid *pid,
		struct task_struct *task);
	int lsmid;	/* presumably an LSM identifier -- TODO confirm against users */
};
116
/* procfs-private inode; the VFS inode is embedded at the end so PROC_I()
 * can recover the container via container_of(). */
struct proc_inode {
	struct pid *pid;
	unsigned int fd;
	union proc_op op;
	struct proc_dir_entry *pde;
	struct ctl_table_header *sysctl;	/* sysctl entries only */
	const struct ctl_table *sysctl_entry;
	struct hlist_node sibling_inodes;
	const struct proc_ns_operations *ns_ops;	/* namespace entries only */
	struct inode vfs_inode;		/* must stay last for container_of() use */
} __randomize_layout;
128
129/*
130 * General functions
131 */
/* Map a VFS inode back to its enclosing proc_inode. */
static inline struct proc_inode *PROC_I(const struct inode *inode)
{
	return container_of(inode, struct proc_inode, vfs_inode);
}
136
137static inline struct proc_dir_entry *PDE(const struct inode *inode)
138{
139 return PROC_I(inode)->pde;
140}
141
142static inline struct pid *proc_pid(const struct inode *inode)
143{
144 return PROC_I(inode)->pid;
145}
146
147static inline struct task_struct *get_proc_task(const struct inode *inode)
148{
149 return get_pid_task(pid: proc_pid(inode), PIDTYPE_PID);
150}
151
152void task_dump_owner(struct task_struct *task, umode_t mode,
153 kuid_t *ruid, kgid_t *rgid);
154
155unsigned name_to_int(const struct qstr *qstr);
156/*
157 * Offset of the first process in the /proc root directory..
158 */
159#define FIRST_PROCESS_ENTRY 256
160
161/* Worst case buffer size needed for holding an integer. */
162#define PROC_NUMBUF 13
163
164#ifdef CONFIG_PAGE_MAPCOUNT
165/**
166 * folio_precise_page_mapcount() - Number of mappings of this folio page.
167 * @folio: The folio.
168 * @page: The page.
169 *
170 * The number of present user page table entries that reference this page
171 * as tracked via the RMAP: either referenced directly (PTE) or as part of
172 * a larger area that covers this page (e.g., PMD).
173 *
174 * Use this function only for the calculation of existing statistics
175 * (USS, PSS, mapcount_max) and for debugging purposes (/proc/kpagecount).
176 *
177 * Do not add new users.
178 *
179 * Returns: The number of mappings of this folio page. 0 for
180 * folios that are not mapped to user space or are not tracked via the RMAP
181 * (e.g., shared zeropage).
182 */
183static inline int folio_precise_page_mapcount(struct folio *folio,
184 struct page *page)
185{
186 int mapcount = atomic_read(v: &page->_mapcount) + 1;
187
188 if (page_mapcount_is_type(mapcount))
189 mapcount = 0;
190 if (folio_test_large(folio))
191 mapcount += folio_entire_mapcount(folio);
192
193 return mapcount;
194}
195#else /* !CONFIG_PAGE_MAPCOUNT */
/* Without CONFIG_PAGE_MAPCOUNT there must be no callers at all. */
static inline int folio_precise_page_mapcount(struct folio *folio,
		struct page *page)
{
	BUILD_BUG();
}
201#endif /* CONFIG_PAGE_MAPCOUNT */
202
203/**
204 * folio_average_page_mapcount() - Average number of mappings per page in this
205 * folio
206 * @folio: The folio.
207 *
208 * The average number of user page table entries that reference each page in
209 * this folio as tracked via the RMAP: either referenced directly (PTE) or
210 * as part of a larger area that covers this page (e.g., PMD).
211 *
212 * The average is calculated by rounding to the nearest integer; however,
213 * to avoid duplicated code in current callers, the average is at least
214 * 1 if any page of the folio is mapped.
215 *
216 * Returns: The average number of mappings per page in this folio.
217 */
218static inline int folio_average_page_mapcount(struct folio *folio)
219{
220 int mapcount, entire_mapcount, avg;
221
222 if (!folio_test_large(folio))
223 return atomic_read(v: &folio->_mapcount) + 1;
224
225 mapcount = folio_large_mapcount(folio);
226 if (unlikely(mapcount <= 0))
227 return 0;
228 entire_mapcount = folio_entire_mapcount(folio);
229 if (mapcount <= entire_mapcount)
230 return entire_mapcount;
231 mapcount -= entire_mapcount;
232
233 /* Round to closest integer ... */
234 avg = ((unsigned int)mapcount + folio_large_nr_pages(folio) / 2) >> folio_large_order(folio);
235 /* ... but return at least 1. */
236 return max_t(int, avg + entire_mapcount, 1);
237}
238/*
239 * array.c
240 */
241extern const struct file_operations proc_tid_children_operations;
242
243extern void proc_task_name(struct seq_file *m, struct task_struct *p,
244 bool escape);
245extern int proc_tid_stat(struct seq_file *, struct pid_namespace *,
246 struct pid *, struct task_struct *);
247extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *,
248 struct pid *, struct task_struct *);
249extern int proc_pid_status(struct seq_file *, struct pid_namespace *,
250 struct pid *, struct task_struct *);
251extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
252 struct pid *, struct task_struct *);
253
254/*
255 * base.c
256 */
257extern const struct dentry_operations pid_dentry_operations;
258extern int pid_getattr(struct mnt_idmap *, const struct path *,
259 struct kstat *, u32, unsigned int);
260extern int proc_setattr(struct mnt_idmap *, struct dentry *,
261 struct iattr *);
262extern void proc_pid_evict_inode(struct proc_inode *);
263extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
264extern void pid_update_inode(struct task_struct *, struct inode *);
265extern int pid_delete_dentry(const struct dentry *);
266extern int proc_pid_readdir(struct file *, struct dir_context *);
267struct dentry *proc_pid_lookup(struct dentry *, unsigned int);
268extern loff_t mem_lseek(struct file *, loff_t, int);
269
270/* Lookups */
271typedef struct dentry *instantiate_t(struct dentry *,
272 struct task_struct *, const void *);
273bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int,
274 instantiate_t, struct task_struct *, const void *);
275
276/*
277 * generic.c
278 */
279struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
280 struct proc_dir_entry **parent, void *data);
281struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
282 struct proc_dir_entry *dp);
283extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
284struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
285extern int proc_readdir(struct file *, struct dir_context *);
286int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *);
287
288static inline void pde_get(struct proc_dir_entry *pde)
289{
290 refcount_inc(r: &pde->refcnt);
291}
292extern void pde_put(struct proc_dir_entry *);
293
294static inline bool is_empty_pde(const struct proc_dir_entry *pde)
295{
296 return S_ISDIR(pde->mode) && !pde->proc_iops;
297}
298extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, loff_t *);
299
300/*
301 * inode.c
302 */
/* One live ->open of a proc file; linked on pde->pde_openers and protected
 * by pde->pde_unload_lock (see struct proc_dir_entry). */
struct pde_opener {
	struct list_head lh;
	struct file *file;
	bool closing;		/* release in progress */
	struct completion *c;	/* presumably signalled when release completes -- TODO confirm */
} __randomize_layout;
309extern const struct inode_operations proc_link_inode_operations;
310extern const struct inode_operations proc_pid_link_inode_operations;
311extern const struct super_operations proc_sops;
312
313void proc_init_kmemcache(void);
314void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock);
315void set_proc_pid_nlink(void);
316extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
317extern void proc_entry_rundown(struct proc_dir_entry *);
318
319/*
320 * proc_namespaces.c
321 */
322extern const struct inode_operations proc_ns_dir_inode_operations;
323extern const struct file_operations proc_ns_dir_operations;
324
325/*
326 * proc_net.c
327 */
328extern const struct file_operations proc_net_operations;
329extern const struct inode_operations proc_net_inode_operations;
330
331#ifdef CONFIG_NET
332extern int proc_net_init(void);
333#else
static inline int proc_net_init(void) { return 0; }	/* !CONFIG_NET: nothing to do */
335#endif
336
337/*
338 * proc_self.c
339 */
340extern int proc_setup_self(struct super_block *);
341
342/*
343 * proc_thread_self.c
344 */
345extern int proc_setup_thread_self(struct super_block *);
346extern void proc_thread_self_init(void);
347
348/*
349 * proc_sysctl.c
350 */
351#ifdef CONFIG_PROC_SYSCTL
352extern int proc_sys_init(void);
353extern void proc_sys_evict_inode(struct inode *inode,
354 struct ctl_table_header *head);
355#else
/* !CONFIG_PROC_SYSCTL stubs. NOTE(review): the init stub returns void while
 * the real proc_sys_init() returns int; presumably callers ignore the
 * result -- confirm at call sites. */
static inline void proc_sys_init(void) { }
static inline void proc_sys_evict_inode(struct inode *inode,
		struct ctl_table_header *head) { }
359#endif
360
361/*
362 * proc_tty.c
363 */
364#ifdef CONFIG_TTY
365extern void proc_tty_init(void);
366#else
static inline void proc_tty_init(void) {}	/* !CONFIG_TTY: nothing to do */
368#endif
369
370/*
371 * root.c
372 */
373extern struct proc_dir_entry proc_root;
374
375extern void proc_self_init(void);
376
377/*
378 * task_[no]mmu.c
379 */
380struct mem_size_stats;
381
/* Locking state carried while iterating a task's VMAs. */
struct proc_maps_locking_ctx {
	struct mm_struct *mm;
#ifdef CONFIG_PER_VMA_LOCK
	bool mmap_locked;			/* holding the full mmap lock? */
	struct vm_area_struct *locked_vma;	/* else: the single locked VMA */
#endif
};
389
/* Per-open state for the /proc/<pid>/{maps,smaps,numa_maps,...} files. */
struct proc_maps_private {
	struct inode *inode;
	struct task_struct *task;
	struct vma_iterator iter;
	loff_t last_pos;	/* resume point for seq_file reads */
	struct proc_maps_locking_ctx lock_ctx;
#ifdef CONFIG_NUMA
	struct mempolicy *task_mempolicy;
#endif
} __randomize_layout;
400
401struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode);
402
403extern const struct file_operations proc_pid_maps_operations;
404extern const struct file_operations proc_pid_numa_maps_operations;
405extern const struct file_operations proc_pid_smaps_operations;
406extern const struct file_operations proc_pid_smaps_rollup_operations;
407extern const struct file_operations proc_clear_refs_operations;
408extern const struct file_operations proc_pagemap_operations;
409
410extern unsigned long task_vsize(struct mm_struct *);
411extern unsigned long task_statm(struct mm_struct *,
412 unsigned long *, unsigned long *,
413 unsigned long *, unsigned long *);
414extern void task_mem(struct seq_file *, struct mm_struct *);
415
416extern const struct dentry_operations proc_net_dentry_ops;
417static inline void pde_force_lookup(struct proc_dir_entry *pde)
418{
419 /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
420 pde->flags |= PROC_ENTRY_FORCE_LOOKUP;
421}
422
423/*
424 * Add a new procfs dentry that can't serve as a mountpoint. That should
425 * encompass anything that is ephemeral and can just disappear while the
426 * process is still around.
427 */
static inline struct dentry *proc_splice_unmountable(struct inode *inode,
		struct dentry *dentry, const struct dentry_operations *d_ops)
{
	/* Mark the dentry un-mountable before it becomes visible via splice. */
	dont_mount(dentry);
	return d_splice_alias_ops(inode, dentry, d_ops);
}
434