| 1 | // SPDX-License-Identifier: GPL-2.0-only | 
|---|
| 2 | /* | 
|---|
| 3 | * Generic pidhash and scalable, time-bounded PID allocator | 
|---|
| 4 | * | 
|---|
| 5 | * (C) 2002-2003 Nadia Yvette Chambers, IBM | 
|---|
| 6 | * (C) 2004 Nadia Yvette Chambers, Oracle | 
|---|
| 7 | * (C) 2002-2004 Ingo Molnar, Red Hat | 
|---|
| 8 | * | 
|---|
| 9 | * pid-structures are backing objects for tasks sharing a given ID to chain | 
|---|
| 10 | * against. There is very little to them aside from hashing them and | 
|---|
| 11 | * parking tasks using given ID's on a list. | 
|---|
| 12 | * | 
|---|
| 13 | * The hash is always changed with the tasklist_lock write-acquired, | 
|---|
| 14 | * and the hash is only accessed with the tasklist_lock at least | 
|---|
| 15 | * read-acquired, so there's no additional SMP locking needed here. | 
|---|
| 16 | * | 
|---|
| 17 | * We have a list of bitmap pages, which bitmaps represent the PID space. | 
|---|
| 18 | * Allocating and freeing PIDs is completely lockless. The worst-case | 
|---|
| 19 | * allocation scenario when all but one out of 1 million PIDs possible are | 
|---|
| 20 | * allocated already: the scanning of 32 list entries and at most PAGE_SIZE | 
|---|
| 21 | * bytes. The typical fastpath is a single successful setbit. Freeing is O(1). | 
|---|
| 22 | * | 
|---|
| 23 | * Pid namespaces: | 
|---|
| 24 | *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc. | 
|---|
| 25 | *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM | 
|---|
| 26 | *     Many thanks to Oleg Nesterov for comments and help | 
|---|
| 27 | * | 
|---|
| 28 | */ | 
|---|
| 29 |  | 
|---|
| 30 | #include <linux/mm.h> | 
|---|
| 31 | #include <linux/export.h> | 
|---|
| 32 | #include <linux/slab.h> | 
|---|
| 33 | #include <linux/init.h> | 
|---|
| 34 | #include <linux/rculist.h> | 
|---|
| 35 | #include <linux/memblock.h> | 
|---|
| 36 | #include <linux/pid_namespace.h> | 
|---|
| 37 | #include <linux/init_task.h> | 
|---|
| 38 | #include <linux/syscalls.h> | 
|---|
| 39 | #include <linux/proc_ns.h> | 
|---|
| 40 | #include <linux/refcount.h> | 
|---|
| 41 | #include <linux/anon_inodes.h> | 
|---|
| 42 | #include <linux/sched/signal.h> | 
|---|
| 43 | #include <linux/sched/task.h> | 
|---|
| 44 | #include <linux/idr.h> | 
|---|
| 45 | #include <linux/pidfs.h> | 
|---|
| 46 | #include <linux/seqlock.h> | 
|---|
| 47 | #include <net/sock.h> | 
|---|
| 48 | #include <uapi/linux/pidfd.h> | 
|---|
| 49 |  | 
|---|
| 50 | struct pid init_struct_pid = { | 
|---|
| 51 | .count		= REFCOUNT_INIT(1), | 
|---|
| 52 | .tasks		= { | 
|---|
| 53 | { .first = NULL }, | 
|---|
| 54 | { .first = NULL }, | 
|---|
| 55 | { .first = NULL }, | 
|---|
| 56 | }, | 
|---|
| 57 | .level		= 0, | 
|---|
| 58 | .numbers	= { { | 
|---|
| 59 | .nr		= 0, | 
|---|
| 60 | .ns		= &init_pid_ns, | 
|---|
| 61 | }, } | 
|---|
| 62 | }; | 
|---|
| 63 |  | 
|---|
| 64 | static int pid_max_min = RESERVED_PIDS + 1; | 
|---|
| 65 | static int pid_max_max = PID_MAX_LIMIT; | 
|---|
| 66 |  | 
|---|
| 67 | /* | 
|---|
| 68 | * PID-map pages start out as NULL, they get allocated upon | 
|---|
| 69 | * first use and are never deallocated. This way a low pid_max | 
|---|
| 70 | * value does not cause lots of bitmaps to be allocated, but | 
|---|
| 71 | * the scheme scales to up to 4 million PIDs, runtime. | 
|---|
| 72 | */ | 
|---|
| 73 | struct pid_namespace init_pid_ns = { | 
|---|
| 74 | .ns.__ns_ref = REFCOUNT_INIT(2), | 
|---|
| 75 | .idr = IDR_INIT(init_pid_ns.idr), | 
|---|
| 76 | .pid_allocated = PIDNS_ADDING, | 
|---|
| 77 | .level = 0, | 
|---|
| 78 | .child_reaper = &init_task, | 
|---|
| 79 | .user_ns = &init_user_ns, | 
|---|
| 80 | .ns.inum = ns_init_inum(&init_pid_ns), | 
|---|
| 81 | #ifdef CONFIG_PID_NS | 
|---|
| 82 | .ns.ops = &pidns_operations, | 
|---|
| 83 | #endif | 
|---|
| 84 | .pid_max = PID_MAX_DEFAULT, | 
|---|
| 85 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) | 
|---|
| 86 | .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC, | 
|---|
| 87 | #endif | 
|---|
| 88 | .ns.ns_type = ns_common_type(&init_pid_ns), | 
|---|
| 89 | }; | 
|---|
| 90 | EXPORT_SYMBOL_GPL(init_pid_ns); | 
|---|
| 91 |  | 
|---|
| 92 | static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); | 
|---|
| 93 | seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock); | 
|---|
| 94 |  | 
|---|
| 95 | void put_pid(struct pid *pid) | 
|---|
| 96 | { | 
|---|
| 97 | struct pid_namespace *ns; | 
|---|
| 98 |  | 
|---|
| 99 | if (!pid) | 
|---|
| 100 | return; | 
|---|
| 101 |  | 
|---|
| 102 | ns = pid->numbers[pid->level].ns; | 
|---|
| 103 | if (refcount_dec_and_test(r: &pid->count)) { | 
|---|
| 104 | pidfs_free_pid(pid); | 
|---|
| 105 | kmem_cache_free(s: ns->pid_cachep, objp: pid); | 
|---|
| 106 | put_pid_ns(ns); | 
|---|
| 107 | } | 
|---|
| 108 | } | 
|---|
| 109 | EXPORT_SYMBOL_GPL(put_pid); | 
|---|
| 110 |  | 
|---|
| 111 | static void delayed_put_pid(struct rcu_head *rhp) | 
|---|
| 112 | { | 
|---|
| 113 | struct pid *pid = container_of(rhp, struct pid, rcu); | 
|---|
| 114 | put_pid(pid); | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 | void free_pid(struct pid *pid) | 
|---|
| 118 | { | 
|---|
| 119 | int i; | 
|---|
| 120 |  | 
|---|
| 121 | lockdep_assert_not_held(&tasklist_lock); | 
|---|
| 122 |  | 
|---|
| 123 | spin_lock(lock: &pidmap_lock); | 
|---|
| 124 | for (i = 0; i <= pid->level; i++) { | 
|---|
| 125 | struct upid *upid = pid->numbers + i; | 
|---|
| 126 | struct pid_namespace *ns = upid->ns; | 
|---|
| 127 | switch (--ns->pid_allocated) { | 
|---|
| 128 | case 2: | 
|---|
| 129 | case 1: | 
|---|
| 130 | /* When all that is left in the pid namespace | 
|---|
| 131 | * is the reaper wake up the reaper.  The reaper | 
|---|
| 132 | * may be sleeping in zap_pid_ns_processes(). | 
|---|
| 133 | */ | 
|---|
| 134 | wake_up_process(tsk: ns->child_reaper); | 
|---|
| 135 | break; | 
|---|
| 136 | case PIDNS_ADDING: | 
|---|
| 137 | /* Handle a fork failure of the first process */ | 
|---|
| 138 | WARN_ON(ns->child_reaper); | 
|---|
| 139 | ns->pid_allocated = 0; | 
|---|
| 140 | break; | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | idr_remove(&ns->idr, id: upid->nr); | 
|---|
| 144 | } | 
|---|
| 145 | pidfs_remove_pid(pid); | 
|---|
| 146 | spin_unlock(lock: &pidmap_lock); | 
|---|
| 147 |  | 
|---|
| 148 | call_rcu(head: &pid->rcu, func: delayed_put_pid); | 
|---|
| 149 | } | 
|---|
| 150 |  | 
|---|
| 151 | void free_pids(struct pid **pids) | 
|---|
| 152 | { | 
|---|
| 153 | int tmp; | 
|---|
| 154 |  | 
|---|
| 155 | /* | 
|---|
| 156 | * This can batch pidmap_lock. | 
|---|
| 157 | */ | 
|---|
| 158 | for (tmp = PIDTYPE_MAX; --tmp >= 0; ) | 
|---|
| 159 | if (pids[tmp]) | 
|---|
| 160 | free_pid(pid: pids[tmp]); | 
|---|
| 161 | } | 
|---|
| 162 |  | 
|---|
| 163 | struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, | 
|---|
| 164 | size_t set_tid_size) | 
|---|
| 165 | { | 
|---|
| 166 | struct pid *pid; | 
|---|
| 167 | enum pid_type type; | 
|---|
| 168 | int i, nr; | 
|---|
| 169 | struct pid_namespace *tmp; | 
|---|
| 170 | struct upid *upid; | 
|---|
| 171 | int retval = -ENOMEM; | 
|---|
| 172 |  | 
|---|
| 173 | /* | 
|---|
| 174 | * set_tid_size contains the size of the set_tid array. Starting at | 
|---|
| 175 | * the most nested currently active PID namespace it tells alloc_pid() | 
|---|
| 176 | * which PID to set for a process in that most nested PID namespace | 
|---|
| 177 | * up to set_tid_size PID namespaces. It does not have to set the PID | 
|---|
| 178 | * for a process in all nested PID namespaces but set_tid_size must | 
|---|
| 179 | * never be greater than the current ns->level + 1. | 
|---|
| 180 | */ | 
|---|
| 181 | if (set_tid_size > ns->level + 1) | 
|---|
| 182 | return ERR_PTR(error: -EINVAL); | 
|---|
| 183 |  | 
|---|
| 184 | pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); | 
|---|
| 185 | if (!pid) | 
|---|
| 186 | return ERR_PTR(error: retval); | 
|---|
| 187 |  | 
|---|
| 188 | tmp = ns; | 
|---|
| 189 | pid->level = ns->level; | 
|---|
| 190 |  | 
|---|
| 191 | for (i = ns->level; i >= 0; i--) { | 
|---|
| 192 | int tid = 0; | 
|---|
| 193 | int pid_max = READ_ONCE(tmp->pid_max); | 
|---|
| 194 |  | 
|---|
| 195 | if (set_tid_size) { | 
|---|
| 196 | tid = set_tid[ns->level - i]; | 
|---|
| 197 |  | 
|---|
| 198 | retval = -EINVAL; | 
|---|
| 199 | if (tid < 1 || tid >= pid_max) | 
|---|
| 200 | goto out_free; | 
|---|
| 201 | /* | 
|---|
| 202 | * Also fail if a PID != 1 is requested and | 
|---|
| 203 | * no PID 1 exists. | 
|---|
| 204 | */ | 
|---|
| 205 | if (tid != 1 && !tmp->child_reaper) | 
|---|
| 206 | goto out_free; | 
|---|
| 207 | retval = -EPERM; | 
|---|
| 208 | if (!checkpoint_restore_ns_capable(ns: tmp->user_ns)) | 
|---|
| 209 | goto out_free; | 
|---|
| 210 | set_tid_size--; | 
|---|
| 211 | } | 
|---|
| 212 |  | 
|---|
| 213 | idr_preload(GFP_KERNEL); | 
|---|
| 214 | spin_lock(lock: &pidmap_lock); | 
|---|
| 215 |  | 
|---|
| 216 | if (tid) { | 
|---|
| 217 | nr = idr_alloc(&tmp->idr, NULL, start: tid, | 
|---|
| 218 | end: tid + 1, GFP_ATOMIC); | 
|---|
| 219 | /* | 
|---|
| 220 | * If ENOSPC is returned it means that the PID is | 
|---|
| 221 | * alreay in use. Return EEXIST in that case. | 
|---|
| 222 | */ | 
|---|
| 223 | if (nr == -ENOSPC) | 
|---|
| 224 | nr = -EEXIST; | 
|---|
| 225 | } else { | 
|---|
| 226 | int pid_min = 1; | 
|---|
| 227 | /* | 
|---|
| 228 | * init really needs pid 1, but after reaching the | 
|---|
| 229 | * maximum wrap back to RESERVED_PIDS | 
|---|
| 230 | */ | 
|---|
| 231 | if (idr_get_cursor(idr: &tmp->idr) > RESERVED_PIDS) | 
|---|
| 232 | pid_min = RESERVED_PIDS; | 
|---|
| 233 |  | 
|---|
| 234 | /* | 
|---|
| 235 | * Store a null pointer so find_pid_ns does not find | 
|---|
| 236 | * a partially initialized PID (see below). | 
|---|
| 237 | */ | 
|---|
| 238 | nr = idr_alloc_cyclic(&tmp->idr, NULL, start: pid_min, | 
|---|
| 239 | end: pid_max, GFP_ATOMIC); | 
|---|
| 240 | } | 
|---|
| 241 | spin_unlock(lock: &pidmap_lock); | 
|---|
| 242 | idr_preload_end(); | 
|---|
| 243 |  | 
|---|
| 244 | if (nr < 0) { | 
|---|
| 245 | retval = (nr == -ENOSPC) ? -EAGAIN : nr; | 
|---|
| 246 | goto out_free; | 
|---|
| 247 | } | 
|---|
| 248 |  | 
|---|
| 249 | pid->numbers[i].nr = nr; | 
|---|
| 250 | pid->numbers[i].ns = tmp; | 
|---|
| 251 | tmp = tmp->parent; | 
|---|
| 252 | } | 
|---|
| 253 |  | 
|---|
| 254 | /* | 
|---|
| 255 | * ENOMEM is not the most obvious choice especially for the case | 
|---|
| 256 | * where the child subreaper has already exited and the pid | 
|---|
| 257 | * namespace denies the creation of any new processes. But ENOMEM | 
|---|
| 258 | * is what we have exposed to userspace for a long time and it is | 
|---|
| 259 | * documented behavior for pid namespaces. So we can't easily | 
|---|
| 260 | * change it even if there were an error code better suited. | 
|---|
| 261 | */ | 
|---|
| 262 | retval = -ENOMEM; | 
|---|
| 263 |  | 
|---|
| 264 | get_pid_ns(ns); | 
|---|
| 265 | refcount_set(r: &pid->count, n: 1); | 
|---|
| 266 | spin_lock_init(&pid->lock); | 
|---|
| 267 | for (type = 0; type < PIDTYPE_MAX; ++type) | 
|---|
| 268 | INIT_HLIST_HEAD(&pid->tasks[type]); | 
|---|
| 269 |  | 
|---|
| 270 | init_waitqueue_head(&pid->wait_pidfd); | 
|---|
| 271 | INIT_HLIST_HEAD(&pid->inodes); | 
|---|
| 272 |  | 
|---|
| 273 | upid = pid->numbers + ns->level; | 
|---|
| 274 | idr_preload(GFP_KERNEL); | 
|---|
| 275 | spin_lock(lock: &pidmap_lock); | 
|---|
| 276 | if (!(ns->pid_allocated & PIDNS_ADDING)) | 
|---|
| 277 | goto out_unlock; | 
|---|
| 278 | pidfs_add_pid(pid); | 
|---|
| 279 | for ( ; upid >= pid->numbers; --upid) { | 
|---|
| 280 | /* Make the PID visible to find_pid_ns. */ | 
|---|
| 281 | idr_replace(&upid->ns->idr, pid, id: upid->nr); | 
|---|
| 282 | upid->ns->pid_allocated++; | 
|---|
| 283 | } | 
|---|
| 284 | spin_unlock(lock: &pidmap_lock); | 
|---|
| 285 | idr_preload_end(); | 
|---|
| 286 |  | 
|---|
| 287 | return pid; | 
|---|
| 288 |  | 
|---|
| 289 | out_unlock: | 
|---|
| 290 | spin_unlock(lock: &pidmap_lock); | 
|---|
| 291 | idr_preload_end(); | 
|---|
| 292 | put_pid_ns(ns); | 
|---|
| 293 |  | 
|---|
| 294 | out_free: | 
|---|
| 295 | spin_lock(lock: &pidmap_lock); | 
|---|
| 296 | while (++i <= ns->level) { | 
|---|
| 297 | upid = pid->numbers + i; | 
|---|
| 298 | idr_remove(&upid->ns->idr, id: upid->nr); | 
|---|
| 299 | } | 
|---|
| 300 |  | 
|---|
| 301 | /* On failure to allocate the first pid, reset the state */ | 
|---|
| 302 | if (ns->pid_allocated == PIDNS_ADDING) | 
|---|
| 303 | idr_set_cursor(idr: &ns->idr, val: 0); | 
|---|
| 304 |  | 
|---|
| 305 | spin_unlock(lock: &pidmap_lock); | 
|---|
| 306 |  | 
|---|
| 307 | kmem_cache_free(s: ns->pid_cachep, objp: pid); | 
|---|
| 308 | return ERR_PTR(error: retval); | 
|---|
| 309 | } | 
|---|
| 310 |  | 
|---|
| 311 | void disable_pid_allocation(struct pid_namespace *ns) | 
|---|
| 312 | { | 
|---|
| 313 | spin_lock(lock: &pidmap_lock); | 
|---|
| 314 | ns->pid_allocated &= ~PIDNS_ADDING; | 
|---|
| 315 | spin_unlock(lock: &pidmap_lock); | 
|---|
| 316 | } | 
|---|
| 317 |  | 
|---|
| 318 | struct pid *find_pid_ns(int nr, struct pid_namespace *ns) | 
|---|
| 319 | { | 
|---|
| 320 | return idr_find(&ns->idr, id: nr); | 
|---|
| 321 | } | 
|---|
| 322 | EXPORT_SYMBOL_GPL(find_pid_ns); | 
|---|
| 323 |  | 
|---|
| 324 | struct pid *find_vpid(int nr) | 
|---|
| 325 | { | 
|---|
| 326 | return find_pid_ns(nr, task_active_pid_ns(current)); | 
|---|
| 327 | } | 
|---|
| 328 | EXPORT_SYMBOL_GPL(find_vpid); | 
|---|
| 329 |  | 
|---|
| 330 | static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type) | 
|---|
| 331 | { | 
|---|
| 332 | return (type == PIDTYPE_PID) ? | 
|---|
| 333 | &task->thread_pid : | 
|---|
| 334 | &task->signal->pids[type]; | 
|---|
| 335 | } | 
|---|
| 336 |  | 
|---|
| 337 | /* | 
|---|
| 338 | * attach_pid() must be called with the tasklist_lock write-held. | 
|---|
| 339 | */ | 
|---|
| 340 | void attach_pid(struct task_struct *task, enum pid_type type) | 
|---|
| 341 | { | 
|---|
| 342 | struct pid *pid; | 
|---|
| 343 |  | 
|---|
| 344 | lockdep_assert_held_write(&tasklist_lock); | 
|---|
| 345 |  | 
|---|
| 346 | pid = *task_pid_ptr(task, type); | 
|---|
| 347 | hlist_add_head_rcu(n: &task->pid_links[type], h: &pid->tasks[type]); | 
|---|
| 348 | } | 
|---|
| 349 |  | 
|---|
| 350 | static void __change_pid(struct pid **pids, struct task_struct *task, | 
|---|
| 351 | enum pid_type type, struct pid *new) | 
|---|
| 352 | { | 
|---|
| 353 | struct pid **pid_ptr, *pid; | 
|---|
| 354 | int tmp; | 
|---|
| 355 |  | 
|---|
| 356 | lockdep_assert_held_write(&tasklist_lock); | 
|---|
| 357 |  | 
|---|
| 358 | pid_ptr = task_pid_ptr(task, type); | 
|---|
| 359 | pid = *pid_ptr; | 
|---|
| 360 |  | 
|---|
| 361 | hlist_del_rcu(n: &task->pid_links[type]); | 
|---|
| 362 | *pid_ptr = new; | 
|---|
| 363 |  | 
|---|
| 364 | for (tmp = PIDTYPE_MAX; --tmp >= 0; ) | 
|---|
| 365 | if (pid_has_task(pid, type: tmp)) | 
|---|
| 366 | return; | 
|---|
| 367 |  | 
|---|
| 368 | WARN_ON(pids[type]); | 
|---|
| 369 | pids[type] = pid; | 
|---|
| 370 | } | 
|---|
| 371 |  | 
|---|
| 372 | void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type type) | 
|---|
| 373 | { | 
|---|
| 374 | __change_pid(pids, task, type, NULL); | 
|---|
| 375 | } | 
|---|
| 376 |  | 
|---|
| 377 | void change_pid(struct pid **pids, struct task_struct *task, enum pid_type type, | 
|---|
| 378 | struct pid *pid) | 
|---|
| 379 | { | 
|---|
| 380 | __change_pid(pids, task, type, new: pid); | 
|---|
| 381 | attach_pid(task, type); | 
|---|
| 382 | } | 
|---|
| 383 |  | 
|---|
| 384 | void exchange_tids(struct task_struct *left, struct task_struct *right) | 
|---|
| 385 | { | 
|---|
| 386 | struct pid *pid1 = left->thread_pid; | 
|---|
| 387 | struct pid *pid2 = right->thread_pid; | 
|---|
| 388 | struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID]; | 
|---|
| 389 | struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID]; | 
|---|
| 390 |  | 
|---|
| 391 | lockdep_assert_held_write(&tasklist_lock); | 
|---|
| 392 |  | 
|---|
| 393 | /* Swap the single entry tid lists */ | 
|---|
| 394 | hlists_swap_heads_rcu(left: head1, right: head2); | 
|---|
| 395 |  | 
|---|
| 396 | /* Swap the per task_struct pid */ | 
|---|
| 397 | rcu_assign_pointer(left->thread_pid, pid2); | 
|---|
| 398 | rcu_assign_pointer(right->thread_pid, pid1); | 
|---|
| 399 |  | 
|---|
| 400 | /* Swap the cached value */ | 
|---|
| 401 | WRITE_ONCE(left->pid, pid_nr(pid2)); | 
|---|
| 402 | WRITE_ONCE(right->pid, pid_nr(pid1)); | 
|---|
| 403 | } | 
|---|
| 404 |  | 
|---|
| 405 | /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ | 
|---|
| 406 | void transfer_pid(struct task_struct *old, struct task_struct *new, | 
|---|
| 407 | enum pid_type type) | 
|---|
| 408 | { | 
|---|
| 409 | WARN_ON_ONCE(type == PIDTYPE_PID); | 
|---|
| 410 | lockdep_assert_held_write(&tasklist_lock); | 
|---|
| 411 | hlist_replace_rcu(old: &old->pid_links[type], new: &new->pid_links[type]); | 
|---|
| 412 | } | 
|---|
| 413 |  | 
|---|
| 414 | struct task_struct *pid_task(struct pid *pid, enum pid_type type) | 
|---|
| 415 | { | 
|---|
| 416 | struct task_struct *result = NULL; | 
|---|
| 417 | if (pid) { | 
|---|
| 418 | struct hlist_node *first; | 
|---|
| 419 | first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), | 
|---|
| 420 | lockdep_tasklist_lock_is_held()); | 
|---|
| 421 | if (first) | 
|---|
| 422 | result = hlist_entry(first, struct task_struct, pid_links[(type)]); | 
|---|
| 423 | } | 
|---|
| 424 | return result; | 
|---|
| 425 | } | 
|---|
| 426 | EXPORT_SYMBOL(pid_task); | 
|---|
| 427 |  | 
|---|
| 428 | /* | 
|---|
| 429 | * Must be called under rcu_read_lock(). | 
|---|
| 430 | */ | 
|---|
| 431 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | 
|---|
| 432 | { | 
|---|
| 433 | RCU_LOCKDEP_WARN(!rcu_read_lock_held(), | 
|---|
| 434 | "find_task_by_pid_ns() needs rcu_read_lock() protection"); | 
|---|
| 435 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | 
|---|
| 436 | } | 
|---|
| 437 |  | 
|---|
| 438 | struct task_struct *find_task_by_vpid(pid_t vnr) | 
|---|
| 439 | { | 
|---|
| 440 | return find_task_by_pid_ns(nr: vnr, ns: task_active_pid_ns(current)); | 
|---|
| 441 | } | 
|---|
| 442 |  | 
|---|
| 443 | struct task_struct *find_get_task_by_vpid(pid_t nr) | 
|---|
| 444 | { | 
|---|
| 445 | struct task_struct *task; | 
|---|
| 446 |  | 
|---|
| 447 | rcu_read_lock(); | 
|---|
| 448 | task = find_task_by_vpid(vnr: nr); | 
|---|
| 449 | if (task) | 
|---|
| 450 | get_task_struct(t: task); | 
|---|
| 451 | rcu_read_unlock(); | 
|---|
| 452 |  | 
|---|
| 453 | return task; | 
|---|
| 454 | } | 
|---|
| 455 |  | 
|---|
| 456 | struct pid *get_task_pid(struct task_struct *task, enum pid_type type) | 
|---|
| 457 | { | 
|---|
| 458 | struct pid *pid; | 
|---|
| 459 | rcu_read_lock(); | 
|---|
| 460 | pid = get_pid(rcu_dereference(*task_pid_ptr(task, type))); | 
|---|
| 461 | rcu_read_unlock(); | 
|---|
| 462 | return pid; | 
|---|
| 463 | } | 
|---|
| 464 | EXPORT_SYMBOL_GPL(get_task_pid); | 
|---|
| 465 |  | 
|---|
| 466 | struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) | 
|---|
| 467 | { | 
|---|
| 468 | struct task_struct *result; | 
|---|
| 469 | rcu_read_lock(); | 
|---|
| 470 | result = pid_task(pid, type); | 
|---|
| 471 | if (result) | 
|---|
| 472 | get_task_struct(t: result); | 
|---|
| 473 | rcu_read_unlock(); | 
|---|
| 474 | return result; | 
|---|
| 475 | } | 
|---|
| 476 | EXPORT_SYMBOL_GPL(get_pid_task); | 
|---|
| 477 |  | 
|---|
| 478 | struct pid *find_get_pid(pid_t nr) | 
|---|
| 479 | { | 
|---|
| 480 | struct pid *pid; | 
|---|
| 481 |  | 
|---|
| 482 | rcu_read_lock(); | 
|---|
| 483 | pid = get_pid(pid: find_vpid(nr)); | 
|---|
| 484 | rcu_read_unlock(); | 
|---|
| 485 |  | 
|---|
| 486 | return pid; | 
|---|
| 487 | } | 
|---|
| 488 | EXPORT_SYMBOL_GPL(find_get_pid); | 
|---|
| 489 |  | 
|---|
| 490 | pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) | 
|---|
| 491 | { | 
|---|
| 492 | struct upid *upid; | 
|---|
| 493 | pid_t nr = 0; | 
|---|
| 494 |  | 
|---|
| 495 | if (pid && ns && ns->level <= pid->level) { | 
|---|
| 496 | upid = &pid->numbers[ns->level]; | 
|---|
| 497 | if (upid->ns == ns) | 
|---|
| 498 | nr = upid->nr; | 
|---|
| 499 | } | 
|---|
| 500 | return nr; | 
|---|
| 501 | } | 
|---|
| 502 | EXPORT_SYMBOL_GPL(pid_nr_ns); | 
|---|
| 503 |  | 
|---|
| 504 | pid_t pid_vnr(struct pid *pid) | 
|---|
| 505 | { | 
|---|
| 506 | return pid_nr_ns(pid, task_active_pid_ns(current)); | 
|---|
| 507 | } | 
|---|
| 508 | EXPORT_SYMBOL_GPL(pid_vnr); | 
|---|
| 509 |  | 
|---|
| 510 | pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, | 
|---|
| 511 | struct pid_namespace *ns) | 
|---|
| 512 | { | 
|---|
| 513 | pid_t nr = 0; | 
|---|
| 514 |  | 
|---|
| 515 | rcu_read_lock(); | 
|---|
| 516 | if (!ns) | 
|---|
| 517 | ns = task_active_pid_ns(current); | 
|---|
| 518 | if (ns) | 
|---|
| 519 | nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns); | 
|---|
| 520 | rcu_read_unlock(); | 
|---|
| 521 |  | 
|---|
| 522 | return nr; | 
|---|
| 523 | } | 
|---|
| 524 | EXPORT_SYMBOL(__task_pid_nr_ns); | 
|---|
| 525 |  | 
|---|
/* Return ns_of_pid() applied to @tsk's task_pid(). */
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
	struct pid *self = task_pid(tsk);

	return ns_of_pid(self);
}
EXPORT_SYMBOL_GPL(task_active_pid_ns);
|---|
| 531 |  | 
|---|
| 532 | /* | 
|---|
| 533 | * Used by proc to find the first pid that is greater than or equal to nr. | 
|---|
| 534 | * | 
|---|
| 535 | * If there is a pid at nr this function is exactly the same as find_pid_ns. | 
|---|
| 536 | */ | 
|---|
| 537 | struct pid *find_ge_pid(int nr, struct pid_namespace *ns) | 
|---|
| 538 | { | 
|---|
| 539 | return idr_get_next(&ns->idr, nextid: &nr); | 
|---|
| 540 | } | 
|---|
| 541 | EXPORT_SYMBOL_GPL(find_ge_pid); | 
|---|
| 542 |  | 
|---|
| 543 | struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) | 
|---|
| 544 | { | 
|---|
| 545 | CLASS(fd, f)(fd); | 
|---|
| 546 | struct pid *pid; | 
|---|
| 547 |  | 
|---|
| 548 | if (fd_empty(f)) | 
|---|
| 549 | return ERR_PTR(error: -EBADF); | 
|---|
| 550 |  | 
|---|
| 551 | pid = pidfd_pid(fd_file(f)); | 
|---|
| 552 | if (!IS_ERR(ptr: pid)) { | 
|---|
| 553 | get_pid(pid); | 
|---|
| 554 | *flags = fd_file(f)->f_flags; | 
|---|
| 555 | } | 
|---|
| 556 | return pid; | 
|---|
| 557 | } | 
|---|
| 558 |  | 
|---|
| 559 | /** | 
|---|
| 560 | * pidfd_get_task() - Get the task associated with a pidfd | 
|---|
| 561 | * | 
|---|
| 562 | * @pidfd: pidfd for which to get the task | 
|---|
| 563 | * @flags: flags associated with this pidfd | 
|---|
| 564 | * | 
|---|
| 565 | * Return the task associated with @pidfd. The function takes a reference on | 
|---|
| 566 | * the returned task. The caller is responsible for releasing that reference. | 
|---|
| 567 | * | 
|---|
| 568 | * Return: On success, the task_struct associated with the pidfd. | 
|---|
| 569 | *	   On error, a negative errno number will be returned. | 
|---|
| 570 | */ | 
|---|
| 571 | struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags) | 
|---|
| 572 | { | 
|---|
| 573 | unsigned int f_flags = 0; | 
|---|
| 574 | struct pid *pid; | 
|---|
| 575 | struct task_struct *task; | 
|---|
| 576 | enum pid_type type; | 
|---|
| 577 |  | 
|---|
| 578 | switch (pidfd) { | 
|---|
| 579 | case  PIDFD_SELF_THREAD: | 
|---|
| 580 | type = PIDTYPE_PID; | 
|---|
| 581 | pid = get_task_pid(current, type); | 
|---|
| 582 | break; | 
|---|
| 583 | case  PIDFD_SELF_THREAD_GROUP: | 
|---|
| 584 | type = PIDTYPE_TGID; | 
|---|
| 585 | pid = get_task_pid(current, type); | 
|---|
| 586 | break; | 
|---|
| 587 | default: | 
|---|
| 588 | pid = pidfd_get_pid(fd: pidfd, flags: &f_flags); | 
|---|
| 589 | if (IS_ERR(ptr: pid)) | 
|---|
| 590 | return ERR_CAST(ptr: pid); | 
|---|
| 591 | type = PIDTYPE_TGID; | 
|---|
| 592 | break; | 
|---|
| 593 | } | 
|---|
| 594 |  | 
|---|
| 595 | task = get_pid_task(pid, type); | 
|---|
| 596 | put_pid(pid); | 
|---|
| 597 | if (!task) | 
|---|
| 598 | return ERR_PTR(error: -ESRCH); | 
|---|
| 599 |  | 
|---|
| 600 | *flags = f_flags; | 
|---|
| 601 | return task; | 
|---|
| 602 | } | 
|---|
| 603 |  | 
|---|
/**
 * pidfd_create() - Create a new pid file descriptor.
 *
 * @pid:   struct pid that the pidfd will reference
 * @flags: flags to pass
 *
 * This creates a new pid file descriptor with the O_CLOEXEC flag set.
 *
 * Note, that this function can only be called after the fd table has
 * been unshared to avoid leaking the pidfd to the new process.
 *
 * This symbol should not be explicitly exported to loadable modules.
 *
 * Return: On success, a cloexec pidfd is returned.
 *         On error, a negative errno number will be returned.
 */
static int pidfd_create(struct pid *pid, unsigned int flags)
{
	struct file *file;
	int fd = pidfd_prepare(pid, flags, &file);

	/* Only install the file once a descriptor was actually reserved. */
	if (fd >= 0)
		fd_install(fd, file);

	return fd;
}
|---|
| 632 |  | 
|---|
| 633 | /** | 
|---|
| 634 | * sys_pidfd_open() - Open new pid file descriptor. | 
|---|
| 635 | * | 
|---|
| 636 | * @pid:   pid for which to retrieve a pidfd | 
|---|
| 637 | * @flags: flags to pass | 
|---|
| 638 | * | 
|---|
| 639 | * This creates a new pid file descriptor with the O_CLOEXEC flag set for | 
|---|
| 640 | * the task identified by @pid. Without PIDFD_THREAD flag the target task | 
|---|
| 641 | * must be a thread-group leader. | 
|---|
| 642 | * | 
|---|
| 643 | * Return: On success, a cloexec pidfd is returned. | 
|---|
| 644 | *         On error, a negative errno number will be returned. | 
|---|
| 645 | */ | 
|---|
| 646 | SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) | 
|---|
| 647 | { | 
|---|
| 648 | int fd; | 
|---|
| 649 | struct pid *p; | 
|---|
| 650 |  | 
|---|
| 651 | if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD)) | 
|---|
| 652 | return -EINVAL; | 
|---|
| 653 |  | 
|---|
| 654 | if (pid <= 0) | 
|---|
| 655 | return -EINVAL; | 
|---|
| 656 |  | 
|---|
| 657 | p = find_get_pid(pid); | 
|---|
| 658 | if (!p) | 
|---|
| 659 | return -ESRCH; | 
|---|
| 660 |  | 
|---|
| 661 | fd = pidfd_create(pid: p, flags); | 
|---|
| 662 |  | 
|---|
| 663 | put_pid(p); | 
|---|
| 664 | return fd; | 
|---|
| 665 | } | 
|---|
| 666 |  | 
|---|
| 667 | #ifdef CONFIG_SYSCTL | 
|---|
| 668 | static struct ctl_table_set *pid_table_root_lookup(struct ctl_table_root *root) | 
|---|
| 669 | { | 
|---|
| 670 | return &task_active_pid_ns(current)->set; | 
|---|
| 671 | } | 
|---|
| 672 |  | 
|---|
| 673 | static int set_is_seen(struct ctl_table_set *set) | 
|---|
| 674 | { | 
|---|
| 675 | return &task_active_pid_ns(current)->set == set; | 
|---|
| 676 | } | 
|---|
| 677 |  | 
|---|
| 678 | static int pid_table_root_permissions(struct ctl_table_header *head, | 
|---|
| 679 | const struct ctl_table *table) | 
|---|
| 680 | { | 
|---|
| 681 | struct pid_namespace *pidns = | 
|---|
| 682 | container_of(head->set, struct pid_namespace, set); | 
|---|
| 683 | int mode = table->mode; | 
|---|
| 684 |  | 
|---|
| 685 | if (ns_capable_noaudit(ns: pidns->user_ns, CAP_SYS_ADMIN) || | 
|---|
| 686 | uid_eq(current_euid(), right: make_kuid(from: pidns->user_ns, uid: 0))) | 
|---|
| 687 | mode = (mode & S_IRWXU) >> 6; | 
|---|
| 688 | else if (in_egroup_p(make_kgid(from: pidns->user_ns, gid: 0))) | 
|---|
| 689 | mode = (mode & S_IRWXG) >> 3; | 
|---|
| 690 | else | 
|---|
| 691 | mode = mode & S_IROTH; | 
|---|
| 692 | return (mode << 6) | (mode << 3) | mode; | 
|---|
| 693 | } | 
|---|
| 694 |  | 
|---|
| 695 | static void pid_table_root_set_ownership(struct ctl_table_header *head, | 
|---|
| 696 | kuid_t *uid, kgid_t *gid) | 
|---|
| 697 | { | 
|---|
| 698 | struct pid_namespace *pidns = | 
|---|
| 699 | container_of(head->set, struct pid_namespace, set); | 
|---|
| 700 | kuid_t ns_root_uid; | 
|---|
| 701 | kgid_t ns_root_gid; | 
|---|
| 702 |  | 
|---|
| 703 | ns_root_uid = make_kuid(from: pidns->user_ns, uid: 0); | 
|---|
| 704 | if (uid_valid(uid: ns_root_uid)) | 
|---|
| 705 | *uid = ns_root_uid; | 
|---|
| 706 |  | 
|---|
| 707 | ns_root_gid = make_kgid(from: pidns->user_ns, gid: 0); | 
|---|
| 708 | if (gid_valid(gid: ns_root_gid)) | 
|---|
| 709 | *gid = ns_root_gid; | 
|---|
| 710 | } | 
|---|
| 711 |  | 
|---|
| 712 | static struct ctl_table_root pid_table_root = { | 
|---|
| 713 | .lookup		= pid_table_root_lookup, | 
|---|
| 714 | .permissions	= pid_table_root_permissions, | 
|---|
| 715 | .set_ownership	= pid_table_root_set_ownership, | 
|---|
| 716 | }; | 
|---|
| 717 |  | 
|---|
| 718 | static int proc_do_cad_pid(const struct ctl_table *table, int write, void *buffer, | 
|---|
| 719 | size_t *lenp, loff_t *ppos) | 
|---|
| 720 | { | 
|---|
| 721 | struct pid *new_pid; | 
|---|
| 722 | pid_t tmp_pid; | 
|---|
| 723 | int r; | 
|---|
| 724 | struct ctl_table tmp_table = *table; | 
|---|
| 725 |  | 
|---|
| 726 | tmp_pid = pid_vnr(cad_pid); | 
|---|
| 727 | tmp_table.data = &tmp_pid; | 
|---|
| 728 |  | 
|---|
| 729 | r = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); | 
|---|
| 730 | if (r || !write) | 
|---|
| 731 | return r; | 
|---|
| 732 |  | 
|---|
| 733 | new_pid = find_get_pid(tmp_pid); | 
|---|
| 734 | if (!new_pid) | 
|---|
| 735 | return -ESRCH; | 
|---|
| 736 |  | 
|---|
| 737 | put_pid(xchg(&cad_pid, new_pid)); | 
|---|
| 738 | return 0; | 
|---|
| 739 | } | 
|---|
| 740 |  | 
|---|
| 741 | static const struct ctl_table pid_table[] = { | 
|---|
| 742 | { | 
|---|
| 743 | .procname	= "pid_max", | 
|---|
| 744 | .data		= &init_pid_ns.pid_max, | 
|---|
| 745 | .maxlen		= sizeof(int), | 
|---|
| 746 | .mode		= 0644, | 
|---|
| 747 | .proc_handler	= proc_dointvec_minmax, | 
|---|
| 748 | .extra1		= &pid_max_min, | 
|---|
| 749 | .extra2		= &pid_max_max, | 
|---|
| 750 | }, | 
|---|
| 751 | #ifdef CONFIG_PROC_SYSCTL | 
|---|
| 752 | { | 
|---|
| 753 | .procname	= "cad_pid", | 
|---|
| 754 | .maxlen		= sizeof(int), | 
|---|
| 755 | .mode		= 0600, | 
|---|
| 756 | .proc_handler	= proc_do_cad_pid, | 
|---|
| 757 | }, | 
|---|
| 758 | #endif | 
|---|
| 759 | }; | 
|---|
| 760 | #endif | 
|---|
| 761 |  | 
|---|
/*
 * Register the per-namespace sysctl table for @pidns.
 *
 * With CONFIG_SYSCTL a private copy of pid_table is registered under
 * "kernel", with its first entry pointing at pidns->pid_max, and pid_max is
 * raised according to the number of possible CPUs (capped at pid_max_max).
 *
 * Return: 0 on success (always 0 without CONFIG_SYSCTL), -ENOMEM if the
 * table copy or the registration fails.
 */
int register_pidns_sysctls(struct pid_namespace *pidns)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *copy;

	setup_sysctl_set(&pidns->set, &pid_table_root, set_is_seen);

	copy = kmemdup(pid_table, sizeof(pid_table), GFP_KERNEL);
	if (copy == NULL)
		return -ENOMEM;

	/* Entry 0 is "pid_max": redirect it to this namespace's value. */
	copy->data = &pidns->pid_max;
	pidns->pid_max = min(pid_max_max,
			     max_t(int, pidns->pid_max,
				   PIDS_PER_CPU_DEFAULT * num_possible_cpus()));

	pidns->sysctls = __register_sysctl_table(&pidns->set, "kernel", copy,
						 ARRAY_SIZE(pid_table));
	if (pidns->sysctls == NULL) {
		kfree(copy);
		retire_sysctl_set(&pidns->set);
		return -ENOMEM;
	}
#endif
	return 0;
}
|---|
| 786 |  | 
|---|
/**
 * unregister_pidns_sysctls() - tear down the PID sysctls of a PID namespace
 * @pidns: namespace whose sysctl header, set and table are released
 *
 * Unregisters @pidns->sysctls, retires @pidns->set and frees the table the
 * header refers to. Compiles to a no-op when CONFIG_SYSCTL is not set.
 */
void unregister_pidns_sysctls(struct pid_namespace *pidns)
{
#ifdef CONFIG_SYSCTL
	const struct ctl_table *tbl;

	/*
	 * Read the table pointer before the header is handed to
	 * unregister_sysctl_table(); presumably this is the kmemdup()'d
	 * copy created by register_pidns_sysctls().
	 */
	tbl = pidns->sysctls->ctl_table_arg;
	unregister_sysctl_table(pidns->sysctls);
	retire_sysctl_set(&pidns->set);
	kfree(tbl);
#endif
}
|---|
| 798 |  | 
|---|
| 799 | void __init pid_idr_init(void) | 
|---|
| 800 | { | 
|---|
| 801 | /* Verify no one has done anything silly: */ | 
|---|
| 802 | BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING); | 
|---|
| 803 |  | 
|---|
| 804 | /* bump default and minimum pid_max based on number of cpus */ | 
|---|
| 805 | init_pid_ns.pid_max = min(pid_max_max, max_t(int, init_pid_ns.pid_max, | 
|---|
| 806 | PIDS_PER_CPU_DEFAULT * num_possible_cpus())); | 
|---|
| 807 | pid_max_min = max_t(int, pid_max_min, | 
|---|
| 808 | PIDS_PER_CPU_MIN * num_possible_cpus()); | 
|---|
| 809 | pr_info( "pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max, pid_max_min); | 
|---|
| 810 |  | 
|---|
| 811 | idr_init(idr: &init_pid_ns.idr); | 
|---|
| 812 |  | 
|---|
| 813 | init_pid_ns.pid_cachep = kmem_cache_create( "pid", | 
|---|
| 814 | struct_size_t(struct pid, numbers, 1), | 
|---|
| 815 | __alignof__(struct pid), | 
|---|
| 816 | SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, | 
|---|
| 817 | NULL); | 
|---|
| 818 | } | 
|---|
| 819 |  | 
|---|
| 820 | static __init int pid_namespace_sysctl_init(void) | 
|---|
| 821 | { | 
|---|
| 822 | #ifdef CONFIG_SYSCTL | 
|---|
| 823 | /* "kernel" directory will have already been initialized. */ | 
|---|
| 824 | BUG_ON(register_pidns_sysctls(&init_pid_ns)); | 
|---|
| 825 | #endif | 
|---|
| 826 | return 0; | 
|---|
| 827 | } | 
|---|
| 828 | subsys_initcall(pid_namespace_sysctl_init); | 
|---|
| 829 |  | 
|---|
| 830 | static struct file *__pidfd_fget(struct task_struct *task, int fd) | 
|---|
| 831 | { | 
|---|
| 832 | struct file *file; | 
|---|
| 833 | int ret; | 
|---|
| 834 |  | 
|---|
| 835 | ret = down_read_killable(sem: &task->signal->exec_update_lock); | 
|---|
| 836 | if (ret) | 
|---|
| 837 | return ERR_PTR(error: ret); | 
|---|
| 838 |  | 
|---|
| 839 | if (ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS)) | 
|---|
| 840 | file = fget_task(task, fd); | 
|---|
| 841 | else | 
|---|
| 842 | file = ERR_PTR(error: -EPERM); | 
|---|
| 843 |  | 
|---|
| 844 | up_read(sem: &task->signal->exec_update_lock); | 
|---|
| 845 |  | 
|---|
| 846 | if (!file) { | 
|---|
| 847 | /* | 
|---|
| 848 | * It is possible that the target thread is exiting; it can be | 
|---|
| 849 | * either: | 
|---|
| 850 | * 1. before exit_signals(), which gives a real fd | 
|---|
| 851 | * 2. before exit_files() takes the task_lock() gives a real fd | 
|---|
| 852 | * 3. after exit_files() releases task_lock(), ->files is NULL; | 
|---|
| 853 | *    this has PF_EXITING, since it was set in exit_signals(), | 
|---|
| 854 | *    __pidfd_fget() returns EBADF. | 
|---|
| 855 | * In case 3 we get EBADF, but that really means ESRCH, since | 
|---|
| 856 | * the task is currently exiting and has freed its files | 
|---|
| 857 | * struct, so we fix it up. | 
|---|
| 858 | */ | 
|---|
| 859 | if (task->flags & PF_EXITING) | 
|---|
| 860 | file = ERR_PTR(error: -ESRCH); | 
|---|
| 861 | else | 
|---|
| 862 | file = ERR_PTR(error: -EBADF); | 
|---|
| 863 | } | 
|---|
| 864 |  | 
|---|
| 865 | return file; | 
|---|
| 866 | } | 
|---|
| 867 |  | 
|---|
| 868 | static int pidfd_getfd(struct pid *pid, int fd) | 
|---|
| 869 | { | 
|---|
| 870 | struct task_struct *task; | 
|---|
| 871 | struct file *file; | 
|---|
| 872 | int ret; | 
|---|
| 873 |  | 
|---|
| 874 | task = get_pid_task(pid, PIDTYPE_PID); | 
|---|
| 875 | if (!task) | 
|---|
| 876 | return -ESRCH; | 
|---|
| 877 |  | 
|---|
| 878 | file = __pidfd_fget(task, fd); | 
|---|
| 879 | put_task_struct(t: task); | 
|---|
| 880 | if (IS_ERR(ptr: file)) | 
|---|
| 881 | return PTR_ERR(ptr: file); | 
|---|
| 882 |  | 
|---|
| 883 | ret = receive_fd(file, NULL, O_CLOEXEC); | 
|---|
| 884 | fput(file); | 
|---|
| 885 |  | 
|---|
| 886 | return ret; | 
|---|
| 887 | } | 
|---|
| 888 |  | 
|---|
| 889 | /** | 
|---|
| 890 | * sys_pidfd_getfd() - Get a file descriptor from another process | 
|---|
| 891 | * | 
|---|
| 892 | * @pidfd:	the pidfd file descriptor of the process | 
|---|
| 893 | * @fd:		the file descriptor number to get | 
|---|
| 894 | * @flags:	flags on how to get the fd (reserved) | 
|---|
| 895 | * | 
|---|
| 896 | * This syscall gets a copy of a file descriptor from another process | 
|---|
| 897 | * based on the pidfd, and file descriptor number. It requires that | 
|---|
| 898 | * the calling process has the ability to ptrace the process represented | 
|---|
| 899 | * by the pidfd. The process which is having its file descriptor copied | 
|---|
| 900 | * is otherwise unaffected. | 
|---|
| 901 | * | 
|---|
| 902 | * Return: On success, a cloexec file descriptor is returned. | 
|---|
| 903 | *         On error, a negative errno number will be returned. | 
|---|
| 904 | */ | 
|---|
| 905 | SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd, | 
|---|
| 906 | unsigned int, flags) | 
|---|
| 907 | { | 
|---|
| 908 | struct pid *pid; | 
|---|
| 909 |  | 
|---|
| 910 | /* flags is currently unused - make sure it's unset */ | 
|---|
| 911 | if (flags) | 
|---|
| 912 | return -EINVAL; | 
|---|
| 913 |  | 
|---|
| 914 | CLASS(fd, f)(fd: pidfd); | 
|---|
| 915 | if (fd_empty(f)) | 
|---|
| 916 | return -EBADF; | 
|---|
| 917 |  | 
|---|
| 918 | pid = pidfd_pid(fd_file(f)); | 
|---|
| 919 | if (IS_ERR(ptr: pid)) | 
|---|
| 920 | return PTR_ERR(ptr: pid); | 
|---|
| 921 |  | 
|---|
| 922 | return pidfd_getfd(pid, fd); | 
|---|
| 923 | } | 
|---|
| 924 |  | 
|---|