// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/kernfs/file.c - kernfs file implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 */

#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <linux/uio.h>

#include "kernfs-internal.h"

struct kernfs_open_node {
	struct rcu_head		rcu_head;
	atomic_t		event;
	wait_queue_head_t	poll;
	struct list_head	files; /* goes through kernfs_open_file.list */
	unsigned int		nr_mmapped;
	unsigned int		nr_to_release;
};

/*
 * kernfs_notify() may be called from any context and bounces notifications
 * through a work item.  To minimize space overhead in kernfs_node, the
 * pending queue is implemented as a singly linked list of kernfs_nodes.
 * The list is terminated with the self pointer so that whether a
 * kernfs_node is on the list or not can be determined by testing the next
 * pointer for %NULL.
 */
#define KERNFS_NOTIFY_EOL			((void *)&kernfs_notify_list)

static DEFINE_SPINLOCK(kernfs_notify_lock);
static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;

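/*
 * Illustrative sketch (not a separate helper in this file): with the EOL
 * terminator above, "is @kn already queued?" reduces to a single pointer
 * test, which is exactly what kernfs_notify() does before linking a node
 * into the pending queue:
 *
 *	if (!kn->attr.notify_next) {
 *		kn->attr.notify_next = kernfs_notify_list;
 *		kernfs_notify_list = kn;
 *	}
 */
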
static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
{
	int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);

	return &kernfs_locks->open_file_mutex[idx];
}

static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
{
	struct mutex *lock;

	lock = kernfs_open_file_mutex_ptr(kn);

	mutex_lock(lock);

	return lock;
}
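
/*
 * Locking sketch, assuming a caller that updates @kn->attr.open: the
 * helper above returns the mutex it acquired so the caller can pair it
 * with a plain mutex_unlock(), e.g.:
 *
 *	struct mutex *mutex = kernfs_open_file_mutex_lock(kn);
 *
 *	... inspect or update kn->attr.open ...
 *	mutex_unlock(mutex);
 */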

/**
 * of_on - Get the kernfs_open_node of the specified kernfs_open_file
 * @of: target kernfs_open_file
 *
 * Return: the kernfs_open_node of the kernfs_open_file
 */
static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
{
	return rcu_dereference_protected(of->kn->attr.open,
					 !list_empty(&of->list));
}

/* Get active reference to kernfs node for an open file */
static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of)
{
	/* Skip if file was already released */
	if (unlikely(of->released))
		return NULL;

	if (!kernfs_get_active(of->kn))
		return NULL;

	return of;
}

static void kernfs_put_active_of(struct kernfs_open_file *of)
{
	kernfs_put_active(of->kn);
}

/**
 * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
 *
 * @kn: target kernfs_node.
 *
 * Fetch and return ->attr.open of @kn while the caller holds
 * kernfs_open_file_mutex_ptr(kn).
 *
 * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when
 * the caller guarantees that this mutex is being held, other updaters can't
 * change ->attr.open and this means that we can safely deref ->attr.open
 * outside an RCU read-side critical section.
 *
 * The caller needs to make sure that kernfs_open_file_mutex is held.
 *
 * Return: @kn->attr.open when kernfs_open_file_mutex is held.
 */
static struct kernfs_open_node *
kernfs_deref_open_node_locked(struct kernfs_node *kn)
{
	return rcu_dereference_protected(kn->attr.open,
			lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
}

static struct kernfs_open_file *kernfs_of(struct file *file)
{
	return ((struct seq_file *)file->private_data)->private;
}

/*
 * Determine the kernfs_ops for the given kernfs_node.  This function must
 * be called while holding an active reference.
 */
static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
{
	if (kn->flags & KERNFS_LOCKDEP)
		lockdep_assert_held(kn);
	return kn->attr.ops;
}

/*
 * As kernfs_seq_stop() is also called after kernfs_seq_start() or
 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
 * a seq_file iteration which is fully initialized with an active reference
 * or an aborted kernfs_seq_start() due to get_active failure.  The
 * position pointer is the only context for each seq_file iteration and
 * thus the stop condition should be encoded in it.  As the return value is
 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
 * choice to indicate get_active failure.
 *
 * Unfortunately, this is complicated due to the optional custom seq_file
 * operations which may return ERR_PTR(-ENODEV) too.  kernfs_seq_stop()
 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
 * custom seq_file operations and thus can't decide whether put_active
 * should be performed or not only on ERR_PTR(-ENODEV).
 *
 * This is worked around by factoring out the custom seq_stop() and
 * put_active part into kernfs_seq_stop_active(), skipping it from
 * kernfs_seq_stop() on ERR_PTR(-ENODEV) while invoking it directly after
 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
 * that kernfs_seq_stop_active() is skipped only after get_active failure.
 */
static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_stop)
		ops->seq_stop(sf, v);
	kernfs_put_active_of(of);
}

static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops;

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active_of(of))
		return ERR_PTR(-ENODEV);

	ops = kernfs_ops(of->kn);
	if (ops->seq_start) {
		void *next = ops->seq_start(sf, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	}
	return single_start(sf, ppos);
}

static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_next) {
		void *next = ops->seq_next(sf, v, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	} else {
		/*
		 * The same behavior and code as single_open(): always
		 * terminate after the initial read.
		 */
		++*ppos;
		return NULL;
	}
}

static void kernfs_seq_stop(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;

	if (v != ERR_PTR(-ENODEV))
		kernfs_seq_stop_active(sf, v);
	mutex_unlock(&of->mutex);
}

static int kernfs_seq_show(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;

	of->event = atomic_read(&of_on(of)->event);

	return of->kn->attr.ops->seq_show(sf, v);
}

static const struct seq_operations kernfs_seq_ops = {
	.start = kernfs_seq_start,
	.next = kernfs_seq_next,
	.stop = kernfs_seq_stop,
	.show = kernfs_seq_show,
};

/*
 * As reading a bin file can have side-effects, the exact offset and bytes
 * specified in the read(2) call should be passed to the read callback,
 * making it difficult to use seq_file.  Implement simplistic custom
 * buffering for bin files.
 */
static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
	ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
	const struct kernfs_ops *ops;
	char *buf;

	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active_of(of)) {
		len = -ENODEV;
		mutex_unlock(&of->mutex);
		goto out_free;
	}

	of->event = atomic_read(&of_on(of)->event);

	ops = kernfs_ops(of->kn);
	if (ops->read)
		len = ops->read(of, buf, len, iocb->ki_pos);
	else
		len = -EINVAL;

	kernfs_put_active_of(of);
	mutex_unlock(&of->mutex);

	if (len < 0)
		goto out_free;

	if (copy_to_iter(buf, len, iter) != len) {
		len = -EFAULT;
		goto out_free;
	}

	iocb->ki_pos += len;

out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}

static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW)
		return seq_read_iter(iocb, iter);
	return kernfs_file_read_iter(iocb, iter);
}

/*
 * Copy data in from userland and pass it to the matching kernfs write
 * operation.
 *
 * There is no easy way for us to know if userspace is only doing a partial
 * write, so we don't support them.  We expect the entire buffer to come on
 * the first write.  Hint: if you're writing a value, first read the file,
 * modify only the value you're changing, then write the entire buffer
 * back.
 */
static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
	ssize_t len = iov_iter_count(iter);
	const struct kernfs_ops *ops;
	char *buf;

	if (of->atomic_write_len) {
		if (len > of->atomic_write_len)
			return -E2BIG;
	} else {
		len = min_t(size_t, len, PAGE_SIZE);
	}

	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_iter(buf, len, iter) != len) {
		len = -EFAULT;
		goto out_free;
	}
	buf[len] = '\0';	/* guarantee string termination */

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active_of(of)) {
		mutex_unlock(&of->mutex);
		len = -ENODEV;
		goto out_free;
	}

	ops = kernfs_ops(of->kn);
	if (ops->write)
		len = ops->write(of, buf, len, iocb->ki_pos);
	else
		len = -EINVAL;

	kernfs_put_active_of(of);
	mutex_unlock(&of->mutex);

	if (len > 0)
		iocb->ki_pos += len;

out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}
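
/*
 * Hypothetical userspace sketch of the read-modify-write pattern
 * recommended above kernfs_fop_write_iter(); fd, buf and the edit step
 * are illustrative:
 *
 *	char buf[4096];
 *	ssize_t len = pread(fd, buf, sizeof(buf) - 1, 0);
 *
 *	buf[len] = '\0';
 *	... change only the value being updated in buf ...
 *	pwrite(fd, buf, strlen(buf), 0);
 */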

static void kernfs_vma_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);

	if (!of->vm_ops)
		return;

	if (!kernfs_get_active_of(of))
		return;

	if (of->vm_ops->open)
		of->vm_ops->open(vma);

	kernfs_put_active_of(of);
}

static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	vm_fault_t ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!kernfs_get_active_of(of))
		return VM_FAULT_SIGBUS;

	ret = VM_FAULT_SIGBUS;
	if (of->vm_ops->fault)
		ret = of->vm_ops->fault(vmf);

	kernfs_put_active_of(of);
	return ret;
}

static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	vm_fault_t ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!kernfs_get_active_of(of))
		return VM_FAULT_SIGBUS;

	ret = 0;
	if (of->vm_ops->page_mkwrite)
		ret = of->vm_ops->page_mkwrite(vmf);
	else
		file_update_time(file);

	kernfs_put_active_of(of);
	return ret;
}

static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
			     void *buf, int len, int write)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	int ret;

	if (!of->vm_ops)
		return -EINVAL;

	if (!kernfs_get_active_of(of))
		return -EINVAL;

	ret = -EINVAL;
	if (of->vm_ops->access)
		ret = of->vm_ops->access(vma, addr, buf, len, write);

	kernfs_put_active_of(of);
	return ret;
}

static const struct vm_operations_struct kernfs_vm_ops = {
	.open		= kernfs_vma_open,
	.fault		= kernfs_vma_fault,
	.page_mkwrite	= kernfs_vma_page_mkwrite,
	.access		= kernfs_vma_access,
};

static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct kernfs_open_file *of = kernfs_of(file);
	const struct kernfs_ops *ops;
	int rc;

	/*
	 * mmap path and of->mutex are prone to triggering spurious lockdep
	 * warnings and we don't want to add spurious locking dependency
	 * between the two.  Check whether mmap is actually implemented
	 * without grabbing @of->mutex by testing HAS_MMAP flag.  See the
	 * comment in kernfs_fop_open() for more details.
	 */
	if (!(of->kn->flags & KERNFS_HAS_MMAP))
		return -ENODEV;

	mutex_lock(&of->mutex);

	rc = -ENODEV;
	if (!kernfs_get_active_of(of))
		goto out_unlock;

	ops = kernfs_ops(of->kn);
	rc = ops->mmap(of, vma);
	if (rc)
		goto out_put;

	/*
	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
	 * to satisfy versions of X which crash if the mmap fails: that
	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
	 */
	if (vma->vm_file != file)
		goto out_put;

	rc = -EINVAL;
	if (of->mmapped && of->vm_ops != vma->vm_ops)
		goto out_put;

	/*
	 * It is not possible to successfully wrap close.
	 * So error if someone is trying to use close.
	 */
	if (vma->vm_ops && vma->vm_ops->close)
		goto out_put;

	rc = 0;
	if (!of->mmapped) {
		of->mmapped = true;
		of_on(of)->nr_mmapped++;
		of->vm_ops = vma->vm_ops;
	}
	vma->vm_ops = &kernfs_vm_ops;
out_put:
	kernfs_put_active_of(of);
out_unlock:
	mutex_unlock(&of->mutex);

	return rc;
}

/**
 *	kernfs_get_open_node - get or create kernfs_open_node
 *	@kn: target kernfs_node
 *	@of: kernfs_open_file for this instance of open
 *
 *	If @kn->attr.open exists, increment its reference count; otherwise,
 *	create one.  @of is chained to the files list.
 *
 *	Locking:
 *	Kernel thread context (may sleep).
 *
 *	Return:
 *	%0 on success, -errno on failure.
 */
static int kernfs_get_open_node(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	struct kernfs_open_node *on;
	struct mutex *mutex;

	mutex = kernfs_open_file_mutex_lock(kn);
	on = kernfs_deref_open_node_locked(kn);

	if (!on) {
		/* not there, initialize a new one */
		on = kzalloc(sizeof(*on), GFP_KERNEL);
		if (!on) {
			mutex_unlock(mutex);
			return -ENOMEM;
		}
		atomic_set(&on->event, 1);
		init_waitqueue_head(&on->poll);
		INIT_LIST_HEAD(&on->files);
		rcu_assign_pointer(kn->attr.open, on);
	}

	list_add_tail(&of->list, &on->files);
	if (kn->flags & KERNFS_HAS_RELEASE)
		on->nr_to_release++;

	mutex_unlock(mutex);
	return 0;
}

/**
 *	kernfs_unlink_open_file - Unlink @of from @kn.
 *
 *	@kn: target kernfs_node
 *	@of: associated kernfs_open_file
 *	@open_failed: ->open() failed, cancel ->release()
 *
 *	Unlink @of from the list of @kn's associated open files.  If the
 *	list of associated open files becomes empty, disassociate and free
 *	the kernfs_open_node.
 *
 *	LOCKING:
 *	None.
 */
static void kernfs_unlink_open_file(struct kernfs_node *kn,
				    struct kernfs_open_file *of,
				    bool open_failed)
{
	struct kernfs_open_node *on;
	struct mutex *mutex;

	mutex = kernfs_open_file_mutex_lock(kn);

	on = kernfs_deref_open_node_locked(kn);
	if (!on) {
		mutex_unlock(mutex);
		return;
	}

	if (of) {
		if (kn->flags & KERNFS_HAS_RELEASE) {
			WARN_ON_ONCE(of->released == open_failed);
			if (open_failed)
				on->nr_to_release--;
		}
		if (of->mmapped)
			on->nr_mmapped--;
		list_del(&of->list);
	}

	if (list_empty(&on->files)) {
		rcu_assign_pointer(kn->attr.open, NULL);
		kfree_rcu(on, rcu_head);
	}

	mutex_unlock(mutex);
}
static int kernfs_fop_open(struct inode *inode, struct file *file)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_root *root = kernfs_root(kn);
	const struct kernfs_ops *ops;
	struct kernfs_open_file *of;
	bool has_read, has_write, has_mmap;
	int error = -EACCES;

	if (!kernfs_get_active(kn))
		return -ENODEV;

	ops = kernfs_ops(kn);

	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;

	/* see the flag definition for details */
	if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
		if ((file->f_mode & FMODE_WRITE) &&
		    (!(inode->i_mode & S_IWUGO) || !has_write))
			goto err_out;

		if ((file->f_mode & FMODE_READ) &&
		    (!(inode->i_mode & S_IRUGO) || !has_read))
			goto err_out;
	}

	/* allocate a kernfs_open_file for the file */
	error = -ENOMEM;
	of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
	if (!of)
		goto err_out;

	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap.  This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_lock nests, while holding @of->mutex.  As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file.  At this point, we can't easily give
	 * each file a separate locking class.  Let's differentiate on
	 * whether the file has mmap or not for now.
	 *
	 * For similar reasons, writable and readonly files are given
	 * different lockdep keys, because the writable file
	 * /sys/power/resume may call vfs lookup helpers for arbitrary
	 * paths and readonly files can be read by overlayfs from vfs
	 * helpers when sysfs is a lower layer of overlayfs.
	 *
	 * All three cases look the same.  They're supposed to
	 * look that way and give @of->mutex different static lockdep keys.
	 */
	if (has_mmap)
		mutex_init(&of->mutex);
	else if (file->f_mode & FMODE_WRITE)
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

	of->kn = kn;
	of->file = file;

	/*
	 * Write path needs to access atomic_write_len outside the active
	 * reference.  Cache it in open_file.  See kernfs_fop_write_iter()
	 * for details.
	 */
	of->atomic_write_len = ops->atomic_write_len;

	error = -EINVAL;
	/*
	 * ->seq_show is incompatible with ->prealloc,
	 * as seq_read does its own allocation.
	 * ->read must be used instead.
	 */
	if (ops->prealloc && ops->seq_show)
		goto err_free;
	if (ops->prealloc) {
		int len = of->atomic_write_len ?: PAGE_SIZE;
		of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
		error = -ENOMEM;
		if (!of->prealloc_buf)
			goto err_free;
		mutex_init(&of->prealloc_mutex);
	}

	/*
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested.  This unifies private data access
	 * and readable regular files are the vast majority anyway.
	 */
	if (ops->seq_show)
		error = seq_open(file, &kernfs_seq_ops);
	else
		error = seq_open(file, NULL);
	if (error)
		goto err_free;

	of->seq_file = file->private_data;
	of->seq_file->private = of;

	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;

	/* make sure we have open node struct */
	error = kernfs_get_open_node(kn, of);
	if (error)
		goto err_seq_release;

	if (ops->open) {
		/* nobody has access to @of yet, skip @of->mutex */
		error = ops->open(of);
		if (error)
			goto err_put_node;
	}

	/* open succeeded, put active references */
	kernfs_put_active(kn);
	return 0;

err_put_node:
	kernfs_unlink_open_file(kn, of, true);
err_seq_release:
	seq_release(inode, file);
err_free:
	kfree(of->prealloc_buf);
	kfree(of);
err_out:
	kernfs_put_active(kn);
	return error;
}

/* used from release/drain to ensure that ->release() is called exactly once */
static void kernfs_release_file(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	/*
	 * @of is guaranteed to have no other file operations in flight and
	 * we just want to synchronize release and drain paths.
	 * @kernfs_open_file_mutex_ptr(kn) is enough.  @of->mutex can't be
	 * used here because the drain path may be called from places which
	 * can cause circular dependency.
	 */
	lockdep_assert_held(kernfs_open_file_mutex_ptr(kn));

	if (!of->released) {
		/*
		 * A file is never detached without being released and we
		 * need to be able to release files which are deactivated
		 * and being drained.  Don't use kernfs_ops().
		 */
		kn->attr.ops->release(of);
		of->released = true;
		of_on(of)->nr_to_release--;
	}
}

static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_open_file *of = kernfs_of(filp);

	if (kn->flags & KERNFS_HAS_RELEASE) {
		struct mutex *mutex;

		mutex = kernfs_open_file_mutex_lock(kn);
		kernfs_release_file(kn, of);
		mutex_unlock(mutex);
	}

	kernfs_unlink_open_file(kn, of, false);
	seq_release(inode, filp);
	kfree(of->prealloc_buf);
	kfree(of);

	return 0;
}

bool kernfs_should_drain_open_files(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	bool ret;

	/*
	 * @kn being deactivated guarantees that @kn->attr.open can't change
	 * beneath us, making the lockless test below safe.
	 * Callers that came after kernfs_unbreak_active_protection() may be
	 * counted in kn->active by now; do not WARN_ON because of them.
	 */

	rcu_read_lock();
	on = rcu_dereference(kn->attr.open);
	ret = on && (on->nr_mmapped || on->nr_to_release);
	rcu_read_unlock();

	return ret;
}

void kernfs_drain_open_files(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	struct kernfs_open_file *of;
	struct mutex *mutex;

	mutex = kernfs_open_file_mutex_lock(kn);
	on = kernfs_deref_open_node_locked(kn);
	if (!on) {
		mutex_unlock(mutex);
		return;
	}

	list_for_each_entry(of, &on->files, list) {
		struct inode *inode = file_inode(of->file);

		if (of->mmapped) {
			unmap_mapping_range(inode->i_mapping, 0, 0, 1);
			of->mmapped = false;
			on->nr_mmapped--;
		}

		if (kn->flags & KERNFS_HAS_RELEASE)
			kernfs_release_file(kn, of);
	}

	WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
	mutex_unlock(mutex);
}

/*
 * Kernfs attribute files are pollable.  The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change.  When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return EPOLLERR|EPOLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
 * need to close and re-open the file, or seek to 0 and read again.
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
 * to see if it supports poll (neither 'poll' nor 'select' returns
 * an appropriate error code).  When in doubt, set a suitable timeout value.
 */
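
/*
 * Userspace sketch (illustrative only; fd and buf are assumed): consume
 * the current value, block until the attribute changes, then re-read it.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLPRI };
 *
 *	read(fd, buf, sizeof(buf));
 *	poll(&pfd, 1, -1);
 *	lseek(fd, 0, SEEK_SET);
 *	read(fd, buf, sizeof(buf));
 */
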
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
	struct kernfs_open_node *on = of_on(of);

	poll_wait(of->file, &on->poll, wait);

	if (of->event != atomic_read(&on->event))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	return DEFAULT_POLLMASK;
}

static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
{
	struct kernfs_open_file *of = kernfs_of(filp);
	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
	__poll_t ret;

	if (!kernfs_get_active_of(of))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	if (kn->attr.ops->poll)
		ret = kn->attr.ops->poll(of, wait);
	else
		ret = kernfs_generic_poll(of, wait);

	kernfs_put_active_of(of);
	return ret;
}

static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence)
{
	struct kernfs_open_file *of = kernfs_of(file);
	const struct kernfs_ops *ops;
	loff_t ret;

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active_of(of)) {
		mutex_unlock(&of->mutex);
		return -ENODEV;
	}

	ops = kernfs_ops(of->kn);
	if (ops->llseek)
		ret = ops->llseek(of, offset, whence);
	else
		ret = generic_file_llseek(file, offset, whence);

	kernfs_put_active_of(of);
	mutex_unlock(&of->mutex);
	return ret;
}

static void kernfs_notify_workfn(struct work_struct *work)
{
	struct kernfs_node *kn;
	struct kernfs_super_info *info;
	struct kernfs_root *root;
repeat:
	/* pop one off the notify_list */
	spin_lock_irq(&kernfs_notify_lock);
	kn = kernfs_notify_list;
	if (kn == KERNFS_NOTIFY_EOL) {
		spin_unlock_irq(&kernfs_notify_lock);
		return;
	}
	kernfs_notify_list = kn->attr.notify_next;
	kn->attr.notify_next = NULL;
	spin_unlock_irq(&kernfs_notify_lock);

	root = kernfs_root(kn);
	/* kick fsnotify */

	down_read(&root->kernfs_supers_rwsem);
	down_read(&root->kernfs_rwsem);
	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
		struct kernfs_node *parent;
		struct inode *p_inode = NULL;
		const char *kn_name;
		struct inode *inode;
		struct qstr name;

		/*
		 * We want fsnotify_modify() on @kn but as the
		 * modifications aren't originating from userland don't
		 * have the matching @file available.  Look up the inodes
		 * and generate the events manually.
		 */
		inode = ilookup(info->sb, kernfs_ino(kn));
		if (!inode)
			continue;

		kn_name = kernfs_rcu_name(kn);
		name = QSTR(kn_name);
		parent = kernfs_get_parent(kn);
		if (parent) {
			p_inode = ilookup(info->sb, kernfs_ino(parent));
			if (p_inode) {
				fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
					 inode, FSNOTIFY_EVENT_INODE,
					 p_inode, &name, inode, 0);
				iput(p_inode);
			}

			kernfs_put(parent);
		}

		if (!p_inode)
			fsnotify_inode(inode, FS_MODIFY);

		iput(inode);
	}

	up_read(&root->kernfs_rwsem);
	up_read(&root->kernfs_supers_rwsem);
	kernfs_put(kn);
	goto repeat;
}

/**
 * kernfs_notify - notify a kernfs file
 * @kn: file to notify
 *
 * Notify @kn such that poll(2) on @kn wakes up.  May be called from any
 * context.
 */
void kernfs_notify(struct kernfs_node *kn)
{
	static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
	unsigned long flags;
	struct kernfs_open_node *on;

	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
		return;

	/* kick poll immediately */
	rcu_read_lock();
	on = rcu_dereference(kn->attr.open);
	if (on) {
		atomic_inc(&on->event);
		wake_up_interruptible(&on->poll);
	}
	rcu_read_unlock();

	/* schedule work to kick fsnotify */
	spin_lock_irqsave(&kernfs_notify_lock, flags);
	if (!kn->attr.notify_next) {
		kernfs_get(kn);
		kn->attr.notify_next = kernfs_notify_list;
		kernfs_notify_list = kn;
		schedule_work(&kernfs_notify_work);
	}
	spin_unlock_irqrestore(&kernfs_notify_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);

const struct file_operations kernfs_file_fops = {
	.read_iter	= kernfs_fop_read_iter,
	.write_iter	= kernfs_fop_write_iter,
	.llseek		= kernfs_fop_llseek,
	.mmap		= kernfs_fop_mmap,
	.open		= kernfs_fop_open,
	.release	= kernfs_fop_release,
	.poll		= kernfs_fop_poll,
	.fsync		= noop_fsync,
	.splice_read	= copy_splice_read,
	.splice_write	= iter_file_splice_write,
};

/**
 * __kernfs_create_file - kernfs internal function to create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @uid: uid of the file
 * @gid: gid of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
 *
 * Return: the created node on success, ERR_PTR() value on error.
 */
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
					 const char *name,
					 umode_t mode, kuid_t uid, kgid_t gid,
					 loff_t size,
					 const struct kernfs_ops *ops,
					 void *priv, const void *ns,
					 struct lock_class_key *key)
{
	struct kernfs_node *kn;
	unsigned flags;
	int rc;

	flags = KERNFS_FILE;

	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
			     uid, gid, flags);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	kn->attr.ops = ops;
	kn->attr.size = size;
	kn->ns = ns;
	kn->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
		kn->flags |= KERNFS_LOCKDEP;
	}
#endif

	/*
	 * kn->attr.ops is accessible only while holding active ref.  We
	 * need to know whether some ops are implemented outside active
	 * ref.  Cache their existence in flags.
	 */
	if (ops->seq_show)
		kn->flags |= KERNFS_HAS_SEQ_SHOW;
	if (ops->mmap)
		kn->flags |= KERNFS_HAS_MMAP;
	if (ops->release)
		kn->flags |= KERNFS_HAS_RELEASE;

	rc = kernfs_add_one(kn);
	if (rc) {
		kernfs_put(kn);
		return ERR_PTR(rc);
	}
	return kn;
}
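
/*
 * Usage sketch (hypothetical caller; my_show, my_ops, my_key and the file
 * name are illustrative, not part of kernfs): create a read-only file
 * whose contents come from a seq_show callback.
 *
 *	static int my_show(struct seq_file *sf, void *v)
 *	{
 *		seq_puts(sf, "hello\n");
 *		return 0;
 *	}
 *
 *	static const struct kernfs_ops my_ops = {
 *		.seq_show	= my_show,
 *	};
 *
 *	static struct lock_class_key my_key;
 *
 *	kn = __kernfs_create_file(parent, "example", 0444,
 *				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 *				  0, &my_ops, NULL, NULL, &my_key);
 */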