// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 */

#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/srcu.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

/*
 * Clear all of the marks on an inode when it is being evicted from core
 */
void __fsnotify_inode_delete(struct inode *inode)
{
	fsnotify_clear_marks_by_inode(inode);
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);

/*
 * Clear all of the marks attached to a vfsmount.
 */
void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
	fsnotify_clear_marks_by_mount(mnt);
}

/*
 * Clear all of the marks attached to a mount namespace.
 */
void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
{
	fsnotify_clear_marks_by_mntns(mntns);
}

| 36 | /** | 
|---|
| 37 | * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes. | 
|---|
| 38 | * @sb: superblock being unmounted. | 
|---|
| 39 | * | 
|---|
| 40 | * Called during unmount with no locks held, so needs to be safe against | 
|---|
| 41 | * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. | 
|---|
| 42 | */ | 
|---|
| 43 | static void fsnotify_unmount_inodes(struct super_block *sb) | 
|---|
| 44 | { | 
|---|
| 45 | struct inode *inode, *iput_inode = NULL; | 
|---|
| 46 |  | 
|---|
| 47 | spin_lock(lock: &sb->s_inode_list_lock); | 
|---|
| 48 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 
|---|
| 49 | /* | 
|---|
| 50 | * We cannot __iget() an inode in state I_FREEING, | 
|---|
| 51 | * I_WILL_FREE, or I_NEW which is fine because by that point | 
|---|
| 52 | * the inode cannot have any associated watches. | 
|---|
| 53 | */ | 
|---|
| 54 | spin_lock(lock: &inode->i_lock); | 
|---|
| 55 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { | 
|---|
| 56 | spin_unlock(lock: &inode->i_lock); | 
|---|
| 57 | continue; | 
|---|
| 58 | } | 
|---|
| 59 |  | 
|---|
| 60 | /* | 
|---|
| 61 | * If i_count is zero, the inode cannot have any watches and | 
|---|
| 62 | * doing an __iget/iput with SB_ACTIVE clear would actually | 
|---|
| 63 | * evict all inodes with zero i_count from icache which is | 
|---|
| 64 | * unnecessarily violent and may in fact be illegal to do. | 
|---|
| 65 | * However, we should have been called /after/ evict_inodes | 
|---|
| 66 | * removed all zero refcount inodes, in any case.  Test to | 
|---|
| 67 | * be sure. | 
|---|
| 68 | */ | 
|---|
| 69 | if (!icount_read(inode)) { | 
|---|
| 70 | spin_unlock(lock: &inode->i_lock); | 
|---|
| 71 | continue; | 
|---|
| 72 | } | 
|---|
| 73 |  | 
|---|
| 74 | __iget(inode); | 
|---|
| 75 | spin_unlock(lock: &inode->i_lock); | 
|---|
| 76 | spin_unlock(lock: &sb->s_inode_list_lock); | 
|---|
| 77 |  | 
|---|
| 78 | iput(iput_inode); | 
|---|
| 79 |  | 
|---|
| 80 | /* for each watch, send FS_UNMOUNT and then remove it */ | 
|---|
| 81 | fsnotify_inode(inode, FS_UNMOUNT); | 
|---|
| 82 |  | 
|---|
| 83 | fsnotify_inode_delete(inode); | 
|---|
| 84 |  | 
|---|
| 85 | iput_inode = inode; | 
|---|
| 86 |  | 
|---|
| 87 | cond_resched(); | 
|---|
| 88 | spin_lock(lock: &sb->s_inode_list_lock); | 
|---|
| 89 | } | 
|---|
| 90 | spin_unlock(lock: &sb->s_inode_list_lock); | 
|---|
| 91 |  | 
|---|
| 92 | iput(iput_inode); | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | void fsnotify_sb_delete(struct super_block *sb) | 
|---|
| 96 | { | 
|---|
| 97 | struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); | 
|---|
| 98 |  | 
|---|
| 99 | /* Were any marks ever added to any object on this sb? */ | 
|---|
| 100 | if (!sbinfo) | 
|---|
| 101 | return; | 
|---|
| 102 |  | 
|---|
| 103 | fsnotify_unmount_inodes(sb); | 
|---|
| 104 | fsnotify_clear_marks_by_sb(sb); | 
|---|
| 105 | /* Wait for outstanding object references from connectors */ | 
|---|
| 106 | wait_var_event(fsnotify_sb_watched_objects(sb), | 
|---|
| 107 | !atomic_long_read(fsnotify_sb_watched_objects(sb))); | 
|---|
| 108 | WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT)); | 
|---|
| 109 | WARN_ON(fsnotify_sb_has_priority_watchers(sb, | 
|---|
| 110 | FSNOTIFY_PRIO_PRE_CONTENT)); | 
|---|
| 111 | } | 
|---|
| 112 |  | 
|---|
| 113 | void fsnotify_sb_free(struct super_block *sb) | 
|---|
| 114 | { | 
|---|
| 115 | kfree(objp: sb->s_fsnotify_info); | 
|---|
| 116 | } | 
|---|
| 117 |  | 
|---|
| 118 | /* | 
|---|
| 119 | * Given an inode, first check if we care what happens to our children.  Inotify | 
|---|
| 120 | * and dnotify both tell their parents about events.  If we care about any event | 
|---|
| 121 | * on a child we run all of our children and set a dentry flag saying that the | 
|---|
| 122 | * parent cares.  Thus when an event happens on a child it can quickly tell | 
|---|
| 123 | * if there is a need to find a parent and send the event to the parent. | 
|---|
| 124 | */ | 
|---|
| 125 | void fsnotify_set_children_dentry_flags(struct inode *inode) | 
|---|
| 126 | { | 
|---|
| 127 | struct dentry *alias; | 
|---|
| 128 |  | 
|---|
| 129 | if (!S_ISDIR(inode->i_mode)) | 
|---|
| 130 | return; | 
|---|
| 131 |  | 
|---|
| 132 | spin_lock(lock: &inode->i_lock); | 
|---|
| 133 | /* run all of the dentries associated with this inode.  Since this is a | 
|---|
| 134 | * directory, there damn well better only be one item on this list */ | 
|---|
| 135 | hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { | 
|---|
| 136 | struct dentry *child; | 
|---|
| 137 |  | 
|---|
| 138 | /* run all of the children of the original inode and fix their | 
|---|
| 139 | * d_flags to indicate parental interest (their parent is the | 
|---|
| 140 | * original inode) */ | 
|---|
| 141 | spin_lock(lock: &alias->d_lock); | 
|---|
| 142 | hlist_for_each_entry(child, &alias->d_children, d_sib) { | 
|---|
| 143 | if (!child->d_inode) | 
|---|
| 144 | continue; | 
|---|
| 145 |  | 
|---|
| 146 | spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); | 
|---|
| 147 | child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; | 
|---|
| 148 | spin_unlock(lock: &child->d_lock); | 
|---|
| 149 | } | 
|---|
| 150 | spin_unlock(lock: &alias->d_lock); | 
|---|
| 151 | } | 
|---|
| 152 | spin_unlock(lock: &inode->i_lock); | 
|---|
| 153 | } | 
|---|
| 154 |  | 
|---|
| 155 | /* | 
|---|
| 156 | * Lazily clear false positive PARENT_WATCHED flag for child whose parent had | 
|---|
| 157 | * stopped watching children. | 
|---|
| 158 | */ | 
|---|
| 159 | static void fsnotify_clear_child_dentry_flag(struct inode *pinode, | 
|---|
| 160 | struct dentry *dentry) | 
|---|
| 161 | { | 
|---|
| 162 | spin_lock(lock: &dentry->d_lock); | 
|---|
| 163 | /* | 
|---|
| 164 | * d_lock is a sufficient barrier to prevent observing a non-watched | 
|---|
| 165 | * parent state from before the fsnotify_set_children_dentry_flags() | 
|---|
| 166 | * or fsnotify_update_flags() call that had set PARENT_WATCHED. | 
|---|
| 167 | */ | 
|---|
| 168 | if (!fsnotify_inode_watches_children(inode: pinode)) | 
|---|
| 169 | dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; | 
|---|
| 170 | spin_unlock(lock: &dentry->d_lock); | 
|---|
| 171 | } | 
|---|
| 172 |  | 
|---|
| 173 | /* Are inode/sb/mount interested in parent and name info with this event? */ | 
|---|
| 174 | static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, | 
|---|
| 175 | __u32 mask) | 
|---|
| 176 | { | 
|---|
| 177 | __u32 marks_mask = 0; | 
|---|
| 178 |  | 
|---|
| 179 | /* We only send parent/name to inode/sb/mount for events on non-dir */ | 
|---|
| 180 | if (mask & FS_ISDIR) | 
|---|
| 181 | return false; | 
|---|
| 182 |  | 
|---|
| 183 | /* | 
|---|
| 184 | * All events that are possible on child can also may be reported with | 
|---|
| 185 | * parent/name info to inode/sb/mount.  Otherwise, a watching parent | 
|---|
| 186 | * could result in events reported with unexpected name info to sb/mount. | 
|---|
| 187 | */ | 
|---|
| 188 | BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT); | 
|---|
| 189 |  | 
|---|
| 190 | /* Did either inode/sb/mount subscribe for events with parent/name? */ | 
|---|
| 191 | marks_mask |= fsnotify_parent_needed_mask( | 
|---|
| 192 | READ_ONCE(inode->i_fsnotify_mask)); | 
|---|
| 193 | marks_mask |= fsnotify_parent_needed_mask( | 
|---|
| 194 | READ_ONCE(inode->i_sb->s_fsnotify_mask)); | 
|---|
| 195 | marks_mask |= fsnotify_parent_needed_mask(mask: mnt_mask); | 
|---|
| 196 |  | 
|---|
| 197 | /* Did they subscribe for this event with parent/name info? */ | 
|---|
| 198 | return mask & marks_mask; | 
|---|
| 199 | } | 
|---|
| 200 |  | 
|---|
| 201 | /* Are there any inode/mount/sb objects that watch for these events? */ | 
|---|
| 202 | static inline __u32 fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, | 
|---|
| 203 | __u32 mask) | 
|---|
| 204 | { | 
|---|
| 205 | __u32 marks_mask = READ_ONCE(inode->i_fsnotify_mask) | mnt_mask | | 
|---|
| 206 | READ_ONCE(inode->i_sb->s_fsnotify_mask); | 
|---|
| 207 |  | 
|---|
| 208 | return mask & marks_mask & ALL_FSNOTIFY_EVENTS; | 
|---|
| 209 | } | 
|---|
| 210 |  | 
|---|
| 211 | /* Report pre-content event with optional range info */ | 
|---|
| 212 | int fsnotify_pre_content(const struct path *path, const loff_t *ppos, | 
|---|
| 213 | size_t count) | 
|---|
| 214 | { | 
|---|
| 215 | struct file_range range; | 
|---|
| 216 |  | 
|---|
| 217 | /* Report page aligned range only when pos is known */ | 
|---|
| 218 | if (!ppos) | 
|---|
| 219 | return fsnotify_path(path, FS_PRE_ACCESS); | 
|---|
| 220 |  | 
|---|
| 221 | range.path = path; | 
|---|
| 222 | range.pos = PAGE_ALIGN_DOWN(*ppos); | 
|---|
| 223 | range.count = PAGE_ALIGN(*ppos + count) - range.pos; | 
|---|
| 224 |  | 
|---|
| 225 | return fsnotify_parent(dentry: path->dentry, FS_PRE_ACCESS, data: &range, | 
|---|
| 226 | data_type: FSNOTIFY_EVENT_FILE_RANGE); | 
|---|
| 227 | } | 
|---|
| 228 |  | 
|---|
| 229 | /* | 
|---|
| 230 | * Notify this dentry's parent about a child's events with child name info | 
|---|
| 231 | * if parent is watching or if inode/sb/mount are interested in events with | 
|---|
| 232 | * parent and name info. | 
|---|
| 233 | * | 
|---|
| 234 | * Notify only the child without name info if parent is not watching and | 
|---|
| 235 | * inode/sb/mount are not interested in events with parent and name info. | 
|---|
| 236 | */ | 
|---|
| 237 | int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, | 
|---|
| 238 | int data_type) | 
|---|
| 239 | { | 
|---|
| 240 | const struct path *path = fsnotify_data_path(data, data_type); | 
|---|
| 241 | __u32 mnt_mask = path ? | 
|---|
| 242 | READ_ONCE(real_mount(path->mnt)->mnt_fsnotify_mask) : 0; | 
|---|
| 243 | struct inode *inode = d_inode(dentry); | 
|---|
| 244 | struct dentry *parent; | 
|---|
| 245 | bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; | 
|---|
| 246 | bool parent_needed, parent_interested; | 
|---|
| 247 | __u32 p_mask; | 
|---|
| 248 | struct inode *p_inode = NULL; | 
|---|
| 249 | struct name_snapshot name; | 
|---|
| 250 | struct qstr *file_name = NULL; | 
|---|
| 251 | int ret = 0; | 
|---|
| 252 |  | 
|---|
| 253 | /* Optimize the likely case of nobody watching this path */ | 
|---|
| 254 | if (likely(!parent_watched && | 
|---|
| 255 | !fsnotify_object_watched(inode, mnt_mask, mask))) | 
|---|
| 256 | return 0; | 
|---|
| 257 |  | 
|---|
| 258 | parent = NULL; | 
|---|
| 259 | parent_needed = fsnotify_event_needs_parent(inode, mnt_mask, mask); | 
|---|
| 260 | if (!parent_watched && !parent_needed) | 
|---|
| 261 | goto notify; | 
|---|
| 262 |  | 
|---|
| 263 | /* Does parent inode care about events on children? */ | 
|---|
| 264 | parent = dget_parent(dentry); | 
|---|
| 265 | p_inode = parent->d_inode; | 
|---|
| 266 | p_mask = fsnotify_inode_watches_children(inode: p_inode); | 
|---|
| 267 | if (unlikely(parent_watched && !p_mask)) | 
|---|
| 268 | fsnotify_clear_child_dentry_flag(pinode: p_inode, dentry); | 
|---|
| 269 |  | 
|---|
| 270 | /* | 
|---|
| 271 | * Include parent/name in notification either if some notification | 
|---|
| 272 | * groups require parent info or the parent is interested in this event. | 
|---|
| 273 | */ | 
|---|
| 274 | parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; | 
|---|
| 275 | if (parent_needed || parent_interested) { | 
|---|
| 276 | /* When notifying parent, child should be passed as data */ | 
|---|
| 277 | WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); | 
|---|
| 278 |  | 
|---|
| 279 | /* Notify both parent and child with child name info */ | 
|---|
| 280 | take_dentry_name_snapshot(&name, dentry); | 
|---|
| 281 | file_name = &name.name; | 
|---|
| 282 | if (parent_interested) | 
|---|
| 283 | mask |= FS_EVENT_ON_CHILD; | 
|---|
| 284 | } | 
|---|
| 285 |  | 
|---|
| 286 | notify: | 
|---|
| 287 | ret = fsnotify(mask, data, data_type, dir: p_inode, name: file_name, inode, cookie: 0); | 
|---|
| 288 |  | 
|---|
| 289 | if (file_name) | 
|---|
| 290 | release_dentry_name_snapshot(&name); | 
|---|
| 291 | dput(parent); | 
|---|
| 292 |  | 
|---|
| 293 | return ret; | 
|---|
| 294 | } | 
|---|
| 295 | EXPORT_SYMBOL_GPL(__fsnotify_parent); | 
|---|
| 296 |  | 
|---|
| 297 | static int fsnotify_handle_inode_event(struct fsnotify_group *group, | 
|---|
| 298 | struct fsnotify_mark *inode_mark, | 
|---|
| 299 | u32 mask, const void *data, int data_type, | 
|---|
| 300 | struct inode *dir, const struct qstr *name, | 
|---|
| 301 | u32 cookie) | 
|---|
| 302 | { | 
|---|
| 303 | const struct path *path = fsnotify_data_path(data, data_type); | 
|---|
| 304 | struct inode *inode = fsnotify_data_inode(data, data_type); | 
|---|
| 305 | const struct fsnotify_ops *ops = group->ops; | 
|---|
| 306 |  | 
|---|
| 307 | if (WARN_ON_ONCE(!ops->handle_inode_event)) | 
|---|
| 308 | return 0; | 
|---|
| 309 |  | 
|---|
| 310 | if (WARN_ON_ONCE(!inode && !dir)) | 
|---|
| 311 | return 0; | 
|---|
| 312 |  | 
|---|
| 313 | if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) && | 
|---|
| 314 | path && d_unlinked(dentry: path->dentry)) | 
|---|
| 315 | return 0; | 
|---|
| 316 |  | 
|---|
| 317 | /* Check interest of this mark in case event was sent with two marks */ | 
|---|
| 318 | if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS)) | 
|---|
| 319 | return 0; | 
|---|
| 320 |  | 
|---|
| 321 | return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie); | 
|---|
| 322 | } | 
|---|
| 323 |  | 
|---|
| 324 | static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, | 
|---|
| 325 | const void *data, int data_type, | 
|---|
| 326 | struct inode *dir, const struct qstr *name, | 
|---|
| 327 | u32 cookie, struct fsnotify_iter_info *iter_info) | 
|---|
| 328 | { | 
|---|
| 329 | struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); | 
|---|
| 330 | struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info); | 
|---|
| 331 | int ret; | 
|---|
| 332 |  | 
|---|
| 333 | if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) || | 
|---|
| 334 | WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info))) | 
|---|
| 335 | return 0; | 
|---|
| 336 |  | 
|---|
| 337 | /* | 
|---|
| 338 | * For FS_RENAME, 'dir' is old dir and 'data' is new dentry. | 
|---|
| 339 | * The only ->handle_inode_event() backend that supports FS_RENAME is | 
|---|
| 340 | * dnotify, where it means file was renamed within same parent. | 
|---|
| 341 | */ | 
|---|
| 342 | if (mask & FS_RENAME) { | 
|---|
| 343 | struct dentry *moved = fsnotify_data_dentry(data, data_type); | 
|---|
| 344 |  | 
|---|
| 345 | if (dir != moved->d_parent->d_inode) | 
|---|
| 346 | return 0; | 
|---|
| 347 | } | 
|---|
| 348 |  | 
|---|
| 349 | if (parent_mark) { | 
|---|
| 350 | ret = fsnotify_handle_inode_event(group, inode_mark: parent_mark, mask, | 
|---|
| 351 | data, data_type, dir, name, cookie: 0); | 
|---|
| 352 | if (ret) | 
|---|
| 353 | return ret; | 
|---|
| 354 | } | 
|---|
| 355 |  | 
|---|
| 356 | if (!inode_mark) | 
|---|
| 357 | return 0; | 
|---|
| 358 |  | 
|---|
| 359 | /* | 
|---|
| 360 | * Some events can be sent on both parent dir and child marks (e.g. | 
|---|
| 361 | * FS_ATTRIB).  If both parent dir and child are watching, report the | 
|---|
| 362 | * event once to parent dir with name (if interested) and once to child | 
|---|
| 363 | * without name (if interested). | 
|---|
| 364 | * | 
|---|
| 365 | * In any case regardless whether the parent is watching or not, the | 
|---|
| 366 | * child watcher is expecting an event without the FS_EVENT_ON_CHILD | 
|---|
| 367 | * flag. The file name is expected if and only if this is a directory | 
|---|
| 368 | * event. | 
|---|
| 369 | */ | 
|---|
| 370 | mask &= ~FS_EVENT_ON_CHILD; | 
|---|
| 371 | if (!(mask & ALL_FSNOTIFY_DIRENT_EVENTS)) { | 
|---|
| 372 | dir = NULL; | 
|---|
| 373 | name = NULL; | 
|---|
| 374 | } | 
|---|
| 375 |  | 
|---|
| 376 | return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type, | 
|---|
| 377 | dir, name, cookie); | 
|---|
| 378 | } | 
|---|
| 379 |  | 
|---|
| 380 | static int send_to_group(__u32 mask, const void *data, int data_type, | 
|---|
| 381 | struct inode *dir, const struct qstr *file_name, | 
|---|
| 382 | u32 cookie, struct fsnotify_iter_info *iter_info) | 
|---|
| 383 | { | 
|---|
| 384 | struct fsnotify_group *group = NULL; | 
|---|
| 385 | __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); | 
|---|
| 386 | __u32 marks_mask = 0; | 
|---|
| 387 | __u32 marks_ignore_mask = 0; | 
|---|
| 388 | bool is_dir = mask & FS_ISDIR; | 
|---|
| 389 | struct fsnotify_mark *mark; | 
|---|
| 390 | int type; | 
|---|
| 391 |  | 
|---|
| 392 | if (!iter_info->report_mask) | 
|---|
| 393 | return 0; | 
|---|
| 394 |  | 
|---|
| 395 | /* clear ignored on inode modification */ | 
|---|
| 396 | if (mask & FS_MODIFY) { | 
|---|
| 397 | fsnotify_foreach_iter_mark_type(iter_info, mark, type) { | 
|---|
| 398 | if (!(mark->flags & | 
|---|
| 399 | FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) | 
|---|
| 400 | mark->ignore_mask = 0; | 
|---|
| 401 | } | 
|---|
| 402 | } | 
|---|
| 403 |  | 
|---|
| 404 | /* Are any of the group marks interested in this event? */ | 
|---|
| 405 | fsnotify_foreach_iter_mark_type(iter_info, mark, type) { | 
|---|
| 406 | group = mark->group; | 
|---|
| 407 | marks_mask |= mark->mask; | 
|---|
| 408 | marks_ignore_mask |= | 
|---|
| 409 | fsnotify_effective_ignore_mask(mark, is_dir, iter_type: type); | 
|---|
| 410 | } | 
|---|
| 411 |  | 
|---|
| 412 | pr_debug( "%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", | 
|---|
| 413 | __func__, group, mask, marks_mask, marks_ignore_mask, | 
|---|
| 414 | data, data_type, dir, cookie); | 
|---|
| 415 |  | 
|---|
| 416 | if (!(test_mask & marks_mask & ~marks_ignore_mask)) | 
|---|
| 417 | return 0; | 
|---|
| 418 |  | 
|---|
| 419 | if (group->ops->handle_event) { | 
|---|
| 420 | return group->ops->handle_event(group, mask, data, data_type, dir, | 
|---|
| 421 | file_name, cookie, iter_info); | 
|---|
| 422 | } | 
|---|
| 423 |  | 
|---|
| 424 | return fsnotify_handle_event(group, mask, data, data_type, dir, | 
|---|
| 425 | name: file_name, cookie, iter_info); | 
|---|
| 426 | } | 
|---|
| 427 |  | 
|---|
| 428 | static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp) | 
|---|
| 429 | { | 
|---|
| 430 | struct fsnotify_mark_connector *conn; | 
|---|
| 431 | struct hlist_node *node = NULL; | 
|---|
| 432 |  | 
|---|
| 433 | conn = srcu_dereference(*connp, &fsnotify_mark_srcu); | 
|---|
| 434 | if (conn) | 
|---|
| 435 | node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu); | 
|---|
| 436 |  | 
|---|
| 437 | return hlist_entry_safe(node, struct fsnotify_mark, obj_list); | 
|---|
| 438 | } | 
|---|
| 439 |  | 
|---|
| 440 | static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) | 
|---|
| 441 | { | 
|---|
| 442 | struct hlist_node *node = NULL; | 
|---|
| 443 |  | 
|---|
| 444 | if (mark) | 
|---|
| 445 | node = srcu_dereference(mark->obj_list.next, | 
|---|
| 446 | &fsnotify_mark_srcu); | 
|---|
| 447 |  | 
|---|
| 448 | return hlist_entry_safe(node, struct fsnotify_mark, obj_list); | 
|---|
| 449 | } | 
|---|
| 450 |  | 
|---|
| 451 | /* | 
|---|
| 452 | * iter_info is a multi head priority queue of marks. | 
|---|
| 453 | * Pick a subset of marks from queue heads, all with the same group | 
|---|
| 454 | * and set the report_mask to a subset of the selected marks. | 
|---|
| 455 | * Returns false if there are no more groups to iterate. | 
|---|
| 456 | */ | 
|---|
| 457 | static bool fsnotify_iter_select_report_types( | 
|---|
| 458 | struct fsnotify_iter_info *iter_info) | 
|---|
| 459 | { | 
|---|
| 460 | struct fsnotify_group *max_prio_group = NULL; | 
|---|
| 461 | struct fsnotify_mark *mark; | 
|---|
| 462 | int type; | 
|---|
| 463 |  | 
|---|
| 464 | /* Choose max prio group among groups of all queue heads */ | 
|---|
| 465 | fsnotify_foreach_iter_type(type) { | 
|---|
| 466 | mark = iter_info->marks[type]; | 
|---|
| 467 | if (mark && | 
|---|
| 468 | fsnotify_compare_groups(a: max_prio_group, b: mark->group) > 0) | 
|---|
| 469 | max_prio_group = mark->group; | 
|---|
| 470 | } | 
|---|
| 471 |  | 
|---|
| 472 | if (!max_prio_group) | 
|---|
| 473 | return false; | 
|---|
| 474 |  | 
|---|
| 475 | /* Set the report mask for marks from same group as max prio group */ | 
|---|
| 476 | iter_info->current_group = max_prio_group; | 
|---|
| 477 | iter_info->report_mask = 0; | 
|---|
| 478 | fsnotify_foreach_iter_type(type) { | 
|---|
| 479 | mark = iter_info->marks[type]; | 
|---|
| 480 | if (mark && mark->group == iter_info->current_group) { | 
|---|
| 481 | /* | 
|---|
| 482 | * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode | 
|---|
| 483 | * is watching children and interested in this event, | 
|---|
| 484 | * which is an event possible on child. | 
|---|
| 485 | * But is *this mark* watching children? | 
|---|
| 486 | */ | 
|---|
| 487 | if (type == FSNOTIFY_ITER_TYPE_PARENT && | 
|---|
| 488 | !(mark->mask & FS_EVENT_ON_CHILD) && | 
|---|
| 489 | !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD)) | 
|---|
| 490 | continue; | 
|---|
| 491 |  | 
|---|
| 492 | fsnotify_iter_set_report_type(iter_info, iter_type: type); | 
|---|
| 493 | } | 
|---|
| 494 | } | 
|---|
| 495 |  | 
|---|
| 496 | return true; | 
|---|
| 497 | } | 
|---|
| 498 |  | 
|---|
| 499 | /* | 
|---|
| 500 | * Pop from iter_info multi head queue, the marks that belong to the group of | 
|---|
| 501 | * current iteration step. | 
|---|
| 502 | */ | 
|---|
| 503 | static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) | 
|---|
| 504 | { | 
|---|
| 505 | struct fsnotify_mark *mark; | 
|---|
| 506 | int type; | 
|---|
| 507 |  | 
|---|
| 508 | /* | 
|---|
| 509 | * We cannot use fsnotify_foreach_iter_mark_type() here because we | 
|---|
| 510 | * may need to advance a mark of type X that belongs to current_group | 
|---|
| 511 | * but was not selected for reporting. | 
|---|
| 512 | */ | 
|---|
| 513 | fsnotify_foreach_iter_type(type) { | 
|---|
| 514 | mark = iter_info->marks[type]; | 
|---|
| 515 | if (mark && mark->group == iter_info->current_group) | 
|---|
| 516 | iter_info->marks[type] = | 
|---|
| 517 | fsnotify_next_mark(mark: iter_info->marks[type]); | 
|---|
| 518 | } | 
|---|
| 519 | } | 
|---|
| 520 |  | 
|---|
| 521 | /* | 
|---|
| 522 | * fsnotify - This is the main call to fsnotify. | 
|---|
| 523 | * | 
|---|
| 524 | * The VFS calls into hook specific functions in linux/fsnotify.h. | 
|---|
| 525 | * Those functions then in turn call here.  Here will call out to all of the | 
|---|
| 526 | * registered fsnotify_group.  Those groups can then use the notification event | 
|---|
| 527 | * in whatever means they feel necessary. | 
|---|
| 528 | * | 
|---|
| 529 | * @mask:	event type and flags | 
|---|
| 530 | * @data:	object that event happened on | 
|---|
| 531 | * @data_type:	type of object for fanotify_data_XXX() accessors | 
|---|
| 532 | * @dir:	optional directory associated with event - | 
|---|
| 533 | *		if @file_name is not NULL, this is the directory that | 
|---|
| 534 | *		@file_name is relative to | 
|---|
| 535 | * @file_name:	optional file name associated with event | 
|---|
| 536 | * @inode:	optional inode associated with event - | 
|---|
| 537 | *		If @dir and @inode are both non-NULL, event may be | 
|---|
| 538 | *		reported to both. | 
|---|
| 539 | * @cookie:	inotify rename cookie | 
|---|
| 540 | */ | 
|---|
| 541 | int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, | 
|---|
| 542 | const struct qstr *file_name, struct inode *inode, u32 cookie) | 
|---|
| 543 | { | 
|---|
| 544 | const struct path *path = fsnotify_data_path(data, data_type); | 
|---|
| 545 | struct super_block *sb = fsnotify_data_sb(data, data_type); | 
|---|
| 546 | const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); | 
|---|
| 547 | struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL; | 
|---|
| 548 | struct fsnotify_iter_info iter_info = {}; | 
|---|
| 549 | struct mount *mnt = NULL; | 
|---|
| 550 | struct inode *inode2 = NULL; | 
|---|
| 551 | struct dentry *moved; | 
|---|
| 552 | int inode2_type; | 
|---|
| 553 | int ret = 0; | 
|---|
| 554 | __u32 test_mask, marks_mask = 0; | 
|---|
| 555 |  | 
|---|
| 556 | if (path) | 
|---|
| 557 | mnt = real_mount(mnt: path->mnt); | 
|---|
| 558 |  | 
|---|
| 559 | if (!inode) { | 
|---|
| 560 | /* Dirent event - report on TYPE_INODE to dir */ | 
|---|
| 561 | inode = dir; | 
|---|
| 562 | /* For FS_RENAME, inode is old_dir and inode2 is new_dir */ | 
|---|
| 563 | if (mask & FS_RENAME) { | 
|---|
| 564 | moved = fsnotify_data_dentry(data, data_type); | 
|---|
| 565 | inode2 = moved->d_parent->d_inode; | 
|---|
| 566 | inode2_type = FSNOTIFY_ITER_TYPE_INODE2; | 
|---|
| 567 | } | 
|---|
| 568 | } else if (mask & FS_EVENT_ON_CHILD) { | 
|---|
| 569 | /* | 
|---|
| 570 | * Event on child - report on TYPE_PARENT to dir if it is | 
|---|
| 571 | * watching children and on TYPE_INODE to child. | 
|---|
| 572 | */ | 
|---|
| 573 | inode2 = dir; | 
|---|
| 574 | inode2_type = FSNOTIFY_ITER_TYPE_PARENT; | 
|---|
| 575 | } | 
|---|
| 576 |  | 
|---|
| 577 | /* | 
|---|
| 578 | * Optimization: srcu_read_lock() has a memory barrier which can | 
|---|
| 579 | * be expensive.  It protects walking the *_fsnotify_marks lists. | 
|---|
| 580 | * However, if we do not walk the lists, we do not have to do | 
|---|
| 581 | * SRCU because we have no references to any objects and do not | 
|---|
| 582 | * need SRCU to keep them "alive". | 
|---|
| 583 | */ | 
|---|
| 584 | if ((!sbinfo || !sbinfo->sb_marks) && | 
|---|
| 585 | (!mnt || !mnt->mnt_fsnotify_marks) && | 
|---|
| 586 | (!inode || !inode->i_fsnotify_marks) && | 
|---|
| 587 | (!inode2 || !inode2->i_fsnotify_marks) && | 
|---|
| 588 | (!mnt_data || !mnt_data->ns->n_fsnotify_marks)) | 
|---|
| 589 | return 0; | 
|---|
| 590 |  | 
|---|
| 591 | if (sb) | 
|---|
| 592 | marks_mask |= READ_ONCE(sb->s_fsnotify_mask); | 
|---|
| 593 | if (mnt) | 
|---|
| 594 | marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask); | 
|---|
| 595 | if (inode) | 
|---|
| 596 | marks_mask |= READ_ONCE(inode->i_fsnotify_mask); | 
|---|
| 597 | if (inode2) | 
|---|
| 598 | marks_mask |= READ_ONCE(inode2->i_fsnotify_mask); | 
|---|
| 599 | if (mnt_data) | 
|---|
| 600 | marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask); | 
|---|
| 601 |  | 
|---|
| 602 | /* | 
|---|
| 603 | * If this is a modify event we may need to clear some ignore masks. | 
|---|
| 604 | * In that case, the object with ignore masks will have the FS_MODIFY | 
|---|
| 605 | * event in its mask. | 
|---|
| 606 | * Otherwise, return if none of the marks care about this type of event. | 
|---|
| 607 | */ | 
|---|
| 608 | test_mask = (mask & ALL_FSNOTIFY_EVENTS); | 
|---|
| 609 | if (!(test_mask & marks_mask)) | 
|---|
| 610 | return 0; | 
|---|
| 611 |  | 
|---|
| 612 | iter_info.srcu_idx = srcu_read_lock(ssp: &fsnotify_mark_srcu); | 
|---|
| 613 |  | 
|---|
| 614 | if (sbinfo) { | 
|---|
| 615 | iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = | 
|---|
| 616 | fsnotify_first_mark(connp: &sbinfo->sb_marks); | 
|---|
| 617 | } | 
|---|
| 618 | if (mnt) { | 
|---|
| 619 | iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = | 
|---|
| 620 | fsnotify_first_mark(connp: &mnt->mnt_fsnotify_marks); | 
|---|
| 621 | } | 
|---|
| 622 | if (inode) { | 
|---|
| 623 | iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] = | 
|---|
| 624 | fsnotify_first_mark(connp: &inode->i_fsnotify_marks); | 
|---|
| 625 | } | 
|---|
| 626 | if (inode2) { | 
|---|
| 627 | iter_info.marks[inode2_type] = | 
|---|
| 628 | fsnotify_first_mark(connp: &inode2->i_fsnotify_marks); | 
|---|
| 629 | } | 
|---|
| 630 | if (mnt_data) { | 
|---|
| 631 | iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] = | 
|---|
| 632 | fsnotify_first_mark(connp: &mnt_data->ns->n_fsnotify_marks); | 
|---|
| 633 | } | 
|---|
| 634 |  | 
|---|
| 635 | /* | 
|---|
| 636 | * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark | 
|---|
| 637 | * ignore masks are properly reflected for mount/sb mark notifications. | 
|---|
| 638 | * That's why this traversal is so complicated... | 
|---|
| 639 | */ | 
|---|
| 640 | while (fsnotify_iter_select_report_types(iter_info: &iter_info)) { | 
|---|
| 641 | ret = send_to_group(mask, data, data_type, dir, file_name, | 
|---|
| 642 | cookie, iter_info: &iter_info); | 
|---|
| 643 |  | 
|---|
| 644 | if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) | 
|---|
| 645 | goto out; | 
|---|
| 646 |  | 
|---|
| 647 | fsnotify_iter_next(iter_info: &iter_info); | 
|---|
| 648 | } | 
|---|
| 649 | ret = 0; | 
|---|
| 650 | out: | 
|---|
| 651 | srcu_read_unlock(ssp: &fsnotify_mark_srcu, idx: iter_info.srcu_idx); | 
|---|
| 652 |  | 
|---|
| 653 | return ret; | 
|---|
| 654 | } | 
|---|
| 655 | EXPORT_SYMBOL_GPL(fsnotify); | 
|---|
| 656 |  | 
|---|
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/*
 * At open time we check fsnotify_sb_has_priority_watchers(), call the open perm
 * hook and set the FMODE_NONOTIFY_ mode bits accordingly.
 * Later, fsnotify permission hooks do not check if there are permission event
 * watches, but that there were permission event watches at open time.
 *
 * Returns 0, or the non-zero result of fsnotify_path() for an open permission
 * event.
 */
int fsnotify_open_perm_and_set_mode(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry, *parent;
	struct super_block *sb = dentry->d_sb;
	__u32 mnt_mask, p_mask = 0;

	/* Is it a file opened by fanotify? */
	if (FMODE_FSNOTIFY_NONE(file->f_mode))
		return 0;

	/*
	 * Permission events is a super set of pre-content events, so if there
	 * are no permission event watchers, there are also no pre-content event
	 * watchers and this is implied from the single FMODE_NONOTIFY_PERM bit.
	 */
	if (likely(!fsnotify_sb_has_priority_watchers(sb,
						FSNOTIFY_PRIO_CONTENT))) {
		file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);
		return 0;
	}

	/*
	 * OK, there are some permission event watchers. Check if anybody is
	 * watching for permission events on *this* file.
	 */
	mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask);
	p_mask = fsnotify_object_watched(d_inode(dentry), mnt_mask,
					 ALL_FSNOTIFY_PERM_EVENTS);
	if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) {
		/* Also account for a parent that watches its children */
		parent = dget_parent(dentry);
		p_mask |= fsnotify_inode_watches_children(d_inode(parent));
		dput(parent);
	}

	/*
	 * Legacy FAN_ACCESS_PERM events have very high performance overhead,
	 * so unlikely to be used in the wild. If they are used there will be
	 * no optimizations at all.
	 */
	if (unlikely(p_mask & FS_ACCESS_PERM)) {
		/* Enable all permission and pre-content events */
		file_set_fsnotify_mode(file, 0);
		goto open_perm;
	}

	/*
	 * Pre-content events are only supported on regular files.
	 * If there are pre-content event watchers and no permission access
	 * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that.
	 * That is the common case with HSM service.
	 */
	if (d_is_reg(dentry) && (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)) {
		file_set_fsnotify_mode(file, FMODE_NONOTIFY |
					     FMODE_NONOTIFY_PERM);
		goto open_perm;
	}

	/* Nobody watching permission and pre-content events on this file */
	file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);

open_perm:
	/*
	 * Send open perm events depending on object masks and regardless of
	 * FMODE_NONOTIFY_PERM.
	 */
	if (file->f_flags & __FMODE_EXEC && p_mask & FS_OPEN_EXEC_PERM) {
		int ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM);

		if (ret)
			return ret;
	}

	if (p_mask & FS_OPEN_PERM)
		return fsnotify_path(&file->f_path, FS_OPEN_PERM);

	return 0;
}
#endif

| 743 | void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) | 
|---|
| 744 | { | 
|---|
| 745 | struct fsnotify_mnt data = { | 
|---|
| 746 | .ns = ns, | 
|---|
| 747 | .mnt_id = real_mount(mnt)->mnt_id_unique, | 
|---|
| 748 | }; | 
|---|
| 749 |  | 
|---|
| 750 | if (WARN_ON_ONCE(!ns)) | 
|---|
| 751 | return; | 
|---|
| 752 |  | 
|---|
| 753 | /* | 
|---|
| 754 | * This is an optimization as well as making sure fsnotify_init() has | 
|---|
| 755 | * been called. | 
|---|
| 756 | */ | 
|---|
| 757 | if (!ns->n_fsnotify_marks) | 
|---|
| 758 | return; | 
|---|
| 759 |  | 
|---|
| 760 | fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0); | 
|---|
| 761 | } | 
|---|
| 762 |  | 
|---|
| 763 | static __init int fsnotify_init(void) | 
|---|
| 764 | { | 
|---|
| 765 | int ret; | 
|---|
| 766 |  | 
|---|
| 767 | BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26); | 
|---|
| 768 |  | 
|---|
| 769 | ret = init_srcu_struct(ssp: &fsnotify_mark_srcu); | 
|---|
| 770 | if (ret) | 
|---|
| 771 | panic(fmt: "initializing fsnotify_mark_srcu"); | 
|---|
| 772 |  | 
|---|
| 773 | fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, | 
|---|
| 774 | SLAB_PANIC); | 
|---|
| 775 |  | 
|---|
| 776 | return 0; | 
|---|
| 777 | } | 
|---|
| 778 | core_initcall(fsnotify_init); | 
|---|
| 779 |  | 
|---|