// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level buffered write support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/pagevec.h>
#include "internal.h"

static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
{
	if (netfs_group)
		folio_attach_private(folio, netfs_get_group(netfs_group));
}

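/*
 * Attach the given write grouping to a folio, replacing a "copy to cache"
 * marker if that's all that's there; with no grouping, just clear such a
 * marker.  A folio already carrying other private data (a different group or
 * a streaming-write record) is left untouched.
 */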
static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
{
	void *priv = folio_get_private(folio);

	if (unlikely(priv != netfs_group)) {
		if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
			folio_attach_private(folio, netfs_get_group(netfs_group));
		else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
			folio_detach_private(folio);
	}
}

/*
 * Grab a folio for writing and lock it.  Attempt to allocate as large a folio
 * as possible to hold as much of the remaining length as possible in one go.
 */
static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
						loff_t pos, size_t part)
{
	pgoff_t index = pos / PAGE_SIZE;
	fgf_t fgp_flags = FGP_WRITEBEGIN;

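	/* If large folios are supported, ask for one big enough to cover the
	 * offset into the first page plus as much of the remaining length as
	 * possible.
	 */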
	if (mapping_large_folio_support(mapping))
		fgp_flags |= fgf_set_order(pos % PAGE_SIZE + part);

	return __filemap_get_folio(mapping, index, fgp_flags,
				   mapping_gfp_mask(mapping));
}

/*
 * Update i_size and estimate the update to i_blocks to reflect the additional
 * data written into the pagecache until we can find out from the server what
 * the values actually are.
 */
void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
			 loff_t pos, size_t copied)
{
	loff_t i_size, end = pos + copied;
	blkcnt_t add;
	size_t gap;

	if (end <= i_size_read(inode))
		return;

	if (ctx->ops->update_i_size) {
		ctx->ops->update_i_size(inode, end);
		return;
	}

	spin_lock(&inode->i_lock);

	i_size = i_size_read(inode);
	if (end > i_size) {
		i_size_write(inode, end);
#if IS_ENABLED(CONFIG_FSCACHE)
		fscache_update_cookie(ctx->cache, NULL, &end);
#endif

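		/* Estimate how many 512-byte blocks the copy added beyond the
		 * part of the final block already covered by the old i_size,
		 * capping i_blocks at the count implied by the new size.
		 */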
		gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
		if (copied > gap) {
			add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);

			inode->i_blocks = min_t(blkcnt_t,
						DIV_ROUND_UP(end, SECTOR_SIZE),
						inode->i_blocks + add);
		}
	}
	spin_unlock(&inode->i_lock);
}

/**
 * netfs_perform_write - Copy data into the pagecache.
 * @iocb: The operation parameters
 * @iter: The source buffer
 * @netfs_group: Grouping for dirty folios (e.g. ceph snaps).
 *
 * Copy data into pagecache folios attached to the inode specified by @iocb.
 * The caller must hold appropriate inode locks.
 *
 * Dirty folios that aren't completely up to date are tagged with a
 * netfs_folio struct to indicate the range modified.  Dirty folios may also
 * be tagged with a netfs-specific grouping such that data from an old group
 * gets flushed before a new one is started.
 */
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
			    struct netfs_group *netfs_group)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
	struct netfs_inode *ctx = netfs_inode(inode);
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_NONE,
		.for_sync	= true,
		.nr_to_write	= LONG_MAX,
		.range_start	= iocb->ki_pos,
		.range_end	= iocb->ki_pos + iter->count,
	};
	struct netfs_io_request *wreq = NULL;
	struct folio *folio = NULL, *writethrough = NULL;
	unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
	ssize_t written = 0, ret, ret2;
	loff_t pos = iocb->ki_pos;
	size_t max_chunk = mapping_max_folio_size(mapping);
	bool maybe_trouble = false;

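	/* For O_SYNC/O_DSYNC writes, use write-through mode: flush any
	 * preexisting dirty data in the target range first, then queue each
	 * folio for writeback as soon as it has been modified.
	 */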
	if (unlikely(iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))) {
		wbc_attach_fdatawrite_inode(&wbc, mapping->host);

		ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
		if (ret < 0) {
			wbc_detach_inode(&wbc);
			goto out;
		}

		wreq = netfs_begin_writethrough(iocb, iter->count);
		if (IS_ERR(wreq)) {
			wbc_detach_inode(&wbc);
			ret = PTR_ERR(wreq);
			wreq = NULL;
			goto out;
		}
		if (!is_sync_kiocb(iocb))
			wreq->iocb = iocb;
		netfs_stat(&netfs_n_wh_writethrough);
	} else {
		netfs_stat(&netfs_n_wh_buffered_write);
	}
| 152 | do { | 
|---|
| 153 | struct netfs_folio *finfo; | 
|---|
| 154 | struct netfs_group *group; | 
|---|
| 155 | unsigned long long fpos; | 
|---|
| 156 | size_t flen; | 
|---|
| 157 | size_t offset;	/* Offset into pagecache folio */ | 
|---|
| 158 | size_t part;	/* Bytes to write to folio */ | 
|---|
| 159 | size_t copied;	/* Bytes copied from user */ | 
|---|
| 160 |  | 
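		/* Limit each pass to at most one maximum-size folio's worth
		 * of data, aligned to the maximum folio size.
		 */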
		offset = pos & (max_chunk - 1);
		part = min(max_chunk - offset, iov_iter_count(iter));

		/* Bring in the user pages that we will copy from _first_ lest
		 * we hit a nasty deadlock on copying from the same page as
		 * we're writing to, without it being marked uptodate.
		 *
		 * Not only is this an optimisation, but it is also required to
		 * check that the address is actually valid, when atomic
		 * usercopies are used below.
		 *
		 * We rely on the page being held onto long enough by the LRU
		 * that we can grab it below if this causes it to be read.
		 */
		ret = -EFAULT;
		if (unlikely(fault_in_iov_iter_readable(iter, part) == part))
			break;

		folio = netfs_grab_folio_for_write(mapping, pos, part);
		if (IS_ERR(folio)) {
			ret = PTR_ERR(folio);
			break;
		}

		flen = folio_size(folio);
		fpos = folio_pos(folio);
		offset = pos - fpos;
		part = min_t(size_t, flen - offset, part);

		/* Wait for writeback to complete.  The writeback engine owns
		 * the info in folio->private and may change it until it
		 * removes the WB mark.
		 */
		if (folio_get_private(folio) &&
		    folio_wait_writeback_killable(folio)) {
			ret = written ? -EINTR : -ERESTARTSYS;
			goto error_folio_unlock;
		}

		if (signal_pending(current)) {
			ret = written ? -EINTR : -ERESTARTSYS;
			goto error_folio_unlock;
		}

		/* Decide how we should modify a folio.  We might be attempting
		 * to do write-streaming, in which case we don't want to do a
		 * local RMW cycle if we can avoid it.  If we're doing local
		 * caching or content crypto, we award that priority over
		 * avoiding RMW.  If the file is open readably, then we also
		 * assume that we may want to read what we wrote.
		 */
		finfo = netfs_folio_info(folio);
		group = netfs_folio_group(folio);

		if (unlikely(group != netfs_group) &&
		    group != NETFS_FOLIO_COPY_TO_CACHE)
			goto flush_content;

		if (folio_test_uptodate(folio)) {
			if (mapping_writably_mapped(mapping))
				flush_dcache_folio(folio);
			copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
			if (unlikely(copied == 0))
				goto copy_failed;
			netfs_set_group(folio, netfs_group);
			trace_netfs_folio(folio, netfs_folio_is_uptodate);
			goto copied;
		}

		/* If the page is above the zero-point then we assume that the
		 * server would just return a block of zeros or a short read if
		 * we try to read it.
		 */
		if (fpos >= ctx->zero_point) {
			folio_zero_segment(folio, 0, offset);
			copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
			if (unlikely(copied == 0))
				goto copy_failed;
			folio_zero_segment(folio, offset + copied, flen);
			__netfs_set_group(folio, netfs_group);
			folio_mark_uptodate(folio);
			trace_netfs_folio(folio, netfs_modify_and_clear);
			goto copied;
		}

		/* See if we can write a whole folio in one go. */
		if (!maybe_trouble && offset == 0 && part >= flen) {
			copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
			if (unlikely(copied == 0))
				goto copy_failed;
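			/* A short copy means the folio can't be completely
			 * filled and so can't be marked uptodate.  Back out,
			 * drop the folio and retry without the whole-folio
			 * shortcut.
			 */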
			if (unlikely(copied < part)) {
				maybe_trouble = true;
				iov_iter_revert(iter, copied);
				copied = 0;
				folio_unlock(folio);
				goto retry;
			}
			__netfs_set_group(folio, netfs_group);
			folio_mark_uptodate(folio);
			trace_netfs_folio(folio, netfs_whole_folio_modify);
			goto copied;
		}

		/* We don't want to do a streaming write on a file that loses
		 * caching service temporarily because the backing store got
		 * culled, and we don't really want to do a streaming write on
		 * a file that's open for reading, as ->read_folio() then has
		 * to be able to flush it.
		 */
		if ((file->f_mode & FMODE_READ) ||
		    netfs_is_cache_enabled(ctx)) {
			if (finfo) {
				netfs_stat(&netfs_n_wh_wstream_conflict);
				goto flush_content;
			}
			ret = netfs_prefetch_for_write(file, folio, offset, part);
			if (ret < 0) {
				_debug("prefetch = %zd", ret);
				goto error_folio_unlock;
			}
			/* Note that copy-to-cache may have been set. */

			copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
			if (unlikely(copied == 0))
				goto copy_failed;
			netfs_set_group(folio, netfs_group);
			trace_netfs_folio(folio, netfs_just_prefetch);
			goto copied;
		}

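		/* Neither cacheable nor open for reading: do a streaming
		 * write, recording just the span actually modified in a
		 * netfs_folio record attached to folio->private rather than
		 * reading in the rest of the folio.
		 */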
		if (!finfo) {
			ret = -EIO;
			if (WARN_ON(folio_get_private(folio)))
				goto error_folio_unlock;
			copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
			if (unlikely(copied == 0))
				goto copy_failed;
			if (offset == 0 && copied == flen) {
				__netfs_set_group(folio, netfs_group);
				folio_mark_uptodate(folio);
				trace_netfs_folio(folio, netfs_streaming_filled_page);
				goto copied;
			}

			finfo = kzalloc(sizeof(*finfo), GFP_KERNEL);
			if (!finfo) {
				iov_iter_revert(iter, copied);
				ret = -ENOMEM;
				goto error_folio_unlock;
			}
			finfo->netfs_group = netfs_get_group(netfs_group);
			finfo->dirty_offset = offset;
			finfo->dirty_len = copied;
			folio_attach_private(folio, (void *)((unsigned long)finfo |
							     NETFS_FOLIO_INFO));
			trace_netfs_folio(folio, netfs_streaming_write);
			goto copied;
		}

		/* We can continue a streaming write only if it continues on
		 * from the previous.  If it overlaps, we must flush lest we
		 * suffer a partial copy and disjoint dirty regions.
		 */
		if (offset == finfo->dirty_offset + finfo->dirty_len) {
			copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
			if (unlikely(copied == 0))
				goto copy_failed;
			finfo->dirty_len += copied;
			if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) {
				if (finfo->netfs_group)
					folio_change_private(folio, finfo->netfs_group);
				else
					folio_detach_private(folio);
				folio_mark_uptodate(folio);
				kfree(finfo);
				trace_netfs_folio(folio, netfs_streaming_cont_filled_page);
			} else {
				trace_netfs_folio(folio, netfs_streaming_write_cont);
			}
			goto copied;
		}

		/* Incompatible write; flush the folio and try again. */
	flush_content:
		trace_netfs_folio(folio, netfs_flush_content);
		folio_unlock(folio);
		folio_put(folio);
		ret = filemap_write_and_wait_range(mapping, fpos, fpos + flen - 1);
		if (ret < 0)
			goto out;
		continue;

	copied:
		flush_dcache_folio(folio);

		/* Update the inode size if we moved the EOF marker */
		netfs_update_i_size(ctx, inode, pos, copied);
		pos += copied;
		written += copied;

		if (likely(!wreq)) {
			folio_mark_dirty(folio);
			folio_unlock(folio);
		} else {
			netfs_advance_writethrough(wreq, &wbc, folio, copied,
						   offset + copied == flen,
						   &writethrough);
			/* Folio unlocked */
		}
	retry:
		folio_put(folio);
		folio = NULL;

		ret = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags);
		if (unlikely(ret < 0))
			break;

		cond_resched();
	} while (iov_iter_count(iter));

out:
	if (likely(written)) {
		/* Set indication that ctime and mtime got updated in case
		 * close is deferred.
		 */
		set_bit(NETFS_ICTX_MODIFIED_ATTR, &ctx->flags);
		if (unlikely(ctx->ops->post_modify))
			ctx->ops->post_modify(inode);
	}

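	/* If this was a write-through write, close out the request.  A return
	 * of -EIOCBQUEUED indicates that the write will complete
	 * asynchronously and the iocb will be signalled later.
	 */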
	if (unlikely(wreq)) {
		ret2 = netfs_end_writethrough(wreq, &wbc, writethrough);
		wbc_detach_inode(&wbc);
		if (ret2 == -EIOCBQUEUED)
			return ret2;
		if (ret == 0 && ret2 < 0)
			ret = ret2;
	}

	iocb->ki_pos += written;
	_leave(" = %zd [%zd]", written, ret);
	return written ? written : ret;

copy_failed:
	ret = -EFAULT;
error_folio_unlock:
	folio_unlock(folio);
	folio_put(folio);
	goto out;
}
EXPORT_SYMBOL(netfs_perform_write);

/**
 * netfs_buffered_write_iter_locked - write data to a file
 * @iocb:	IO state structure (file, offset, etc.)
 * @from:	iov_iter with data to write
 * @netfs_group: Grouping for dirty folios (e.g. ceph snaps).
 *
 * This function does all the work needed for actually writing data to a
 * file.  It does all basic checks, removes SUID from the file, updates
 * modification times and calls the proper subroutines depending on whether we
 * do direct IO or a standard buffered write.
 *
 * The caller must hold appropriate locks around this function and have called
 * generic_write_checks() already.  The caller is also responsible for doing
 * any necessary syncing afterwards.
 *
 * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it.  This is mainly due to the fact that we want to
 * avoid syncing under i_rwsem.
 *
 * Return:
 * * number of bytes written, even for truncated writes
 * * negative error code if no data has been written at all
 */
ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
					 struct netfs_group *netfs_group)
{
	struct file *file = iocb->ki_filp;
	ssize_t ret;

	trace_netfs_write_iter(iocb, from);

	ret = file_remove_privs(file);
	if (ret)
		return ret;

	ret = file_update_time(file);
	if (ret)
		return ret;

	return netfs_perform_write(iocb, from, netfs_group);
}
EXPORT_SYMBOL(netfs_buffered_write_iter_locked);

/**
 * netfs_file_write_iter - write data to a file
 * @iocb: IO state structure
 * @from: iov_iter with data to write
 *
 * Perform a write to a file, writing into the pagecache if possible and doing
 * an unbuffered write instead if not.
 *
 * Return:
 * * Negative error code if no data has been written at all or if
 *   vfs_fsync_range() failed for a synchronous write
 * * Number of bytes written, even for truncated writes
 */
ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));

	if (!iov_iter_count(from))
		return 0;

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_write_iter(iocb, from);

	ret = netfs_start_io_write(inode);
	if (ret < 0)
		return ret;

	ret = generic_write_checks(iocb, from);
	if (ret > 0)
		ret = netfs_buffered_write_iter_locked(iocb, from, NULL);
	netfs_end_io_write(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
EXPORT_SYMBOL(netfs_file_write_iter);

/*
 * Notification that a previously read-only page is about to become writable.
 * The caller indicates the precise page that needs to be written to, but
 * we only track group on a per-folio basis, so we block more often than
 * we might otherwise.
 */
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
{
	struct netfs_group *group;
	struct folio *folio = page_folio(vmf->page);
	struct file *file = vmf->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = file_inode(file);
	struct netfs_inode *ictx = netfs_inode(inode);
	vm_fault_t ret = VM_FAULT_NOPAGE;
	int err;

	_enter("%lx", folio->index);

	sb_start_pagefault(inode->i_sb);

	if (folio_lock_killable(folio) < 0)
		goto out;
	if (folio->mapping != mapping)
		goto unlock;
	if (folio_wait_writeback_killable(folio) < 0)
		goto unlock;

	/* Can we see a streaming write here? */
	if (WARN_ON(!folio_test_uptodate(folio))) {
		ret = VM_FAULT_SIGBUS;
		goto unlock;
	}

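	/* If the folio belongs to a different write group, it has to be
	 * flushed before this write can proceed; kick off writeback and have
	 * the fault retried.
	 */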
	group = netfs_folio_group(folio);
	if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) {
		folio_unlock(folio);
		err = filemap_fdatawrite_range(mapping,
					       folio_pos(folio),
					       folio_pos(folio) + folio_size(folio));
		switch (err) {
		case 0:
			ret = VM_FAULT_RETRY;
			goto out;
		case -ENOMEM:
			ret = VM_FAULT_OOM;
			goto out;
		default:
			ret = VM_FAULT_SIGBUS;
			goto out;
		}
	}

	if (folio_test_dirty(folio))
		trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus);
	else
		trace_netfs_folio(folio, netfs_folio_trace_mkwrite);
	netfs_set_group(folio, netfs_group);
	file_update_time(file);
	set_bit(NETFS_ICTX_MODIFIED_ATTR, &ictx->flags);
	if (ictx->ops->post_modify)
		ictx->ops->post_modify(inode);
	ret = VM_FAULT_LOCKED;
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
unlock:
	folio_unlock(folio);
	goto out;
}
EXPORT_SYMBOL(netfs_page_mkwrite);