// SPDX-License-Identifier: GPL-2.0-or-later
/* Direct I/O support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/netfs.h>
#include "internal.h"

/*
 * Prepare the I/O iterator for a subrequest, clamping the subrequest to the
 * transport's maximum read size and, if set, its maximum segment count.
 */
static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize;

	rsize = umin(subreq->len, rreq->io_streams[0].sreq_max_len);
	subreq->len = rsize;

	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

	/* Carve this subrequest's slice out of the overall DIO buffer. */
	subreq->io_iter	= rreq->buffer.iter;
	iov_iter_truncate(&subreq->io_iter, subreq->len);
	iov_iter_advance(&rreq->buffer.iter, subreq->len);
}

/*
 * Perform a read to a buffer from the server, slicing up the region to be read
 * according to the network rsize.
 */
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	do {
		struct netfs_io_subrequest *subreq;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->source	= NETFS_DOWNLOAD_FROM_SERVER;
		subreq->start	= start;
		subreq->len	= size;

		__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

		spin_lock(&rreq->lock);
		list_add_tail(&subreq->rreq_link, &stream->subrequests);
		if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
			stream->front = subreq;
			if (!stream->active) {
				stream->collected_to = stream->front->start;
				/* Store list pointers before active flag */
				smp_store_release(&stream->active, true);
			}
		}
		trace_netfs_sreq(subreq, netfs_sreq_trace_added);
		spin_unlock(&rreq->lock);

		netfs_stat(&netfs_n_rh_download);
		if (rreq->netfs_ops->prepare_read) {
			ret = rreq->netfs_ops->prepare_read(subreq);
			if (ret < 0) {
				netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
				break;
			}
		}

		netfs_prepare_dio_read_iterator(subreq);
		slice = subreq->len;
		size -= slice;
		start += slice;
		rreq->submitted += slice;
		if (size <= 0) {
			smp_wmb(); /* Write lists before ALL_QUEUED. */
			set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
		}

		rreq->netfs_ops->issue_read(subreq);

		if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
			netfs_wait_for_paused_read(rreq);
		if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
			break;
		cond_resched();
	} while (size > 0);

	if (unlikely(size > 0)) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
		netfs_wake_collector(rreq);
	}

	return ret;
}

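/*
 * The loop above drives the filesystem through two netfs_ops hooks:
 * ->prepare_read() may tighten per-subrequest limits, and ->issue_read() must
 * transfer the slice described by the subrequest and then hand it back to the
 * netfs collector.  The following is a rough, hypothetical sketch of that
 * contract (assuming the current completion API where the filesystem fills in
 * subreq->transferred or subreq->error and calls netfs_read_subreq_terminated());
 * example_server_read() and the example_* names are made up for illustration
 * and belong to no real filesystem.
 */
static void example_issue_read(struct netfs_io_subrequest *subreq)
{
	ssize_t n;

	/* Read subreq->len bytes at subreq->start into the prepared iterator. */
	n = example_server_read(subreq->rreq->inode, &subreq->io_iter,
				subreq->start, subreq->len);
	if (n < 0)
		subreq->error = n;
	else
		subreq->transferred = n;

	/* Hand the completed (or failed) slice back to the collector. */
	netfs_read_subreq_terminated(subreq);
}
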
/*
 * Perform a read to an application buffer, bypassing the pagecache and the
 * local disk cache.
 */
static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
{
	ssize_t ret;

	_enter("R=%x %llx-%llx",
	       rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);

	if (rreq->len == 0) {
		pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
		netfs_put_request(rreq, netfs_rreq_trace_put_discard);
		return -EIO;
	}

	// TODO: Use bounce buffer if requested

	inode_dio_begin(rreq->inode);

	ret = netfs_dispatch_unbuffered_reads(rreq);

	if (!rreq->submitted) {
		netfs_put_request(rreq, netfs_rreq_trace_put_no_submit);
		inode_dio_end(rreq->inode);
		ret = 0;
		goto out;
	}

	if (sync)
		ret = netfs_wait_for_read(rreq);
	else
		ret = -EIOCBQUEUED;
out:
	_leave(" = %zd", ret);
	return ret;
}

/**
 * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
 * @iocb: The I/O control descriptor describing the read
 * @iter: The output buffer (also specifies read length)
 *
 * Perform an unbuffered I/O or direct I/O from the file in @iocb to the
 * output buffer.  No use is made of the pagecache.
 *
 * The caller must hold any appropriate locks.
 */
ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_io_request *rreq;
	ssize_t ret;
	size_t orig_count = iov_iter_count(iter);
	bool sync = is_sync_kiocb(iocb);

	_enter("");

	if (!orig_count)
		return 0; /* Don't update atime */

	ret = kiocb_write_and_wait(iocb, orig_count);
	if (ret < 0)
		return ret;
	file_accessed(iocb->ki_filp);

	rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
				   iocb->ki_pos, orig_count,
				   iocb->ki_flags & IOCB_DIRECT ?
				   NETFS_DIO_READ : NETFS_UNBUFFERED_READ);
	if (IS_ERR(rreq))
		return PTR_ERR(rreq);

	netfs_stat(&netfs_n_rh_dio_read);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_dio_read);

	/* If this is an async op, we have to keep track of the destination
	 * buffer for ourselves as the caller's iterator will be trashed when
	 * we return.
	 *
	 * In such a case, extract an iterator to represent as much of the
	 * output buffer as we can manage.  Note that the extraction might not
	 * be able to allocate a sufficiently large bvec array and may shorten
	 * the request.
	 */
	if (user_backed_iter(iter)) {
		ret = netfs_extract_user_iter(iter, rreq->len, &rreq->buffer.iter, 0);
		if (ret < 0)
			goto error_put;
		rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec;
		rreq->direct_bv_count = ret;
		rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
		rreq->len = iov_iter_count(&rreq->buffer.iter);
	} else {
		rreq->buffer.iter = *iter;
		rreq->len = orig_count;
		rreq->direct_bv_unpin = false;
		iov_iter_advance(iter, orig_count);
	}

	// TODO: Set up bounce buffer if needed

	if (!sync) {
		rreq->iocb = iocb;
		__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
	}

	ret = netfs_unbuffered_read(rreq, sync);
	if (ret < 0)
		goto out; /* May be -EIOCBQUEUED */
	if (sync) {
		// TODO: Copy from bounce buffer
		iocb->ki_pos += rreq->transferred;
		ret = rreq->transferred;
	}

out:
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	if (ret > 0)
		orig_count -= ret;
	return ret;

error_put:
	netfs_put_failed_request(rreq);
	return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);

/**
 * netfs_unbuffered_read_iter - Perform an unbuffered or direct I/O read
 * @iocb: The I/O control descriptor describing the read
 * @iter: The output buffer (also specifies read length)
 *
 * Perform an unbuffered I/O or direct I/O from the file in @iocb to the
 * output buffer.  No use is made of the pagecache.
 */
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (!iter->count)
		return 0; /* Don't update atime */

	ret = netfs_start_io_direct(inode);
	if (ret == 0) {
		ret = netfs_unbuffered_read_iter_locked(iocb, iter);
		netfs_end_io_direct(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_read_iter);

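/*
 * A minimal, hypothetical sketch of how a network filesystem's ->read_iter
 * might route O_DIRECT reads through netfs_unbuffered_read_iter() while
 * leaving buffered reads to netfs_file_read_iter(); the function name is made
 * up for illustration.
 */
static ssize_t example_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	if (iocb->ki_flags & IOCB_DIRECT)
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_file_read_iter(iocb, iter);
}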