// SPDX-License-Identifier: GPL-2.0

#include <kunit/visibility.h>
#include <linux/kernel.h>
#include <linux/irqflags.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/bug.h>
#include "printk_ringbuffer.h"
#include "internal.h"

|---|
/**
 * DOC: printk_ringbuffer overview
 *
 * Data Structure
 * --------------
 * The printk_ringbuffer is made up of two internal ringbuffers:
|---|
 *
 *   desc_ring
 *     A ring of descriptors and their meta data (such as sequence number,
 *     timestamp, loglevel, etc.) as well as internal state information about
 *     the record and logical positions specifying where in the other
 *     ringbuffer the text strings are located.
 *
 *   text_data_ring
 *     A ring of data blocks. A data block consists of an unsigned long
 *     integer (ID) that maps to a desc_ring index followed by the text
 *     string of the record.
 *
 * The internal state information of a descriptor is the key element to allow
 * readers and writers to locklessly synchronize access to the data.
 *
 * Implementation
 * --------------
 *
 * Descriptor Ring
 * ~~~~~~~~~~~~~~~
 * The descriptor ring is an array of descriptors. A descriptor contains
 * essential meta data to track the data of a printk record using
 * blk_lpos structs pointing to associated text data blocks (see
 * "Data Ring" below). Each descriptor is assigned an ID that maps
 * directly to index values of the descriptor array and has a state. The ID
 * and the state are bitwise combined into a single descriptor field named
 * @state_var, allowing ID and state to be synchronously and atomically
 * updated.
 *
 * Descriptors have four states:
 *
 *   reserved
 *     A writer is modifying the record.
 *
 *   committed
 *     The record and all its data are written. A writer can reopen the
 *     descriptor (transitioning it back to reserved), but in the committed
 *     state the data is consistent.
 *
 *   finalized
 *     The record and all its data are complete and available for reading. A
 *     writer cannot reopen the descriptor.
 *
 *   reusable
 *     The record exists, but its text and/or meta data may no longer be
 *     available.
 *
 * Querying the @state_var of a record requires providing the ID of the
 * descriptor to query. This can yield a possible fifth (pseudo) state:
 *
 *   miss
 *     The descriptor being queried has an unexpected ID.
 *
 * The descriptor ring has a @tail_id that contains the ID of the oldest
 * descriptor and @head_id that contains the ID of the newest descriptor.
 *
 * When a new descriptor should be created (and the ring is full), the tail
 * descriptor is invalidated by first transitioning to the reusable state and
 * then invalidating all tail data blocks up to and including the data blocks
 * associated with the tail descriptor (for the text ring). Then
 * @tail_id is advanced, followed by advancing @head_id. And finally the
 * @state_var of the new descriptor is initialized to the new ID and reserved
 * state.
 *
|---|
 * The @tail_id can only be advanced if the new @tail_id would be in the
 * finalized or reusable queried state. This ensures that a valid
 * sequence number for the tail is always available.
|---|
 *
 * Descriptor Finalization
 * ~~~~~~~~~~~~~~~~~~~~~~~
 * When a writer calls the commit function prb_commit(), record data is
 * fully stored and is consistent within the ringbuffer. However, a writer can
 * reopen that record, claiming exclusive access (as with prb_reserve()), and
 * modify that record. When finished, the writer must again commit the record.
 *
 * In order for a record to be made available to readers (and also become
 * recyclable for writers), it must be finalized. A finalized record cannot be
 * reopened and can never become "unfinalized". Record finalization can occur
 * in three different scenarios:
 *
 *   1) A writer can simultaneously commit and finalize its record by calling
 *      prb_final_commit() instead of prb_commit().
 *
 *   2) When a new record is reserved and the previous record has been
 *      committed via prb_commit(), that previous record is automatically
 *      finalized.
 *
 *   3) When a record is committed via prb_commit() and a newer record
 *      already exists, the record being committed is automatically finalized.
 *
 * Data Ring
 * ~~~~~~~~~
 * The text data ring is a byte array composed of data blocks. Data blocks are
 * referenced by blk_lpos structs that point to the logical position of the
 * beginning of a data block and the beginning of the next adjacent data
 * block. Logical positions are mapped directly to index values of the byte
 * array ringbuffer.
 *
 * Each data block consists of an ID followed by the writer data. The ID is
 * the identifier of a descriptor that is associated with the data block. A
 * given data block is considered valid if all of the following conditions
 * are met:
 *
 *   1) The descriptor associated with the data block is in the committed
 *      or finalized queried state.
 *
 *   2) The blk_lpos struct within the descriptor associated with the data
 *      block references back to the same data block.
 *
 *   3) The data block is within the head/tail logical position range.
 *
 * If the writer data of a data block would extend beyond the end of the
 * byte array, only the ID of the data block is stored at the logical
 * position and the full data block (ID and writer data) is stored at the
 * beginning of the byte array. The referencing blk_lpos will point to the
 * ID before the wrap and the next data block will be at the logical
 * position adjacent the full data block after the wrap.
 *
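 * For illustration (hypothetical sizes, not the actual printk configuration),
 * assume a text data ring with size_bits = 6 (64 bytes) and a data block of
 * total size 24 reserved at logical position 56. The block would wrap::
 *
 *	DATA_WRAPS(ring, 56) == 0, DATA_WRAPS(ring, 56 + 24) == 1
 *	next block begins at DATA_THIS_WRAP_START_LPOS(ring, 80) + 24 == 88
 *
 * Only the ID is stored at index 56; the full data block (ID and writer
 * data) occupies indexes 0-23 of the next wrap.
 *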
|---|
 * Data rings have a @tail_lpos that points to the beginning of the oldest
 * data block and a @head_lpos that points to the logical position of the
 * next (not yet existing) data block.
 *
 * When a new data block should be created (and the ring is full), tail data
 * blocks will first be invalidated by putting their associated descriptors
 * into the reusable state and then pushing the @tail_lpos forward beyond
 * them. Then the @head_lpos is pushed forward and is associated with a new
 * descriptor. If a data block is not valid, the @tail_lpos cannot be
 * advanced beyond it.
 *
 * Info Array
 * ~~~~~~~~~~
 * The general meta data of printk records are stored in printk_info structs,
 * stored in an array with the same number of elements as the descriptor ring.
 * Each info corresponds to the descriptor of the same index in the
 * descriptor ring. Info validity is confirmed by evaluating the corresponding
 * descriptor before and after loading the info.
 *
 * Usage
 * -----
 * Here are some simple examples demonstrating writers and readers. For the
 * examples a global ringbuffer (test_rb) is available (which is not the
 * actual ringbuffer used by printk)::
 *
 *	DEFINE_PRINTKRB(test_rb, 15, 5);
 *
 * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of
 * 1 MiB (2 ^ (15 + 5)) for text data.
 *
 * Sample writer code::
 *
 *	const char *textstr = "message text";
 *	struct prb_reserved_entry e;
 *	struct printk_record r;
 *
 *	// specify how much to allocate
 *	prb_rec_init_wr(&r, strlen(textstr) + 1);
 *
 *	if (prb_reserve(&e, &test_rb, &r)) {
 *		snprintf(r.text_buf, r.text_buf_size, "%s", textstr);
 *
 *		r.info->text_len = strlen(textstr);
 *		r.info->ts_nsec = local_clock();
 *		r.info->caller_id = printk_caller_id();
 *
 *		// commit and finalize the record
 *		prb_final_commit(&e);
 *	}
 *
 * Note that additional writer functions are available to extend a record
 * after it has been committed but not yet finalized. This can be done as
 * long as no new records have been reserved and the caller is the same.
 *
 * Sample writer code (record extending)::
 *
 *		// alternate rest of previous example
 *
 *		r.info->text_len = strlen(textstr);
 *		r.info->ts_nsec = local_clock();
 *		r.info->caller_id = printk_caller_id();
 *
 *		// commit the record (but do not finalize yet)
 *		prb_commit(&e);
 *	}
 *
 *	...
 *
 *	// specify additional 5 bytes text space to extend
 *	prb_rec_init_wr(&r, 5);
 *
 *	// try to extend, but only if it does not exceed 32 bytes
 *	if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id(), 32)) {
 *		snprintf(&r.text_buf[r.info->text_len],
 *			 r.text_buf_size - r.info->text_len, "hello");
 *
 *		r.info->text_len += 5;
 *
 *		// commit and finalize the record
 *		prb_final_commit(&e);
 *	}
 *
 * Sample reader code::
 *
 *	struct printk_info info;
 *	struct printk_record r;
 *	char text_buf[32];
 *	u64 seq;
 *
 *	prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf));
 *
 *	prb_for_each_record(0, &test_rb, &seq, &r) {
 *		if (info.seq != seq)
 *			pr_warn("lost %llu records\n", info.seq - seq);
 *
 *		if (info.text_len > r.text_buf_size) {
 *			pr_warn("record %llu text truncated\n", info.seq);
 *			text_buf[r.text_buf_size - 1] = 0;
 *		}
 *
 *		pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec,
 *			&text_buf[0]);
 *	}
 *
 * Note that additional less convenient reader functions are available to
 * allow complex record access.
 *
 * ABA Issues
 * ~~~~~~~~~~
 * To help avoid ABA issues, descriptors are referenced by IDs (array index
 * values combined with tagged bits counting array wraps) and data blocks are
 * referenced by logical positions (array index values combined with tagged
 * bits counting array wraps). However, on 32-bit systems the number of
 * tagged bits is relatively small such that an ABA incident is (at least
 * theoretically) possible. For example, if 4 million maximally sized (1KiB)
 * printk messages were to occur in NMI context on a 32-bit system, the
 * interrupted context would not be able to recognize that the 32-bit integer
 * completely wrapped and thus represents a different data block than the one
 * the interrupted context expects.
 *
 * To help combat this possibility, additional state checking is performed
 * (such as using cmpxchg() even though set() would suffice). These extra
 * checks are commented as such and will hopefully catch any ABA issue that
 * a 32-bit system might experience.
 *
 * Memory Barriers
 * ~~~~~~~~~~~~~~~
 * Multiple memory barriers are used. To simplify proving correctness and
 * generating litmus tests, lines of code related to memory barriers
 * (loads, stores, and the associated memory barriers) are labeled::
 *
 *	LMM(function:letter)
 *
 * Comments reference the labels using only the "function:letter" part.
 *
 * The memory barrier pairs and their ordering are:
 *
 *   desc_reserve:D / desc_reserve:B
 *     push descriptor tail (id), then push descriptor head (id)
 *
 *   desc_reserve:D / data_push_tail:B
 *     push data tail (lpos), then set new descriptor reserved (state)
 *
 *   desc_reserve:D / desc_push_tail:C
 *     push descriptor tail (id), then set new descriptor reserved (state)
 *
 *   desc_reserve:D / prb_first_seq:C
 *     push descriptor tail (id), then set new descriptor reserved (state)
 *
 *   desc_reserve:F / desc_read:D
 *     set new descriptor id and reserved (state), then allow writer changes
 *
 *   data_alloc:A (or data_realloc:A) / desc_read:D
 *     set old descriptor reusable (state), then modify new data block area
 *
 *   data_alloc:A (or data_realloc:A) / data_push_tail:B
 *     push data tail (lpos), then modify new data block area
 *
 *   _prb_commit:B / desc_read:B
 *     store writer changes, then set new descriptor committed (state)
 *
 *   desc_reopen_last:A / _prb_commit:B
 *     set descriptor reserved (state), then read descriptor data
 *
 *   _prb_commit:B / desc_reserve:D
 *     set new descriptor committed (state), then check descriptor head (id)
 *
 *   data_push_tail:D / data_push_tail:A
 *     set descriptor reusable (state), then push data tail (lpos)
 *
 *   desc_push_tail:B / desc_reserve:D
 *     set descriptor reusable (state), then push descriptor tail (id)
 *
 *   desc_update_last_finalized:A / desc_last_finalized_seq:A
 *     store finalized record, then set new highest finalized sequence number
 */
|---|

#define DATA_SIZE(data_ring)		_DATA_SIZE((data_ring)->size_bits)
#define DATA_SIZE_MASK(data_ring)	(DATA_SIZE(data_ring) - 1)

#define DESCS_COUNT(desc_ring)		_DESCS_COUNT((desc_ring)->count_bits)
#define DESCS_COUNT_MASK(desc_ring)	(DESCS_COUNT(desc_ring) - 1)

/* Determine the data array index from a logical position. */
#define DATA_INDEX(data_ring, lpos)	((lpos) & DATA_SIZE_MASK(data_ring))

/* Determine the desc array index from an ID or sequence number. */
#define DESC_INDEX(desc_ring, n)	((n) & DESCS_COUNT_MASK(desc_ring))

/* Determine how many times the data array has wrapped. */
#define DATA_WRAPS(data_ring, lpos)	((lpos) >> (data_ring)->size_bits)

/* Determine if a logical position refers to a data-less block. */
#define LPOS_DATALESS(lpos)		((lpos) & 1UL)
#define BLK_DATALESS(blk)		(LPOS_DATALESS((blk)->begin) && \
					 LPOS_DATALESS((blk)->next))

/* Get the logical position at index 0 of the current wrap. */
#define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \
	((lpos) & ~DATA_SIZE_MASK(data_ring))

/* Get the ID for the same index of the previous wrap as the given ID. */
#define DESC_ID_PREV_WRAP(desc_ring, id) \
	DESC_ID((id) - DESCS_COUNT(desc_ring))

/*
 * A data block: mapped directly to the beginning of the data block area
 * specified as a logical position within the data ring.
 *
 * @id:   the ID of the associated descriptor
 * @data: the writer data
 *
 * Note that the size of a data block is only known by its associated
 * descriptor.
 */
struct prb_data_block {
	unsigned long	id;
	char		data[];
};
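
/*
 * Illustrative note: since only the descriptor knows the block size, the
 * size of the writer data area of a non-wrapping data block is derived as
 * (blk_lpos.next - blk_lpos.begin) - sizeof(unsigned long), as done by the
 * memcpy() in data_realloc() below.
 */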
|---|

/*
 * Return the descriptor associated with @n. @n can be either a
 * descriptor ID or a sequence number.
 */
static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n)
{
	return &desc_ring->descs[DESC_INDEX(desc_ring, n)];
}

/*
 * Return the printk_info associated with @n. @n can be either a
 * descriptor ID or a sequence number.
 */
static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n)
{
	return &desc_ring->infos[DESC_INDEX(desc_ring, n)];
}

static struct prb_data_block *to_block(struct prb_data_ring *data_ring,
				       unsigned long begin_lpos)
{
	return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)];
}

/*
 * Increase the data size to account for data block meta data plus any
 * padding so that the adjacent data block is aligned on the ID size.
 */
static unsigned int to_blk_size(unsigned int size)
{
	struct prb_data_block *db = NULL;

	size += sizeof(*db);
	size = ALIGN(size, sizeof(db->id));
	return size;
}

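/*
 * Example (illustrative, assuming a 64-bit system where sizeof(long) == 8):
 * to_blk_size(13) returns ALIGN(13 + 8, 8) == 24, i.e. 8 bytes of block
 * meta data (the ID) plus 13 writer bytes, padded so that the following
 * data block stays aligned on the ID size.
 */
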
|---|
/*
 * Sanity checker for reserve size. The ringbuffer code assumes that a data
 * block does not exceed the maximum possible size that could fit within the
 * ringbuffer. This function provides that basic size check so that the
 * assumption is safe. In particular, it guarantees that data_push_tail() will
 * never attempt to push the tail beyond the head.
 */
static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
{
	/* Data-less blocks take no space. */
	if (size == 0)
		return true;

	/*
	 * If data blocks were allowed to be larger than half the data ring
	 * size, a wrapping data block could require more space than the full
	 * ringbuffer.
	 */
	return to_blk_size(size) <= DATA_SIZE(data_ring) / 2;
}

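/*
 * Example (illustrative): for a text data ring with DATA_SIZE() == 4096,
 * a reserve is accepted only if to_blk_size(size) <= 2048; on a 64-bit
 * system that allows at most 2040 bytes of writer data per record.
 */
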
|---|
/* Query the state of a descriptor. */
static enum desc_state get_desc_state(unsigned long id,
				      unsigned long state_val)
{
	if (id != DESC_ID(state_val))
		return desc_miss;

	return DESC_STATE(state_val);
}

/*
 * Get a copy of a specified descriptor and return its queried state. If the
 * descriptor is in an inconsistent state (miss or reserved), the caller can
 * only expect the descriptor's @state_var field to be valid.
 *
 * The sequence number and caller_id can be optionally retrieved. Like all
 * non-state_var data, they are only valid if the descriptor is in a
 * consistent state.
 */
static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
				 unsigned long id, struct prb_desc *desc_out,
				 u64 *seq_out, u32 *caller_id_out)
{
	struct printk_info *info = to_info(desc_ring, id);
	struct prb_desc *desc = to_desc(desc_ring, id);
	atomic_long_t *state_var = &desc->state_var;
	enum desc_state d_state;
	unsigned long state_val;

	/* Check the descriptor state. */
	state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */
	d_state = get_desc_state(id, state_val);
	if (d_state == desc_miss || d_state == desc_reserved) {
		/*
		 * The descriptor is in an inconsistent state. Set at least
		 * @state_var so that the caller can see the details of
		 * the inconsistent state.
		 */
		goto out;
	}

	/*
	 * Guarantee the state is loaded before copying the descriptor
	 * content. This avoids copying obsolete descriptor content that might
	 * not apply to the descriptor state. This pairs with _prb_commit:B.
	 *
	 * Memory barrier involvement:
	 *
	 * If desc_read:A reads from _prb_commit:B, then desc_read:C reads
	 * from _prb_commit:A.
	 *
	 * Relies on:
	 *
	 * WMB from _prb_commit:A to _prb_commit:B
	 *    matching
	 * RMB from desc_read:A to desc_read:C
	 */
	smp_rmb(); /* LMM(desc_read:B) */

	/*
	 * Copy the descriptor data. The data is not valid until the
	 * state has been re-checked. A memcpy() for all of @desc
	 * cannot be used because of the atomic_t @state_var field.
	 */
	if (desc_out) {
		memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos,
		       sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */
	}
	if (seq_out)
		*seq_out = info->seq; /* also part of desc_read:C */
	if (caller_id_out)
		*caller_id_out = info->caller_id; /* also part of desc_read:C */

	/*
	 * 1. Guarantee the descriptor content is loaded before re-checking
	 *    the state. This avoids reading an obsolete descriptor state
	 *    that may not apply to the copied content. This pairs with
	 *    desc_reserve:F.
	 *
	 *    Memory barrier involvement:
	 *
	 *    If desc_read:C reads from desc_reserve:G, then desc_read:E
	 *    reads from desc_reserve:F.
	 *
	 *    Relies on:
	 *
	 *    WMB from desc_reserve:F to desc_reserve:G
	 *       matching
	 *    RMB from desc_read:C to desc_read:E
	 *
	 * 2. Guarantee the record data is loaded before re-checking the
	 *    state. This avoids reading an obsolete descriptor state that may
	 *    not apply to the copied data. This pairs with data_alloc:A and
	 *    data_realloc:A.
	 *
	 *    Memory barrier involvement:
	 *
	 *    If copy_data:A reads from data_alloc:B, then desc_read:E
	 *    reads from desc_make_reusable:A.
	 *
	 *    Relies on:
	 *
	 *    MB from desc_make_reusable:A to data_alloc:B
	 *       matching
	 *    RMB from desc_read:C to desc_read:E
	 *
	 *    Note: desc_make_reusable:A and data_alloc:B can be different
	 *          CPUs. However, the data_alloc:B CPU (which performs the
	 *          full memory barrier) must have previously seen
	 *          desc_make_reusable:A.
	 */
	smp_rmb(); /* LMM(desc_read:D) */

	/*
	 * The data has been copied. Return the current descriptor state,
	 * which may have changed since the load above.
	 */
	state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */
	d_state = get_desc_state(id, state_val);
out:
	if (desc_out)
		atomic_long_set(&desc_out->state_var, state_val);
	return d_state;
}

/*
 * Take a specified descriptor out of the finalized state by attempting
 * the transition from finalized to reusable. Either this context or some
 * other context will have been successful.
 */
static void desc_make_reusable(struct prb_desc_ring *desc_ring,
			       unsigned long id)
{
	unsigned long val_finalized = DESC_SV(id, desc_finalized);
	unsigned long val_reusable = DESC_SV(id, desc_reusable);
	struct prb_desc *desc = to_desc(desc_ring, id);
	atomic_long_t *state_var = &desc->state_var;

	atomic_long_cmpxchg_relaxed(state_var, val_finalized,
				    val_reusable); /* LMM(desc_make_reusable:A) */
}

/*
 * Given the text data ring, put the associated descriptor of each
 * data block from @lpos_begin until @lpos_end into the reusable state.
 *
 * If there is any problem making the associated descriptor reusable, either
 * the descriptor has not yet been finalized or another writer context has
 * already pushed the tail lpos past the problematic data block. Regardless,
 * on error the caller can re-load the tail lpos to determine the situation.
 */
static bool data_make_reusable(struct printk_ringbuffer *rb,
			       unsigned long lpos_begin,
			       unsigned long lpos_end,
			       unsigned long *lpos_out)
{
	struct prb_data_ring *data_ring = &rb->text_data_ring;
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	struct prb_data_block *blk;
	enum desc_state d_state;
	struct prb_desc desc;
	struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos;
	unsigned long id;

	/* Loop until @lpos_begin has advanced to or beyond @lpos_end. */
	while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) {
		blk = to_block(data_ring, lpos_begin);

		/*
		 * Load the block ID from the data block. This is a data race
		 * against a writer that may have newly reserved this data
		 * area. If the loaded value matches a valid descriptor ID,
		 * the blk_lpos of that descriptor will be checked to make
		 * sure it points back to this data block. If the check fails,
		 * the data area has been recycled by another writer.
		 */
		id = blk->id; /* LMM(data_make_reusable:A) */

		d_state = desc_read(desc_ring, id, &desc,
				    NULL, NULL); /* LMM(data_make_reusable:B) */

		switch (d_state) {
		case desc_miss:
		case desc_reserved:
		case desc_committed:
			return false;
		case desc_finalized:
			/*
			 * This data block is invalid if the descriptor
			 * does not point back to it.
			 */
			if (blk_lpos->begin != lpos_begin)
				return false;
			desc_make_reusable(desc_ring, id);
			break;
		case desc_reusable:
			/*
			 * This data block is invalid if the descriptor
			 * does not point back to it.
			 */
			if (blk_lpos->begin != lpos_begin)
				return false;
			break;
		}

		/* Advance @lpos_begin to the next data block. */
		lpos_begin = blk_lpos->next;
	}

	*lpos_out = lpos_begin;
	return true;
}

|---|
/*
 * Advance the data ring tail to at least @lpos. This function puts
 * descriptors into the reusable state if the tail is pushed beyond
 * their associated data block.
 */
static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos)
{
	struct prb_data_ring *data_ring = &rb->text_data_ring;
	unsigned long tail_lpos_new;
	unsigned long tail_lpos;
	unsigned long next_lpos;

	/* If @lpos is from a data-less block, there is nothing to do. */
	if (LPOS_DATALESS(lpos))
		return true;

	/*
	 * Any descriptor states that have transitioned to reusable due to the
	 * data tail being pushed to this loaded value will be visible to this
	 * CPU. This pairs with data_push_tail:D.
	 *
	 * Memory barrier involvement:
	 *
	 * If data_push_tail:A reads from data_push_tail:D, then this CPU can
	 * see desc_make_reusable:A.
	 *
	 * Relies on:
	 *
	 * MB from desc_make_reusable:A to data_push_tail:D
	 *    matches
	 * READFROM from data_push_tail:D to data_push_tail:A
	 *    thus
	 * READFROM from desc_make_reusable:A to this CPU
	 */
	tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */

	/*
	 * Loop until the tail lpos is at or beyond @lpos. This condition
	 * may already be satisfied, resulting in no full memory barrier
	 * from data_push_tail:D being performed. However, since this CPU
	 * sees the new tail lpos, any descriptor states that transitioned to
	 * the reusable state must already be visible.
	 */
	while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) {
		/*
		 * Make all descriptors reusable that are associated with
		 * data blocks before @lpos.
		 */
		if (!data_make_reusable(rb, tail_lpos, lpos, &next_lpos)) {
			/*
			 * 1. Guarantee the block ID loaded in
			 *    data_make_reusable() is performed before
			 *    reloading the tail lpos. The failed
			 *    data_make_reusable() may be due to a newly
			 *    recycled data area causing the tail lpos to
			 *    have been previously pushed. This pairs with
			 *    data_alloc:A and data_realloc:A.
			 *
			 *    Memory barrier involvement:
			 *
			 *    If data_make_reusable:A reads from data_alloc:B,
			 *    then data_push_tail:C reads from
			 *    data_push_tail:D.
			 *
			 *    Relies on:
			 *
			 *    MB from data_push_tail:D to data_alloc:B
			 *       matching
			 *    RMB from data_make_reusable:A to
			 *    data_push_tail:C
			 *
			 *    Note: data_push_tail:D and data_alloc:B can be
			 *          different CPUs. However, the data_alloc:B
			 *          CPU (which performs the full memory
			 *          barrier) must have previously seen
			 *          data_push_tail:D.
			 *
			 * 2. Guarantee the descriptor state loaded in
			 *    data_make_reusable() is performed before
			 *    reloading the tail lpos. The failed
			 *    data_make_reusable() may be due to a newly
			 *    recycled descriptor causing the tail lpos to
			 *    have been previously pushed. This pairs with
			 *    desc_reserve:D.
			 *
			 *    Memory barrier involvement:
			 *
			 *    If data_make_reusable:B reads from
			 *    desc_reserve:F, then data_push_tail:C reads
			 *    from data_push_tail:D.
			 *
			 *    Relies on:
			 *
			 *    MB from data_push_tail:D to desc_reserve:F
			 *       matching
			 *    RMB from data_make_reusable:B to
			 *    data_push_tail:C
			 *
			 *    Note: data_push_tail:D and desc_reserve:F can
			 *          be different CPUs. However, the
			 *          desc_reserve:F CPU (which performs the
			 *          full memory barrier) must have previously
			 *          seen data_push_tail:D.
			 */
			smp_rmb(); /* LMM(data_push_tail:B) */

			tail_lpos_new = atomic_long_read(&data_ring->tail_lpos
							); /* LMM(data_push_tail:C) */
			if (tail_lpos_new == tail_lpos)
				return false;

			/* Another CPU pushed the tail. Try again. */
			tail_lpos = tail_lpos_new;
			continue;
		}

		/*
		 * Guarantee any descriptor states that have transitioned to
		 * reusable are stored before pushing the tail lpos. A full
		 * memory barrier is needed since other CPUs may have made
		 * the descriptor states reusable. This pairs with
		 * data_push_tail:A.
		 */
		if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos,
					    next_lpos)) { /* LMM(data_push_tail:D) */
			break;
		}
	}

	return true;
}

|---|
/*
 * Advance the desc ring tail. This function advances the tail by one
 * descriptor, thus invalidating the oldest descriptor. Before advancing
 * the tail, the tail descriptor is made reusable and all data blocks up to
 * and including the descriptor's data block are invalidated (i.e. the data
 * ring tail is pushed past the data block of the descriptor being made
 * reusable).
 */
static bool desc_push_tail(struct printk_ringbuffer *rb,
			   unsigned long tail_id)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	enum desc_state d_state;
	struct prb_desc desc;

	d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL);

	switch (d_state) {
	case desc_miss:
		/*
		 * If the ID is exactly 1 wrap behind the expected, it is
		 * in the process of being reserved by another writer and
		 * must be considered reserved.
		 */
		if (DESC_ID(atomic_long_read(&desc.state_var)) ==
		    DESC_ID_PREV_WRAP(desc_ring, tail_id)) {
			return false;
		}

		/*
		 * The ID has changed. Another writer must have pushed the
		 * tail and recycled the descriptor already. Success is
		 * returned because the caller is only interested in the
		 * specified tail being pushed, which it was.
		 */
		return true;
	case desc_reserved:
	case desc_committed:
		return false;
	case desc_finalized:
		desc_make_reusable(desc_ring, tail_id);
		break;
	case desc_reusable:
		break;
	}

	/*
	 * Data blocks must be invalidated before their associated
	 * descriptor can be made available for recycling. Invalidating
	 * them later is not possible because there is no way to trust
	 * data blocks once their associated descriptor is gone.
	 */

	if (!data_push_tail(rb, desc.text_blk_lpos.next))
		return false;

	/*
	 * Check the next descriptor after @tail_id before pushing the tail
	 * to it because the tail must always be in a finalized or reusable
	 * state. The implementation of prb_first_seq() relies on this.
	 *
	 * A successful read implies that the next descriptor is less than or
	 * equal to @head_id so there is no risk of pushing the tail past the
	 * head.
	 */
	d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc,
			    NULL, NULL); /* LMM(desc_push_tail:A) */

	if (d_state == desc_finalized || d_state == desc_reusable) {
		/*
		 * Guarantee any descriptor states that have transitioned to
		 * reusable are stored before pushing the tail ID. This allows
		 * verifying the recycled descriptor state. A full memory
		 * barrier is needed since other CPUs may have made the
		 * descriptor states reusable. This pairs with desc_reserve:D.
		 */
		atomic_long_cmpxchg(&desc_ring->tail_id, tail_id,
				    DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */
	} else {
		/*
		 * Guarantee the last state load from desc_read() is before
		 * reloading @tail_id in order to see a new tail ID in the
		 * case that the descriptor has been recycled. This pairs
		 * with desc_reserve:D.
		 *
		 * Memory barrier involvement:
		 *
		 * If desc_push_tail:A reads from desc_reserve:F, then
		 * desc_push_tail:D reads from desc_push_tail:B.
		 *
		 * Relies on:
		 *
		 * MB from desc_push_tail:B to desc_reserve:F
		 *    matching
		 * RMB from desc_push_tail:A to desc_push_tail:D
		 *
		 * Note: desc_push_tail:B and desc_reserve:F can be different
		 *       CPUs. However, the desc_reserve:F CPU (which performs
		 *       the full memory barrier) must have previously seen
		 *       desc_push_tail:B.
		 */
		smp_rmb(); /* LMM(desc_push_tail:C) */

		/*
		 * Re-check the tail ID. The descriptor following @tail_id is
		 * not in an allowed tail state. But if the tail has since
		 * been moved by another CPU, then it does not matter.
		 */
		if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */
			return false;
	}

	return true;
}

|---|
/* Reserve a new descriptor, invalidating the oldest if necessary. */
static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	unsigned long prev_state_val;
	unsigned long id_prev_wrap;
	struct prb_desc *desc;
	unsigned long head_id;
	unsigned long id;

	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */

	do {
		id = DESC_ID(head_id + 1);
		id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id);

		/*
		 * Guarantee the head ID is read before reading the tail ID.
		 * Since the tail ID is updated before the head ID, this
		 * guarantees that @id_prev_wrap is never ahead of the tail
		 * ID. This pairs with desc_reserve:D.
		 *
		 * Memory barrier involvement:
		 *
		 * If desc_reserve:A reads from desc_reserve:D, then
		 * desc_reserve:C reads from desc_push_tail:B.
		 *
		 * Relies on:
		 *
		 * MB from desc_push_tail:B to desc_reserve:D
		 *    matching
		 * RMB from desc_reserve:A to desc_reserve:C
		 *
		 * Note: desc_push_tail:B and desc_reserve:D can be different
		 *       CPUs. However, the desc_reserve:D CPU (which performs
		 *       the full memory barrier) must have previously seen
		 *       desc_push_tail:B.
		 */
		smp_rmb(); /* LMM(desc_reserve:B) */

		if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id
						     )) { /* LMM(desc_reserve:C) */
			/*
			 * Make space for the new descriptor by
			 * advancing the tail.
			 */
			if (!desc_push_tail(rb, id_prev_wrap))
				return false;
		}

		/*
		 * 1. Guarantee the tail ID is read before validating the
		 *    recycled descriptor state. A read memory barrier is
		 *    sufficient for this. This pairs with desc_push_tail:B.
		 *
		 *    Memory barrier involvement:
		 *
		 *    If desc_reserve:C reads from desc_push_tail:B, then
		 *    desc_reserve:E reads from desc_make_reusable:A.
		 *
		 *    Relies on:
		 *
		 *    MB from desc_make_reusable:A to desc_push_tail:B
		 *       matching
		 *    RMB from desc_reserve:C to desc_reserve:E
		 *
		 *    Note: desc_make_reusable:A and desc_push_tail:B can be
		 *          different CPUs. However, the desc_push_tail:B CPU
		 *          (which performs the full memory barrier) must have
		 *          previously seen desc_make_reusable:A.
		 *
		 * 2. Guarantee the tail ID is stored before storing the head
		 *    ID. This pairs with desc_reserve:B.
		 *
		 * 3. Guarantee any data ring tail changes are stored before
		 *    recycling the descriptor. Data ring tail changes can
		 *    happen via desc_push_tail()->data_push_tail(). A full
		 *    memory barrier is needed since another CPU may have
		 *    pushed the data ring tails. This pairs with
		 *    data_push_tail:B.
		 *
		 * 4. Guarantee a new tail ID is stored before recycling the
		 *    descriptor. A full memory barrier is needed since
		 *    another CPU may have pushed the tail ID. This pairs
		 *    with desc_push_tail:C and this also pairs with
		 *    prb_first_seq:C.
		 *
		 * 5. Guarantee the head ID is stored before trying to
		 *    finalize the previous descriptor. This pairs with
		 *    _prb_commit:B.
		 */
	} while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id,
					  id)); /* LMM(desc_reserve:D) */

	desc = to_desc(desc_ring, id);

	/*
	 * If the descriptor has been recycled, verify the old state val.
	 * See "ABA Issues" about why this verification is performed.
	 */
	prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */
	if (prev_state_val &&
	    get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) {
		WARN_ON_ONCE(1);
		return false;
	}

	/*
	 * Assign the descriptor a new ID and set its state to reserved.
	 * See "ABA Issues" about why cmpxchg() instead of set() is used.
	 *
	 * Guarantee the new descriptor ID and state is stored before making
	 * any other changes. A write memory barrier is sufficient for this.
	 * This pairs with desc_read:D.
	 */
	if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val,
				     DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */
		WARN_ON_ONCE(1);
		return false;
	}

	/* Now data in @desc can be modified: LMM(desc_reserve:G) */

	*id_out = id;
	return true;
}

|---|
/* Determine the end of a data block. */
static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
				   unsigned long lpos, unsigned int size)
{
	unsigned long begin_lpos;
	unsigned long next_lpos;

	begin_lpos = lpos;
	next_lpos = lpos + size;

	/* First check if the data block does not wrap. */
	if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos))
		return next_lpos;

	/* Wrapping data blocks store their data at the beginning. */
	return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size);
}
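
/*
 * Example (illustrative, hypothetical sizes): for a 64-byte data ring
 * (size_bits == 6), get_next_lpos(ring, 40, 32) computes 40 + 32 == 72,
 * which lies in the next wrap, so the block is placed at the start of that
 * wrap and the function returns 64 + 32 == 96.
 */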
|---|

/*
 * Allocate a new data block, invalidating the oldest data block(s)
 * if necessary. This function also associates the data block with
 * a specified descriptor.
 */
static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size,
			struct prb_data_blk_lpos *blk_lpos, unsigned long id)
{
	struct prb_data_ring *data_ring = &rb->text_data_ring;
	struct prb_data_block *blk;
	unsigned long begin_lpos;
	unsigned long next_lpos;

	if (size == 0) {
		/*
		 * Data blocks are not created for empty lines. Instead, the
		 * reader will recognize these special lpos values and handle
		 * them appropriately.
		 */
		blk_lpos->begin = EMPTY_LINE_LPOS;
		blk_lpos->next = EMPTY_LINE_LPOS;
		return NULL;
	}

	size = to_blk_size(size);

	begin_lpos = atomic_long_read(&data_ring->head_lpos);

	do {
		next_lpos = get_next_lpos(data_ring, begin_lpos, size);

		/*
		 * data_check_size() prevents data block allocation that could
		 * cause illegal ringbuffer states. But double check that the
		 * used space will not be bigger than the ring buffer. Wrapped
		 * messages need to reserve more space, see get_next_lpos().
		 *
		 * Specify a data-less block when the check or the allocation
		 * fails.
		 */
		if (WARN_ON_ONCE(next_lpos - begin_lpos > DATA_SIZE(data_ring)) ||
		    !data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) {
			blk_lpos->begin = FAILED_LPOS;
			blk_lpos->next = FAILED_LPOS;
			return NULL;
		}

		/*
		 * 1. Guarantee any descriptor states that have transitioned
		 *    to reusable are stored before modifying the newly
		 *    allocated data area. A full memory barrier is needed
		 *    since other CPUs may have made the descriptor states
		 *    reusable. See data_push_tail:A about why the reusable
		 *    states are visible. This pairs with desc_read:D.
		 *
		 * 2. Guarantee any updated tail lpos is stored before
		 *    modifying the newly allocated data area. Another CPU may
		 *    be in data_make_reusable() and is reading a block ID
		 *    from this area. data_make_reusable() can handle reading
		 *    a garbage block ID value, but then it must be able to
		 *    load a new tail lpos. A full memory barrier is needed
		 *    since other CPUs may have updated the tail lpos. This
		 *    pairs with data_push_tail:B.
		 */
	} while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos,
					  next_lpos)); /* LMM(data_alloc:A) */

	blk = to_block(data_ring, begin_lpos);
	blk->id = id; /* LMM(data_alloc:B) */

	if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) {
		/* Wrapping data blocks store their data at the beginning. */
		blk = to_block(data_ring, 0);

		/*
		 * Store the ID on the wrapped block for consistency.
		 * The printk_ringbuffer does not actually use it.
		 */
		blk->id = id;
	}

	blk_lpos->begin = begin_lpos;
	blk_lpos->next = next_lpos;

	return &blk->data[0];
}

|---|
/*
 * Try to resize an existing data block associated with the descriptor
 * specified by @id. If the resized data block should become wrapped, it
 * copies the old data to the new data block. If @size yields a data block
 * with the same or a smaller size, the data block is left as is.
 *
 * Fail if this is not the last allocated data block or if there is not
 * enough space or it is not possible to make enough space.
 *
 * Return a pointer to the beginning of the entire data buffer or NULL on
 * failure.
 */
|---|
| 1119 | static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, | 
|---|
| 1120 | struct prb_data_blk_lpos *blk_lpos, unsigned long id) | 
|---|
| 1121 | { | 
|---|
| 1122 | struct prb_data_ring *data_ring = &rb->text_data_ring; | 
|---|
| 1123 | struct prb_data_block *blk; | 
|---|
| 1124 | unsigned long head_lpos; | 
|---|
| 1125 | unsigned long next_lpos; | 
|---|
| 1126 | bool wrapped; | 
|---|
| 1127 |  | 
|---|
| 1128 | /* Reallocation only works if @blk_lpos is the newest data block. */ | 
|---|
| 1129 | head_lpos = atomic_long_read(v: &data_ring->head_lpos); | 
|---|
| 1130 | if (head_lpos != blk_lpos->next) | 
|---|
| 1131 | return NULL; | 
|---|
| 1132 |  | 
|---|
| 1133 | /* Keep track if @blk_lpos was a wrapping data block. */ | 
|---|
| 1134 | wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); | 
|---|
| 1135 |  | 
|---|
| 1136 | size = to_blk_size(size); | 
|---|
| 1137 |  | 
|---|
| 1138 | next_lpos = get_next_lpos(data_ring, lpos: blk_lpos->begin, size); | 
|---|
| 1139 |  | 
|---|
| 1140 | /* If the data block does not increase, there is nothing to do. */ | 
|---|
| 1141 | if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { | 
|---|
| 1142 | if (wrapped) | 
|---|
| 1143 | blk = to_block(data_ring, 0); | 
|---|
| 1144 | else | 
|---|
| 1145 | blk = to_block(data_ring, blk_lpos->begin); | 
|---|
| 1146 | return &blk->data[0]; | 
|---|
| 1147 | } | 
|---|
| 1148 |  | 
|---|
| 1149 | /* | 
|---|
| 1150 | * data_check_size() prevents data block reallocation that could | 
|---|
| 1151 | * cause illegal ringbuffer states. But double check that the | 
|---|
| 1152 | * new used space will not be bigger than the ring buffer. Wrapped | 
|---|
| 1153 | * messages need to reserve more space, see get_next_lpos(). | 
|---|
| 1154 | * | 
|---|
| 1155 | * Specify failure when the check or the allocation fails. | 
|---|
| 1156 | */ | 
|---|
| 1157 | if (WARN_ON_ONCE(next_lpos - blk_lpos->begin > DATA_SIZE(data_ring)) || | 
|---|
| 1158 | !data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) { | 
|---|
| 1159 | return NULL; | 
|---|
| 1160 | } | 
|---|
| 1161 |  | 
|---|
| 1162 | /* The memory barrier involvement is the same as data_alloc:A. */ | 
|---|
| 1163 | if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, | 
|---|
| 1164 | next_lpos)) { /* LMM(data_realloc:A) */ | 
|---|
| 1165 | return NULL; | 
|---|
| 1166 | } | 
|---|
| 1167 |  | 
|---|
| 1168 | blk = to_block(data_ring, blk_lpos->begin); | 
|---|
| 1169 |  | 
|---|
| 1170 | if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { | 
|---|
| 1171 | struct prb_data_block *old_blk = blk; | 
|---|
| 1172 |  | 
|---|
| 1173 | /* Wrapping data blocks store their data at the beginning. */ | 
|---|
| 1174 | blk = to_block(data_ring, 0); | 
|---|
| 1175 |  | 
|---|
| 1176 | /* | 
|---|
| 1177 | * Store the ID on the wrapped block for consistency. | 
|---|
| 1178 | * The printk_ringbuffer does not actually use it. | 
|---|
| 1179 | */ | 
|---|
| 1180 | blk->id = id; | 
|---|
| 1181 |  | 
|---|
| 1182 | if (!wrapped) { | 
|---|
| 1183 | /* | 
|---|
| 1184 | * Since the allocated space is now in the newly | 
|---|
| 1185 | * created wrapping data block, copy the content | 
|---|
| 1186 | * from the old data block. | 
|---|
| 1187 | */ | 
|---|
| 1188 | memcpy(&blk->data[0], &old_blk->data[0], | 
|---|
| 1189 | (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id)); | 
|---|
| 1190 | } | 
|---|
| 1191 | } | 
|---|
| 1192 |  | 
|---|
| 1193 | blk_lpos->next = next_lpos; | 
|---|
| 1194 |  | 
|---|
| 1195 | return &blk->data[0]; | 
|---|
| 1196 | } | 
|---|
| 1197 |  | 
|---|
| 1198 | /* Return the number of bytes used by a data block. */ | 
|---|
| 1199 | static unsigned int space_used(struct prb_data_ring *data_ring, | 
|---|
| 1200 | struct prb_data_blk_lpos *blk_lpos) | 
|---|
| 1201 | { | 
|---|
| 1202 | /* Data-less blocks take no space. */ | 
|---|
| 1203 | if (BLK_DATALESS(blk_lpos)) | 
|---|
| 1204 | return 0; | 
|---|
| 1205 |  | 
|---|
| 1206 | if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { | 
|---|
| 1207 | /* Data block does not wrap. */ | 
|---|
| 1208 | return (DATA_INDEX(data_ring, blk_lpos->next) - | 
|---|
| 1209 | DATA_INDEX(data_ring, blk_lpos->begin)); | 
|---|
| 1210 | } | 
|---|
| 1211 |  | 
|---|
| 1212 | /* | 
|---|
| 1213 | * For wrapping data blocks, the trailing (wasted) space is | 
|---|
| 1214 | * also counted. | 
|---|
| 1215 | */ | 
|---|
| 1216 | return (DATA_INDEX(data_ring, blk_lpos->next) + | 
|---|
| 1217 | DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin)); | 
|---|
| 1218 | } | 
|---|
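
As an illustration of the two cases above (with sizes assumed for this example): in a 4 KiB text_data_ring, a non-wrapping block with begin index 1000 and next index 1100 uses 1100 - 1000 = 100 bytes, while a wrapping block with begin index 4000 and next index 200 uses 200 + 4096 - 4000 = 296 bytes, i.e. the 96 unused bytes between the block start and the end of the ring are charged to that block.
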
| 1219 |  | 
|---|
| 1220 | /* | 
|---|
| 1221 | * Given @blk_lpos, return a pointer to the writer data from the data block | 
|---|
| 1222 | * and calculate the size of the data part. A NULL pointer is returned if | 
|---|
| 1223 | * @blk_lpos specifies values that could never be legal. | 
|---|
| 1224 | * | 
|---|
| 1225 | * This function (used by readers) performs strict validation on the lpos | 
|---|
| 1226 | * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is | 
|---|
| 1227 | * triggered if an internal error is detected. | 
|---|
| 1228 | */ | 
|---|
| 1229 | static const char *get_data(struct prb_data_ring *data_ring, | 
|---|
| 1230 | struct prb_data_blk_lpos *blk_lpos, | 
|---|
| 1231 | unsigned int *data_size) | 
|---|
| 1232 | { | 
|---|
| 1233 | struct prb_data_block *db; | 
|---|
| 1234 |  | 
|---|
| 1235 | /* Data-less data block description. */ | 
|---|
| 1236 | if (BLK_DATALESS(blk_lpos)) { | 
|---|
| 1237 | /* | 
|---|
| 1238 | * Records that are just empty lines are also valid, even | 
|---|
| 1239 | * though they do not have a data block. For such records | 
|---|
| 1240 | * explicitly return empty string data to signify success. | 
|---|
| 1241 | */ | 
|---|
| 1242 | if (blk_lpos->begin == EMPTY_LINE_LPOS && | 
|---|
| 1243 | blk_lpos->next == EMPTY_LINE_LPOS) { | 
|---|
| 1244 | *data_size = 0; | 
|---|
| 1245 | return ""; | 
|---|
| 1246 | } | 
|---|
| 1247 |  | 
|---|
| 1248 | /* Data lost, invalid, or otherwise unavailable. */ | 
|---|
| 1249 | return NULL; | 
|---|
| 1250 | } | 
|---|
| 1251 |  | 
|---|
| 1252 | /* Regular data block: @begin less than @next and in same wrap. */ | 
|---|
| 1253 | if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && | 
|---|
| 1254 | blk_lpos->begin < blk_lpos->next) { | 
|---|
| 1255 | db = to_block(data_ring, blk_lpos->begin); | 
|---|
| 1256 | *data_size = blk_lpos->next - blk_lpos->begin; | 
|---|
| 1257 |  | 
|---|
| 1258 | /* Wrapping data block: @begin is one wrap behind @next. */ | 
|---|
| 1259 | } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == | 
|---|
| 1260 | DATA_WRAPS(data_ring, blk_lpos->next)) { | 
|---|
| 1261 | db = to_block(data_ring, 0); | 
|---|
| 1262 | *data_size = DATA_INDEX(data_ring, blk_lpos->next); | 
|---|
| 1263 |  | 
|---|
| 1264 | /* Illegal block description. */ | 
|---|
| 1265 | } else { | 
|---|
| 1266 | WARN_ON_ONCE(1); | 
|---|
| 1267 | return NULL; | 
|---|
| 1268 | } | 
|---|
| 1269 |  | 
|---|
| 1270 | /* A valid data block will always be aligned to the ID size. */ | 
|---|
| 1271 | if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || | 
|---|
| 1272 | WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { | 
|---|
| 1273 | return NULL; | 
|---|
| 1274 | } | 
|---|
| 1275 |  | 
|---|
| 1276 | /* A valid data block will always have at least an ID. */ | 
|---|
| 1277 | if (WARN_ON_ONCE(*data_size < sizeof(db->id))) | 
|---|
| 1278 | return NULL; | 
|---|
| 1279 |  | 
|---|
| 1280 | /* Subtract block ID space from size to reflect data size. */ | 
|---|
| 1281 | *data_size -= sizeof(db->id); | 
|---|
| 1282 |  | 
|---|
| 1283 | return &db->data[0]; | 
|---|
| 1284 | } | 
|---|
| 1285 |  | 
|---|
| 1286 | /* | 
|---|
| 1287 | * Attempt to transition the newest descriptor from committed back to reserved | 
|---|
| 1288 | * so that the record can be modified by a writer again. This is only possible | 
|---|
| 1289 | * if the descriptor is not yet finalized and the provided @caller_id matches. | 
|---|
| 1290 | */ | 
|---|
| 1291 | static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring, | 
|---|
| 1292 | u32 caller_id, unsigned long *id_out) | 
|---|
| 1293 | { | 
|---|
| 1294 | unsigned long prev_state_val; | 
|---|
| 1295 | enum desc_state d_state; | 
|---|
| 1296 | struct prb_desc desc; | 
|---|
| 1297 | struct prb_desc *d; | 
|---|
| 1298 | unsigned long id; | 
|---|
| 1299 | u32 cid; | 
|---|
| 1300 |  | 
|---|
| 1301 | id = atomic_long_read(&desc_ring->head_id); | 
|---|
| 1302 |  | 
|---|
| 1303 | /* | 
|---|
| 1304 | * To reduce unnecessary reopening attempts, first check if the descriptor | 
|---|
| 1305 | * state and caller ID are correct. | 
|---|
| 1306 | */ | 
|---|
| 1307 | d_state = desc_read(desc_ring, id, &desc, NULL, &cid); | 
|---|
| 1308 | if (d_state != desc_committed || cid != caller_id) | 
|---|
| 1309 | return NULL; | 
|---|
| 1310 |  | 
|---|
| 1311 | d = to_desc(desc_ring, id); | 
|---|
| 1312 |  | 
|---|
| 1313 | prev_state_val = DESC_SV(id, desc_committed); | 
|---|
| 1314 |  | 
|---|
| 1315 | /* | 
|---|
| 1316 | * Guarantee the reserved state is stored before reading any | 
|---|
| 1317 | * record data. A full memory barrier is needed because @state_var | 
|---|
| 1318 | * modification is followed by reading. This pairs with _prb_commit:B. | 
|---|
| 1319 | * | 
|---|
| 1320 | * Memory barrier involvement: | 
|---|
| 1321 | * | 
|---|
| 1322 | * If desc_reopen_last:A reads from _prb_commit:B, then | 
|---|
| 1323 | * prb_reserve_in_last:A reads from _prb_commit:A. | 
|---|
| 1324 | * | 
|---|
| 1325 | * Relies on: | 
|---|
| 1326 | * | 
|---|
| 1327 | * WMB from _prb_commit:A to _prb_commit:B | 
|---|
| 1328 | *    matching | 
|---|
| 1329 | * MB from desc_reopen_last:A to prb_reserve_in_last:A | 
|---|
| 1330 | */ | 
|---|
| 1331 | if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, | 
|---|
| 1332 | DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */ | 
|---|
| 1333 | return NULL; | 
|---|
| 1334 | } | 
|---|
| 1335 |  | 
|---|
| 1336 | *id_out = id; | 
|---|
| 1337 | return d; | 
|---|
| 1338 | } | 
|---|
| 1339 |  | 
|---|
| 1340 | /** | 
|---|
| 1341 | * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer | 
|---|
| 1342 | *                         used by the newest record. | 
|---|
| 1343 | * | 
|---|
| 1344 | * @e:         The entry structure to setup. | 
|---|
| 1345 | * @rb:        The ringbuffer to re-reserve and extend data in. | 
|---|
| 1346 | * @r:         The record structure to allocate buffers for. | 
|---|
| 1347 | * @caller_id: The caller ID of the caller (reserving writer). | 
|---|
| 1348 | * @max_size:  Fail if the extended size would be greater than this. | 
|---|
| 1349 | * | 
|---|
| 1350 | * This is the public function available to writers to re-reserve and extend | 
|---|
| 1351 | * data. | 
|---|
| 1352 | * | 
|---|
| 1353 | * The writer specifies the text size to extend (not the new total size) by | 
|---|
| 1354 | * setting the @text_buf_size field of @r. To ensure proper initialization | 
|---|
| 1355 | * of @r, prb_rec_init_wr() should be used. | 
|---|
| 1356 | * | 
|---|
| 1357 | * This function will fail if @caller_id does not match the caller ID of the | 
|---|
| 1358 | * newest record. In that case the caller must reserve new data using | 
|---|
| 1359 | * prb_reserve(). | 
|---|
| 1360 | * | 
|---|
| 1361 | * Context: Any context. Disables local interrupts on success. | 
|---|
| 1362 | * Return: true if text data could be extended, otherwise false. | 
|---|
| 1363 | * | 
|---|
| 1364 | * On success: | 
|---|
| 1365 | * | 
|---|
| 1366 | *   - @r->text_buf points to the beginning of the entire text buffer. | 
|---|
| 1367 | * | 
|---|
| 1368 | *   - @r->text_buf_size is set to the new total size of the buffer. | 
|---|
| 1369 | * | 
|---|
| 1370 | *   - @r->info is not touched so that @r->info->text_len could be used | 
|---|
| 1371 | *     to append the text. | 
|---|
| 1372 | * | 
|---|
| 1373 | *   - prb_record_text_space() can be used on @e to query the new | 
|---|
| 1374 | *     actually used space. | 
|---|
| 1375 | * | 
|---|
| 1376 | * Important: All @r->info fields will already be set with the current values | 
|---|
| 1377 | *            for the record. I.e. @r->info->text_len will be less than | 
|---|
| 1378 | *            @text_buf_size. Writers can use @r->info->text_len to know | 
|---|
| 1379 | *            where concatenation begins and writers should update | 
|---|
| 1380 | *            @r->info->text_len after concatenating. | 
|---|
| 1381 | */ | 
|---|
| 1382 | bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, | 
|---|
| 1383 | struct printk_record *r, u32 caller_id, unsigned int max_size) | 
|---|
| 1384 | { | 
|---|
| 1385 | struct prb_desc_ring *desc_ring = &rb->desc_ring; | 
|---|
| 1386 | struct printk_info *info; | 
|---|
| 1387 | unsigned int data_size; | 
|---|
| 1388 | struct prb_desc *d; | 
|---|
| 1389 | unsigned long id; | 
|---|
| 1390 |  | 
|---|
| 1391 | local_irq_save(e->irqflags); | 
|---|
| 1392 |  | 
|---|
| 1393 | /* Transition the newest descriptor back to the reserved state. */ | 
|---|
| 1394 | d = desc_reopen_last(desc_ring, caller_id, &id); | 
|---|
| 1395 | if (!d) { | 
|---|
| 1396 | local_irq_restore(e->irqflags); | 
|---|
| 1397 | goto fail_reopen; | 
|---|
| 1398 | } | 
|---|
| 1399 |  | 
|---|
| 1400 | /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */ | 
|---|
| 1401 |  | 
|---|
| 1402 | info = to_info(desc_ring, id); | 
|---|
| 1403 |  | 
|---|
| 1404 | /* | 
|---|
| 1405 | * Set the @e fields here so that prb_commit() can be used if | 
|---|
| 1406 | * anything fails from now on. | 
|---|
| 1407 | */ | 
|---|
| 1408 | e->rb = rb; | 
|---|
| 1409 | e->id = id; | 
|---|
| 1410 |  | 
|---|
| 1411 | /* | 
|---|
| 1412 | * desc_reopen_last() checked the caller_id, but there was no | 
|---|
| 1413 | * exclusive access at that point. The descriptor may have | 
|---|
| 1414 | * changed since then. | 
|---|
| 1415 | */ | 
|---|
| 1416 | if (caller_id != info->caller_id) | 
|---|
| 1417 | goto fail; | 
|---|
| 1418 |  | 
|---|
| 1419 | if (BLK_DATALESS(&d->text_blk_lpos)) { | 
|---|
| 1420 | if (WARN_ON_ONCE(info->text_len != 0)) { | 
|---|
| 1421 | pr_warn_once("wrong text_len value (%hu, expecting 0)\n", | 
|---|
| 1422 | info->text_len); | 
|---|
| 1423 | info->text_len = 0; | 
|---|
| 1424 | } | 
|---|
| 1425 |  | 
|---|
| 1426 | if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) | 
|---|
| 1427 | goto fail; | 
|---|
| 1428 |  | 
|---|
| 1429 | if (r->text_buf_size > max_size) | 
|---|
| 1430 | goto fail; | 
|---|
| 1431 |  | 
|---|
| 1432 | r->text_buf = data_alloc(rb, r->text_buf_size, | 
|---|
| 1433 | &d->text_blk_lpos, id); | 
|---|
| 1434 | } else { | 
|---|
| 1435 | if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size)) | 
|---|
| 1436 | goto fail; | 
|---|
| 1437 |  | 
|---|
| 1438 | /* | 
|---|
| 1439 | * Increase the buffer size to include the original size. If | 
|---|
| 1440 | * the meta data (@text_len) is not sane, use the full data | 
|---|
| 1441 | * block size. | 
|---|
| 1442 | */ | 
|---|
| 1443 | if (WARN_ON_ONCE(info->text_len > data_size)) { | 
|---|
| 1444 | pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n", | 
|---|
| 1445 | info->text_len, data_size); | 
|---|
| 1446 | info->text_len = data_size; | 
|---|
| 1447 | } | 
|---|
| 1448 | r->text_buf_size += info->text_len; | 
|---|
| 1449 |  | 
|---|
| 1450 | if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) | 
|---|
| 1451 | goto fail; | 
|---|
| 1452 |  | 
|---|
| 1453 | if (r->text_buf_size > max_size) | 
|---|
| 1454 | goto fail; | 
|---|
| 1455 |  | 
|---|
| 1456 | r->text_buf = data_realloc(rb, r->text_buf_size, | 
|---|
| 1457 | &d->text_blk_lpos, id); | 
|---|
| 1458 | } | 
|---|
| 1459 | if (r->text_buf_size && !r->text_buf) | 
|---|
| 1460 | goto fail; | 
|---|
| 1461 |  | 
|---|
| 1462 | r->info = info; | 
|---|
| 1463 |  | 
|---|
| 1464 | e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); | 
|---|
| 1465 |  | 
|---|
| 1466 | return true; | 
|---|
| 1467 | fail: | 
|---|
| 1468 | prb_commit(e); | 
|---|
| 1469 | /* prb_commit() re-enabled interrupts. */ | 
|---|
| 1470 | fail_reopen: | 
|---|
| 1471 | /* Make it clear to the caller that the re-reserve failed. */ | 
|---|
| 1472 | memset(r, 0, sizeof(*r)); | 
|---|
| 1473 | return false; | 
|---|
| 1474 | } | 
|---|
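
The following is a minimal writer-side sketch of the extend-and-fallback pattern described above. It assumes an already initialized ringbuffer rb and a caller_id obtained the same way as for the original reservation, and it trims error handling; it is an illustration, not code taken from the kernel sources.

```c
struct prb_reserved_entry e;
struct printk_record r;

/* Ask for 5 additional bytes of text space on the newest record. */
prb_rec_init_wr(&r, 5);

/* Extend only if the total text size would stay within 32 bytes. */
if (prb_reserve_in_last(&e, rb, &r, caller_id, 32)) {
	/* @r->info->text_len marks where the concatenation begins. */
	memcpy(&r.text_buf[r.info->text_len], "hello", 5);
	r.info->text_len += 5;

	/* Commit and finalize so readers can see the extended text. */
	prb_final_commit(&e);
} else {
	/* Could not extend: the caller must reserve a new record instead. */
}
```
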
| 1475 |  | 
|---|
| 1476 | /* | 
|---|
| 1477 | * @last_finalized_seq value guarantees that all records up to and including | 
|---|
| 1478 | * this sequence number are finalized and can be read. The only exception are | 
|---|
| 1479 | * too old records which have already been overwritten. | 
|---|
| 1480 | * | 
|---|
| 1481 | * It is also guaranteed that @last_finalized_seq only increases. | 
|---|
| 1482 | * | 
|---|
| 1483 | * Be aware that finalized records following non-finalized records are not | 
|---|
| 1484 | * reported because they are not yet available to the reader. For example, | 
|---|
| 1485 | * a new record stored via printk() will not be available to a printer if | 
|---|
| 1486 | * it follows a record that has not been finalized yet. However, once that | 
|---|
| 1487 | * non-finalized record becomes finalized, @last_finalized_seq will be | 
|---|
| 1488 | * appropriately updated and the full set of finalized records will be | 
|---|
| 1489 | * available to the printer. And since each printk() caller will either | 
|---|
| 1490 | * directly print or trigger deferred printing of all available unprinted | 
|---|
| 1491 | * records, all printk() messages will get printed. | 
|---|
| 1492 | */ | 
|---|
| 1493 | static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) | 
|---|
| 1494 | { | 
|---|
| 1495 | struct prb_desc_ring *desc_ring = &rb->desc_ring; | 
|---|
| 1496 | unsigned long ulseq; | 
|---|
| 1497 |  | 
|---|
| 1498 | /* | 
|---|
| 1499 | * Guarantee the sequence number is loaded before loading the | 
|---|
| 1500 | * associated record in order to guarantee that the record can be | 
|---|
| 1501 | * seen by this CPU. This pairs with desc_update_last_finalized:A. | 
|---|
| 1502 | */ | 
|---|
| 1503 | ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq | 
|---|
| 1504 | ); /* LMM(desc_last_finalized_seq:A) */ | 
|---|
| 1505 |  | 
|---|
| 1506 | return __ulseq_to_u64seq(rb, ulseq); | 
|---|
| 1507 | } | 
|---|
| 1508 |  | 
|---|
| 1509 | static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, | 
|---|
| 1510 | struct printk_record *r, unsigned int *line_count); | 
|---|
| 1511 |  | 
|---|
| 1512 | /* | 
|---|
| 1513 | * Check if there are records directly following @last_finalized_seq that are | 
|---|
| 1514 | * finalized. If so, update @last_finalized_seq to the latest of these | 
|---|
| 1515 | * records. It is not allowed to skip over records that are not yet finalized. | 
|---|
| 1516 | */ | 
|---|
| 1517 | static void desc_update_last_finalized(struct printk_ringbuffer *rb) | 
|---|
| 1518 | { | 
|---|
| 1519 | struct prb_desc_ring *desc_ring = &rb->desc_ring; | 
|---|
| 1520 | u64 old_seq = desc_last_finalized_seq(rb); | 
|---|
| 1521 | unsigned long oldval; | 
|---|
| 1522 | unsigned long newval; | 
|---|
| 1523 | u64 finalized_seq; | 
|---|
| 1524 | u64 try_seq; | 
|---|
| 1525 |  | 
|---|
| 1526 | try_again: | 
|---|
| 1527 | finalized_seq = old_seq; | 
|---|
| 1528 | try_seq = finalized_seq + 1; | 
|---|
| 1529 |  | 
|---|
| 1530 | /* Try to find later finalized records. */ | 
|---|
| 1531 | while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { | 
|---|
| 1532 | finalized_seq = try_seq; | 
|---|
| 1533 | try_seq++; | 
|---|
| 1534 | } | 
|---|
| 1535 |  | 
|---|
| 1536 | /* No update needed if no later finalized record was found. */ | 
|---|
| 1537 | if (finalized_seq == old_seq) | 
|---|
| 1538 | return; | 
|---|
| 1539 |  | 
|---|
| 1540 | oldval = __u64seq_to_ulseq(old_seq); | 
|---|
| 1541 | newval = __u64seq_to_ulseq(finalized_seq); | 
|---|
| 1542 |  | 
|---|
| 1543 | /* | 
|---|
| 1544 | * Set the sequence number of a later finalized record that has been | 
|---|
| 1545 | * seen. | 
|---|
| 1546 | * | 
|---|
| 1547 | * Guarantee the record data is visible to other CPUs before storing | 
|---|
| 1548 | * its sequence number. This pairs with desc_last_finalized_seq:A. | 
|---|
| 1549 | * | 
|---|
| 1550 | * Memory barrier involvement: | 
|---|
| 1551 | * | 
|---|
| 1552 | * If desc_last_finalized_seq:A reads from | 
|---|
| 1553 | * desc_update_last_finalized:A, then desc_read:A reads from | 
|---|
| 1554 | * _prb_commit:B. | 
|---|
| 1555 | * | 
|---|
| 1556 | * Relies on: | 
|---|
| 1557 | * | 
|---|
| 1558 | * RELEASE from _prb_commit:B to desc_update_last_finalized:A | 
|---|
| 1559 | *    matching | 
|---|
| 1560 | * ACQUIRE from desc_last_finalized_seq:A to desc_read:A | 
|---|
| 1561 | * | 
|---|
| 1562 | * Note: _prb_commit:B and desc_update_last_finalized:A can be | 
|---|
| 1563 | *       different CPUs. However, the desc_update_last_finalized:A | 
|---|
| 1564 | *       CPU (which performs the release) must have previously seen | 
|---|
| 1565 | *       _prb_commit:B. | 
|---|
| 1566 | */ | 
|---|
| 1567 | if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, | 
|---|
| 1568 | &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ | 
|---|
| 1569 | old_seq = __ulseq_to_u64seq(rb, oldval); | 
|---|
| 1570 | goto try_again; | 
|---|
| 1571 | } | 
|---|
| 1572 | } | 
|---|
| 1573 |  | 
|---|
| 1574 | /* | 
|---|
| 1575 | * Attempt to finalize a specified descriptor. If this fails, the descriptor | 
|---|
| 1576 | * is either already final or it will finalize itself when the writer commits. | 
|---|
| 1577 | */ | 
|---|
| 1578 | static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id) | 
|---|
| 1579 | { | 
|---|
| 1580 | struct prb_desc_ring *desc_ring = &rb->desc_ring; | 
|---|
| 1581 | unsigned long prev_state_val = DESC_SV(id, desc_committed); | 
|---|
| 1582 | struct prb_desc *d = to_desc(desc_ring, id); | 
|---|
| 1583 |  | 
|---|
| 1584 | if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, | 
|---|
| 1585 | DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ | 
|---|
| 1586 | desc_update_last_finalized(rb); | 
|---|
| 1587 | } | 
|---|
| 1588 | } | 
|---|
| 1589 |  | 
|---|
| 1590 | /** | 
|---|
| 1591 | * prb_reserve() - Reserve space in the ringbuffer. | 
|---|
| 1592 | * | 
|---|
| 1593 | * @e:  The entry structure to setup. | 
|---|
| 1594 | * @rb: The ringbuffer to reserve data in. | 
|---|
| 1595 | * @r:  The record structure to allocate buffers for. | 
|---|
| 1596 | * | 
|---|
| 1597 | * This is the public function available to writers to reserve data. | 
|---|
| 1598 | * | 
|---|
| 1599 | * The writer specifies the text size to reserve by setting the | 
|---|
| 1600 | * @text_buf_size field of @r. To ensure proper initialization of @r, | 
|---|
| 1601 | * prb_rec_init_wr() should be used. | 
|---|
| 1602 | * | 
|---|
| 1603 | * Context: Any context. Disables local interrupts on success. | 
|---|
| 1604 | * Return: true if at least text data could be allocated, otherwise false. | 
|---|
| 1605 | * | 
|---|
| 1606 | * On success, the fields @info and @text_buf of @r will be set by this | 
|---|
| 1607 | * function and should be filled in by the writer before committing. Also | 
|---|
| 1608 | * on success, prb_record_text_space() can be used on @e to query the actual | 
|---|
| 1609 | * space used for the text data block. | 
|---|
| 1610 | * | 
|---|
| 1611 | * Important: @info->text_len needs to be set correctly by the writer in | 
|---|
| 1612 | *            order for data to be readable and/or extended. Its value | 
|---|
| 1613 | *            is initialized to 0. | 
|---|
| 1614 | */ | 
|---|
| 1615 | bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, | 
|---|
| 1616 | struct printk_record *r) | 
|---|
| 1617 | { | 
|---|
| 1618 | struct prb_desc_ring *desc_ring = &rb->desc_ring; | 
|---|
| 1619 | struct printk_info *info; | 
|---|
| 1620 | struct prb_desc *d; | 
|---|
| 1621 | unsigned long id; | 
|---|
| 1622 | u64 seq; | 
|---|
| 1623 |  | 
|---|
| 1624 | if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) | 
|---|
| 1625 | goto fail; | 
|---|
| 1626 |  | 
|---|
| 1627 | /* | 
|---|
| 1628 | * Descriptors in the reserved state act as blockers to all further | 
|---|
| 1629 | * reservations once the desc_ring has fully wrapped. Disable | 
|---|
| 1630 | * interrupts during the reserve/commit window in order to minimize | 
|---|
| 1631 | * the likelihood of this happening. | 
|---|
| 1632 | */ | 
|---|
| 1633 | local_irq_save(e->irqflags); | 
|---|
| 1634 |  | 
|---|
| 1635 | if (!desc_reserve(rb, &id)) { | 
|---|
| 1636 | /* Descriptor reservation failures are tracked. */ | 
|---|
| 1637 | atomic_long_inc(&rb->fail); | 
|---|
| 1638 | local_irq_restore(e->irqflags); | 
|---|
| 1639 | goto fail; | 
|---|
| 1640 | } | 
|---|
| 1641 |  | 
|---|
| 1642 | d = to_desc(desc_ring, id); | 
|---|
| 1643 | info = to_info(desc_ring, id); | 
|---|
| 1644 |  | 
|---|
| 1645 | /* | 
|---|
| 1646 | * All @info fields (except @seq) are cleared and must be filled in | 
|---|
| 1647 | * by the writer. Save @seq before clearing because it is used to | 
|---|
| 1648 | * determine the new sequence number. | 
|---|
| 1649 | */ | 
|---|
| 1650 | seq = info->seq; | 
|---|
| 1651 | memset(info, 0, sizeof(*info)); | 
|---|
| 1652 |  | 
|---|
| 1653 | /* | 
|---|
| 1654 | * Set the @e fields here so that prb_commit() can be used if | 
|---|
| 1655 | * text data allocation fails. | 
|---|
| 1656 | */ | 
|---|
| 1657 | e->rb = rb; | 
|---|
| 1658 | e->id = id; | 
|---|
| 1659 |  | 
|---|
| 1660 | /* | 
|---|
| 1661 | * Initialize the sequence number if it has "never been set". | 
|---|
| 1662 | * Otherwise just increment it by a full wrap. | 
|---|
| 1663 | * | 
|---|
| 1664 | * @seq is considered "never been set" if it has a value of 0, | 
|---|
| 1665 | * _except_ for @infos[0], which was specially set up by the ringbuffer | 
|---|
| 1666 | * initializer and therefore is always considered as set. | 
|---|
| 1667 | * | 
|---|
| 1668 | * See the "Bootstrap" comment block in printk_ringbuffer.h for | 
|---|
| 1669 | * details about how the initializer bootstraps the descriptors. | 
|---|
| 1670 | */ | 
|---|
| 1671 | if (seq == 0 && DESC_INDEX(desc_ring, id) != 0) | 
|---|
| 1672 | info->seq = DESC_INDEX(desc_ring, id); | 
|---|
| 1673 | else | 
|---|
| 1674 | info->seq = seq + DESCS_COUNT(desc_ring); | 
|---|
| 1675 |  | 
|---|
| 1676 | /* | 
|---|
| 1677 | * New data is about to be reserved. Once that happens, previous | 
|---|
| 1678 | * descriptors are no longer able to be extended. Finalize the | 
|---|
| 1679 | * previous descriptor now so that it can be made available to | 
|---|
| 1680 | * readers. (For seq==0 there is no previous descriptor.) | 
|---|
| 1681 | */ | 
|---|
| 1682 | if (info->seq > 0) | 
|---|
| 1683 | desc_make_final(rb, DESC_ID(id - 1)); | 
|---|
| 1684 |  | 
|---|
| 1685 | r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); | 
|---|
| 1686 | /* If text data allocation fails, a data-less record is committed. */ | 
|---|
| 1687 | if (r->text_buf_size && !r->text_buf) { | 
|---|
| 1688 | prb_commit(e); | 
|---|
| 1689 | /* prb_commit() re-enabled interrupts. */ | 
|---|
| 1690 | goto fail; | 
|---|
| 1691 | } | 
|---|
| 1692 |  | 
|---|
| 1693 | r->info = info; | 
|---|
| 1694 |  | 
|---|
| 1695 | /* Record full text space used by record. */ | 
|---|
| 1696 | e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); | 
|---|
| 1697 |  | 
|---|
| 1698 | return true; | 
|---|
| 1699 | fail: | 
|---|
| 1700 | /* Make it clear to the caller that the reserve failed. */ | 
|---|
| 1701 | memset(r, 0, sizeof(*r)); | 
|---|
| 1702 | return false; | 
|---|
| 1703 | } | 
|---|
| 1704 | EXPORT_SYMBOL_IF_KUNIT(prb_reserve); | 
|---|
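
For orientation, a minimal writer sketch using prb_reserve() is shown below. It assumes an initialized ringbuffer rb and a NUL-terminated message text; it is an illustration rather than code copied from the kernel sources.

```c
struct prb_reserved_entry e;
struct printk_record r;
unsigned int len = strlen(text);

/* Request enough text space for the message. */
prb_rec_init_wr(&r, len);

if (prb_reserve(&e, rb, &r)) {
	memcpy(r.text_buf, text, len);

	/* The writer must set @text_len for the record to be readable. */
	r.info->text_len = len;
	r.info->ts_nsec = local_clock();
	r.info->caller_id = printk_caller_id();

	/* Use prb_commit() instead if the record may still be extended. */
	prb_final_commit(&e);
}
```
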
| 1705 |  | 
|---|
| 1706 | /* Commit the data (possibly finalizing it) and restore interrupts. */ | 
|---|
| 1707 | static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) | 
|---|
| 1708 | { | 
|---|
| 1709 | struct prb_desc_ring *desc_ring = &e->rb->desc_ring; | 
|---|
| 1710 | struct prb_desc *d = to_desc(desc_ring, e->id); | 
|---|
| 1711 | unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); | 
|---|
| 1712 |  | 
|---|
| 1713 | /* Now the writer has finished all writing: LMM(_prb_commit:A) */ | 
|---|
| 1714 |  | 
|---|
| 1715 | /* | 
|---|
| 1716 | * Set the descriptor as committed. See "ABA Issues" about why | 
|---|
| 1717 | * cmpxchg() instead of set() is used. | 
|---|
| 1718 | * | 
|---|
| 1719 | * 1. Guarantee all record data is stored before the descriptor state | 
|---|
| 1720 | *    is stored as committed. A write memory barrier is sufficient | 
|---|
| 1721 | *    for this. This pairs with desc_read:B and desc_reopen_last:A. | 
|---|
| 1722 | * | 
|---|
| 1723 | * 2. Guarantee the descriptor state is stored as committed before | 
|---|
| 1724 | *    re-checking the head ID in order to possibly finalize this | 
|---|
| 1725 | *    descriptor. This pairs with desc_reserve:D. | 
|---|
| 1726 | * | 
|---|
| 1727 | *    Memory barrier involvement: | 
|---|
| 1728 | * | 
|---|
| 1729 | *    If prb_commit:A reads from desc_reserve:D, then | 
|---|
| 1730 | *    desc_make_final:A reads from _prb_commit:B. | 
|---|
| 1731 | * | 
|---|
| 1732 | *    Relies on: | 
|---|
| 1733 | * | 
|---|
| 1734 | *    MB _prb_commit:B to prb_commit:A | 
|---|
| 1735 | *       matching | 
|---|
| 1736 | *    MB desc_reserve:D to desc_make_final:A | 
|---|
| 1737 | */ | 
|---|
| 1738 | if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, | 
|---|
| 1739 | DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */ | 
|---|
| 1740 | WARN_ON_ONCE(1); | 
|---|
| 1741 | } | 
|---|
| 1742 |  | 
|---|
| 1743 | /* Restore interrupts, the reserve/commit window is finished. */ | 
|---|
| 1744 | local_irq_restore(e->irqflags); | 
|---|
| 1745 | } | 
|---|
| 1746 |  | 
|---|
| 1747 | /** | 
|---|
| 1748 | * prb_commit() - Commit (previously reserved) data to the ringbuffer. | 
|---|
| 1749 | * | 
|---|
| 1750 | * @e: The entry containing the reserved data information. | 
|---|
| 1751 | * | 
|---|
| 1752 | * This is the public function available to writers to commit data. | 
|---|
| 1753 | * | 
|---|
| 1754 | * Note that the data is not yet available to readers until it is finalized. | 
|---|
| 1755 | * Finalizing happens automatically when space for the next record is | 
|---|
| 1756 | * reserved. | 
|---|
| 1757 | * | 
|---|
| 1758 | * See prb_final_commit() for a version of this function that finalizes | 
|---|
| 1759 | * immediately. | 
|---|
| 1760 | * | 
|---|
| 1761 | * Context: Any context. Enables local interrupts. | 
|---|
| 1762 | */ | 
|---|
| 1763 | void prb_commit(struct prb_reserved_entry *e) | 
|---|
| 1764 | { | 
|---|
| 1765 | struct prb_desc_ring *desc_ring = &e->rb->desc_ring; | 
|---|
| 1766 | unsigned long head_id; | 
|---|
| 1767 |  | 
|---|
| 1768 | _prb_commit(e, desc_committed); | 
|---|
| 1769 |  | 
|---|
| 1770 | /* | 
|---|
| 1771 | * If this descriptor is no longer the head (i.e. a new record has | 
|---|
| 1772 | * been allocated), extending the data for this record is no longer | 
|---|
| 1773 | * allowed and therefore it must be finalized. | 
|---|
| 1774 | */ | 
|---|
| 1775 | head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ | 
|---|
| 1776 | if (head_id != e->id) | 
|---|
| 1777 | desc_make_final(e->rb, e->id); | 
|---|
| 1778 | } | 
|---|
| 1779 | EXPORT_SYMBOL_IF_KUNIT(prb_commit); | 
|---|
| 1780 |  | 
|---|
| 1781 | /** | 
|---|
| 1782 | * prb_final_commit() - Commit and finalize (previously reserved) data to | 
|---|
| 1783 | *                      the ringbuffer. | 
|---|
| 1784 | * | 
|---|
| 1785 | * @e: The entry containing the reserved data information. | 
|---|
| 1786 | * | 
|---|
| 1787 | * This is the public function available to writers to commit+finalize data. | 
|---|
| 1788 | * | 
|---|
| 1789 | * By finalizing, the data is made immediately available to readers. | 
|---|
| 1790 | * | 
|---|
| 1791 | * This function should only be used if there are no intentions of extending | 
|---|
| 1792 | * this data using prb_reserve_in_last(). | 
|---|
| 1793 | * | 
|---|
| 1794 | * Context: Any context. Enables local interrupts. | 
|---|
| 1795 | */ | 
|---|
| 1796 | void prb_final_commit(struct prb_reserved_entry *e) | 
|---|
| 1797 | { | 
|---|
| 1798 | _prb_commit(e, desc_finalized); | 
|---|
| 1799 |  | 
|---|
| 1800 | desc_update_last_finalized(e->rb); | 
|---|
| 1801 | } | 
|---|
| 1802 |  | 
|---|
| 1803 | /* | 
|---|
| 1804 | * Count the number of lines in provided text. All text has at least 1 line | 
|---|
| 1805 | * (even if @text_size is 0). Each '\n' processed is counted as an additional | 
|---|
| 1806 | * line. | 
|---|
| 1807 | */ | 
|---|
| 1808 | static unsigned int count_lines(const char *text, unsigned int text_size) | 
|---|
| 1809 | { | 
|---|
| 1810 | unsigned int next_size = text_size; | 
|---|
| 1811 | unsigned int line_count = 1; | 
|---|
| 1812 | const char *next = text; | 
|---|
| 1813 |  | 
|---|
| 1814 | while (next_size) { | 
|---|
| 1815 | next = memchr(next, '\n', next_size); | 
|---|
| 1816 | if (!next) | 
|---|
| 1817 | break; | 
|---|
| 1818 | line_count++; | 
|---|
| 1819 | next++; | 
|---|
| 1820 | next_size = text_size - (next - text); | 
|---|
| 1821 | } | 
|---|
| 1822 |  | 
|---|
| 1823 | return line_count; | 
|---|
| 1824 | } | 
|---|
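
For example, count_lines("a\nbb\n", 5) reports 3 lines: the initial line plus one for each of the two '\n' characters found.
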
| 1825 |  | 
|---|
| 1826 | /* | 
|---|
| 1827 | * Given @blk_lpos, copy an expected @len of data into the provided buffer. | 
|---|
| 1828 | * If @line_count is provided, count the number of lines in the data. | 
|---|
| 1829 | * | 
|---|
| 1830 | * This function (used by readers) performs strict validation on the data | 
|---|
| 1831 | * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is | 
|---|
| 1832 | * triggered if an internal error is detected. | 
|---|
| 1833 | */ | 
|---|
| 1834 | static bool copy_data(struct prb_data_ring *data_ring, | 
|---|
| 1835 | struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf, | 
|---|
| 1836 | unsigned int buf_size, unsigned int *line_count) | 
|---|
| 1837 | { | 
|---|
| 1838 | unsigned int data_size; | 
|---|
| 1839 | const char *data; | 
|---|
| 1840 |  | 
|---|
| 1841 | /* Caller might not want any data. */ | 
|---|
| 1842 | if ((!buf || !buf_size) && !line_count) | 
|---|
| 1843 | return true; | 
|---|
| 1844 |  | 
|---|
| 1845 | data = get_data(data_ring, blk_lpos, &data_size); | 
|---|
| 1846 | if (!data) | 
|---|
| 1847 | return false; | 
|---|
| 1848 |  | 
|---|
| 1849 | /* | 
|---|
| 1850 | * Actual cannot be less than expected. It can be more than expected | 
|---|
| 1851 | * because of the trailing alignment padding. | 
|---|
| 1852 | * | 
|---|
| 1853 | * Note that invalid @len values can occur because the caller loads | 
|---|
| 1854 | * the value during an allowed data race. | 
|---|
| 1855 | */ | 
|---|
| 1856 | if (data_size < (unsigned int)len) | 
|---|
| 1857 | return false; | 
|---|
| 1858 |  | 
|---|
| 1859 | /* Caller interested in the line count? */ | 
|---|
| 1860 | if (line_count) | 
|---|
| 1861 | *line_count = count_lines(data, len); | 
|---|
| 1862 |  | 
|---|
| 1863 | /* Caller interested in the data content? */ | 
|---|
| 1864 | if (!buf || !buf_size) | 
|---|
| 1865 | return true; | 
|---|
| 1866 |  | 
|---|
| 1867 | data_size = min_t(unsigned int, buf_size, len); | 
|---|
| 1868 |  | 
|---|
| 1869 | memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */ | 
|---|
| 1870 | return true; | 
|---|
| 1871 | } | 
|---|
| 1872 |  | 
|---|
| 1873 | /* | 
|---|
| 1874 | * This is an extended version of desc_read(). It gets a copy of a specified | 
|---|
| 1875 | * descriptor. However, it also verifies that the record is finalized and has | 
|---|
| 1876 | * the sequence number @seq. On success, 0 is returned. | 
|---|
| 1877 | * | 
|---|
| 1878 | * Error return values: | 
|---|
| 1879 | * -EINVAL: A finalized record with sequence number @seq does not exist. | 
|---|
| 1880 | * -ENOENT: A finalized record with sequence number @seq exists, but its data | 
|---|
| 1881 | *          is not available. This is a valid record, so readers should | 
|---|
| 1882 | *          continue with the next record. | 
|---|
| 1883 | */ | 
|---|
| 1884 | static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, | 
|---|
| 1885 | unsigned long id, u64 seq, | 
|---|
| 1886 | struct prb_desc *desc_out) | 
|---|
| 1887 | { | 
|---|
| 1888 | struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos; | 
|---|
| 1889 | enum desc_state d_state; | 
|---|
| 1890 | u64 s; | 
|---|
| 1891 |  | 
|---|
| 1892 | d_state = desc_read(desc_ring, id, desc_out, &s, NULL); | 
|---|
| 1893 |  | 
|---|
| 1894 | /* | 
|---|
| 1895 | * An unexpected @id (desc_miss) or @seq mismatch means the record | 
|---|
| 1896 | * does not exist. A descriptor in the reserved or committed state | 
|---|
| 1897 | * means the record does not yet exist for the reader. | 
|---|
| 1898 | */ | 
|---|
| 1899 | if (d_state == desc_miss || | 
|---|
| 1900 | d_state == desc_reserved || | 
|---|
| 1901 | d_state == desc_committed || | 
|---|
| 1902 | s != seq) { | 
|---|
| 1903 | return -EINVAL; | 
|---|
| 1904 | } | 
|---|
| 1905 |  | 
|---|
| 1906 | /* | 
|---|
| 1907 | * A descriptor in the reusable state may no longer have its data | 
|---|
| 1908 | * available; report it as existing but with lost data. Or the record | 
|---|
| 1909 | * may actually be a record with lost data. | 
|---|
| 1910 | */ | 
|---|
| 1911 | if (d_state == desc_reusable || | 
|---|
| 1912 | (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) { | 
|---|
| 1913 | return -ENOENT; | 
|---|
| 1914 | } | 
|---|
| 1915 |  | 
|---|
| 1916 | return 0; | 
|---|
| 1917 | } | 
|---|
| 1918 |  | 
|---|
| 1919 | /* | 
|---|
| 1920 | * Copy the ringbuffer data from the record with @seq to the provided | 
|---|
| 1921 | * @r buffer. On success, 0 is returned. | 
|---|
| 1922 | * | 
|---|
| 1923 | * See desc_read_finalized_seq() for error return values. | 
|---|
| 1924 | */ | 
|---|
| 1925 | static int prb_read(struct printk_ringbuffer *rb, u64 seq, | 
|---|
| 1926 | struct printk_record *r, unsigned int *line_count) | 
|---|
| 1927 | { | 
|---|
| 1928 | struct prb_desc_ring *desc_ring = &rb->desc_ring; | 
|---|
| 1929 | struct printk_info *info = to_info(desc_ring, seq); | 
|---|
| 1930 | struct prb_desc *rdesc = to_desc(desc_ring, seq); | 
|---|
| 1931 | atomic_long_t *state_var = &rdesc->state_var; | 
|---|
| 1932 | struct prb_desc desc; | 
|---|
| 1933 | unsigned long id; | 
|---|
| 1934 | int err; | 
|---|
| 1935 |  | 
|---|
| 1936 | /* Extract the ID, used to specify the descriptor to read. */ | 
|---|
| 1937 | id = DESC_ID(atomic_long_read(state_var)); | 
|---|
| 1938 |  | 
|---|
| 1939 | /* Get a local copy of the correct descriptor (if available). */ | 
|---|
| 1940 | err = desc_read_finalized_seq(desc_ring, id, seq, &desc); | 
|---|
| 1941 |  | 
|---|
| 1942 | /* | 
|---|
| 1943 | * If @r is NULL, the caller is only interested in the availability | 
|---|
| 1944 | * of the record. | 
|---|
| 1945 | */ | 
|---|
| 1946 | if (err || !r) | 
|---|
| 1947 | return err; | 
|---|
| 1948 |  | 
|---|
| 1949 | /* If requested, copy meta data. */ | 
|---|
| 1950 | if (r->info) | 
|---|
| 1951 | memcpy(r->info, info, sizeof(*(r->info))); | 
|---|
| 1952 |  | 
|---|
| 1953 | /* Copy text data. If it fails, this is a data-less record. */ | 
|---|
| 1954 | if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len, | 
|---|
| 1955 | r->text_buf, r->text_buf_size, line_count)) { | 
|---|
| 1956 | return -ENOENT; | 
|---|
| 1957 | } | 
|---|
| 1958 |  | 
|---|
| 1959 | /* Ensure the record is still finalized and has the same @seq. */ | 
|---|
| 1960 | return desc_read_finalized_seq(desc_ring, id, seq, &desc); | 
|---|
| 1961 | } | 
|---|
| 1962 |  | 
|---|
| 1963 | /* Get the sequence number of the tail descriptor. */ | 
|---|
| 1964 | u64 prb_first_seq(struct printk_ringbuffer *rb) | 
|---|
| 1965 | { | 
|---|
| 1966 | struct prb_desc_ring *desc_ring = &rb->desc_ring; | 
|---|
| 1967 | enum desc_state d_state; | 
|---|
| 1968 | struct prb_desc desc; | 
|---|
| 1969 | unsigned long id; | 
|---|
| 1970 | u64 seq; | 
|---|
| 1971 |  | 
|---|
| 1972 | for (;;) { | 
|---|
| 1973 | id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */ | 
|---|
| 1974 |  | 
|---|
| 1975 | d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */ | 
|---|
| 1976 |  | 
|---|
| 1977 | /* | 
|---|
| 1978 | * This loop will not be infinite because the tail is | 
|---|
| 1979 | * _always_ in the finalized or reusable state. | 
|---|
| 1980 | */ | 
|---|
| 1981 | if (d_state == desc_finalized || d_state == desc_reusable) | 
|---|
| 1982 | break; | 
|---|
| 1983 |  | 
|---|
| 1984 | /* | 
|---|
| 1985 | * Guarantee the last state load from desc_read() is before | 
|---|
| 1986 | * reloading @tail_id in order to see a new tail in the case | 
|---|
| 1987 | * that the descriptor has been recycled. This pairs with | 
|---|
| 1988 | * desc_reserve:D. | 
|---|
| 1989 | * | 
|---|
| 1990 | * Memory barrier involvement: | 
|---|
| 1991 | * | 
|---|
| 1992 | * If prb_first_seq:B reads from desc_reserve:F, then | 
|---|
| 1993 | * prb_first_seq:A reads from desc_push_tail:B. | 
|---|
| 1994 | * | 
|---|
| 1995 | * Relies on: | 
|---|
| 1996 | * | 
|---|
| 1997 | * MB from desc_push_tail:B to desc_reserve:F | 
|---|
| 1998 | *    matching | 
|---|
| 1999 | * RMB prb_first_seq:B to prb_first_seq:A | 
|---|
| 2000 | */ | 
|---|
| 2001 | smp_rmb(); /* LMM(prb_first_seq:C) */ | 
|---|
| 2002 | } | 
|---|
| 2003 |  | 
|---|
| 2004 | return seq; | 
|---|
| 2005 | } | 
|---|
| 2006 |  | 
|---|
| 2007 | /** | 
|---|
| 2008 | * prb_next_reserve_seq() - Get the sequence number after the most recently | 
|---|
| 2009 | *                  reserved record. | 
|---|
| 2010 | * | 
|---|
| 2011 | * @rb:  The ringbuffer to get the sequence number from. | 
|---|
| 2012 | * | 
|---|
| 2013 | * This is the public function available to readers to see what sequence | 
|---|
| 2014 | * number will be assigned to the next reserved record. | 
|---|
| 2015 | * | 
|---|
| 2016 | * Note that depending on the situation, this value can be equal to or | 
|---|
| 2017 | * higher than the sequence number returned by prb_next_seq(). | 
|---|
| 2018 | * | 
|---|
| 2019 | * Context: Any context. | 
|---|
| 2020 | * Return: The sequence number that will be assigned to the next record | 
|---|
| 2021 | *         reserved. | 
|---|
| 2022 | */ | 
|---|
| 2023 | u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) | 
|---|
| 2024 | { | 
|---|
| 2025 | struct prb_desc_ring *desc_ring = &rb->desc_ring; | 
|---|
| 2026 | unsigned long last_finalized_id; | 
|---|
| 2027 | atomic_long_t *state_var; | 
|---|
| 2028 | u64 last_finalized_seq; | 
|---|
| 2029 | unsigned long head_id; | 
|---|
| 2030 | struct prb_desc desc; | 
|---|
| 2031 | unsigned long diff; | 
|---|
| 2032 | struct prb_desc *d; | 
|---|
| 2033 | int err; | 
|---|
| 2034 |  | 
|---|
| 2035 | /* | 
|---|
| 2036 | * It may not be possible to read a sequence number for @head_id. | 
|---|
| 2037 | * So the ID of @last_finalized_seq is used to calculate what the | 
|---|
| 2038 | * sequence number of @head_id will be. | 
|---|
| 2039 | */ | 
|---|
| 2040 |  | 
|---|
| 2041 | try_again: | 
|---|
| 2042 | last_finalized_seq = desc_last_finalized_seq(rb); | 
|---|
| 2043 |  | 
|---|
| 2044 | /* | 
|---|
| 2045 | * @head_id is loaded after @last_finalized_seq to ensure that | 
|---|
| 2046 | * it points to the record with @last_finalized_seq or newer. | 
|---|
| 2047 | * | 
|---|
| 2048 | * Memory barrier involvement: | 
|---|
| 2049 | * | 
|---|
| 2050 | * If desc_last_finalized_seq:A reads from | 
|---|
| 2051 | * desc_update_last_finalized:A, then | 
|---|
| 2052 | * prb_next_reserve_seq:A reads from desc_reserve:D. | 
|---|
| 2053 | * | 
|---|
| 2054 | * Relies on: | 
|---|
| 2055 | * | 
|---|
| 2056 | * RELEASE from desc_reserve:D to desc_update_last_finalized:A | 
|---|
| 2057 | *    matching | 
|---|
| 2058 | * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A | 
|---|
| 2059 | * | 
|---|
| 2060 | * Note: desc_reserve:D and desc_update_last_finalized:A can be | 
|---|
| 2061 | *       different CPUs. However, the desc_update_last_finalized:A CPU | 
|---|
| 2062 | *       (which performs the release) must have previously seen | 
|---|
| 2063 | *       desc_read:C, which implies desc_reserve:D can be seen. | 
|---|
| 2064 | */ | 
|---|
| 2065 | head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ | 
|---|
| 2066 |  | 
|---|
| 2067 | d = to_desc(desc_ring, last_finalized_seq); | 
|---|
| 2068 | state_var = &d->state_var; | 
|---|
| 2069 |  | 
|---|
| 2070 | /* Extract the ID, used to specify the descriptor to read. */ | 
|---|
| 2071 | last_finalized_id = DESC_ID(atomic_long_read(state_var)); | 
|---|
| 2072 |  | 
|---|
| 2073 | /* Ensure @last_finalized_id is correct. */ | 
|---|
| 2074 | err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); | 
|---|
| 2075 |  | 
|---|
| 2076 | if (err == -EINVAL) { | 
|---|
| 2077 | if (last_finalized_seq == 0) { | 
|---|
| 2078 | /* | 
|---|
| 2079 | * No record has been finalized or even reserved yet. | 
|---|
| 2080 | * | 
|---|
| 2081 | * The @head_id is initialized such that the first | 
|---|
| 2082 | * increment will yield the first record (seq=0). | 
|---|
| 2083 | * Handle it separately to avoid a negative @diff | 
|---|
| 2084 | * below. | 
|---|
| 2085 | */ | 
|---|
| 2086 | if (head_id == DESC0_ID(desc_ring->count_bits)) | 
|---|
| 2087 | return 0; | 
|---|
| 2088 |  | 
|---|
| 2089 | /* | 
|---|
| 2090 | * One or more descriptors are already reserved. Use | 
|---|
| 2091 | * the descriptor ID of the first one (@seq=0) for | 
|---|
| 2092 | * the @diff below. | 
|---|
| 2093 | */ | 
|---|
| 2094 | last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; | 
|---|
| 2095 | } else { | 
|---|
| 2096 | /* Record must have been overwritten. Try again. */ | 
|---|
| 2097 | goto try_again; | 
|---|
| 2098 | } | 
|---|
| 2099 | } | 
|---|
| 2100 |  | 
|---|
| 2101 | /* Diff of known descriptor IDs to compute related sequence numbers. */ | 
|---|
| 2102 | diff = head_id - last_finalized_id; | 
|---|
| 2103 |  | 
|---|
| 2104 | /* | 
|---|
| 2105 | * @head_id points to the most recently reserved record, but this | 
|---|
| 2106 | * function returns the sequence number that will be assigned to the | 
|---|
| 2107 | * next (not yet reserved) record. Thus +1 is needed. | 
|---|
| 2108 | */ | 
|---|
| 2109 | return (last_finalized_seq + diff + 1); | 
|---|
| 2110 | } | 
|---|
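
A worked example with assumed values: if @last_finalized_seq is 40 and its descriptor has ID 1000, while @head_id is 1003 because three newer records have already been reserved, then @diff is 3 and the function returns 40 + 3 + 1 = 44, the sequence number that the next reservation will receive.
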
| 2111 |  | 
|---|
| 2112 | /* | 
|---|
| 2113 | * Non-blocking read of a record. | 
|---|
| 2114 | * | 
|---|
| 2115 | * On success @seq is updated to the record that was read and (if provided) | 
|---|
| 2116 | * @r and @line_count will contain the read/calculated data. | 
|---|
| 2117 | * | 
|---|
| 2118 | * On failure @seq is updated to a record that is not yet available to the | 
|---|
| 2119 | * reader, but it will be the next record available to the reader. | 
|---|
| 2120 | * | 
|---|
| 2121 | * Note: When the current CPU is in panic, this function will skip over any | 
|---|
| 2122 | *       non-existent/non-finalized records in order to allow the panic CPU | 
|---|
| 2123 | *       to print any and all records that have been finalized. | 
|---|
| 2124 | */ | 
|---|
| 2125 | static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, | 
|---|
| 2126 | struct printk_record *r, unsigned int *line_count) | 
|---|
| 2127 | { | 
|---|
| 2128 | u64 tail_seq; | 
|---|
| 2129 | int err; | 
|---|
| 2130 |  | 
|---|
| 2131 | while ((err = prb_read(rb, *seq, r, line_count))) { | 
|---|
| 2132 | tail_seq = prb_first_seq(rb); | 
|---|
| 2133 |  | 
|---|
| 2134 | if (*seq < tail_seq) { | 
|---|
| 2135 | /* | 
|---|
| 2136 | * Behind the tail. Catch up and try again. This | 
|---|
| 2137 | * can happen for -ENOENT and -EINVAL cases. | 
|---|
| 2138 | */ | 
|---|
| 2139 | *seq = tail_seq; | 
|---|
| 2140 |  | 
|---|
| 2141 | } else if (err == -ENOENT) { | 
|---|
| 2142 | /* Record exists, but the data was lost. Skip. */ | 
|---|
| 2143 | (*seq)++; | 
|---|
| 2144 |  | 
|---|
| 2145 | } else { | 
|---|
| 2146 | /* | 
|---|
| 2147 | * Non-existent/non-finalized record. Must stop. | 
|---|
| 2148 | * | 
|---|
| 2149 | * For panic situations it cannot be expected that | 
|---|
| 2150 | * non-finalized records will become finalized. But | 
|---|
| 2151 | * there may be other finalized records beyond that | 
|---|
| 2152 | * need to be printed for a panic situation. If this | 
|---|
| 2153 | * is the panic CPU, skip this | 
|---|
| 2154 | * non-existent/non-finalized record unless non-panic | 
|---|
| 2155 | * CPUs are still running and their debugging is | 
|---|
| 2156 | * explicitly enabled. | 
|---|
| 2157 | * | 
|---|
| 2158 | * Note that new messages printed on panic CPU are | 
|---|
| 2159 | * finalized when we are here. The only exception | 
|---|
| 2160 | * might be the last message without trailing newline. | 
|---|
| 2161 | * But it would have the sequence number returned | 
|---|
| 2162 | * by "prb_next_reserve_seq() - 1". | 
|---|
| 2163 | */ | 
|---|
| 2164 | if (panic_on_this_cpu() && | 
|---|
| 2165 | (!debug_non_panic_cpus || legacy_allow_panic_sync) && | 
|---|
| 2166 | ((*seq + 1) < prb_next_reserve_seq(rb))) { | 
|---|
| 2167 | (*seq)++; | 
|---|
| 2168 | } else { | 
|---|
| 2169 | return false; | 
|---|
| 2170 | } | 
|---|
| 2171 | } | 
|---|
| 2172 | } | 
|---|
| 2173 |  | 
|---|
| 2174 | return true; | 
|---|
| 2175 | } | 
|---|
| 2176 |  | 
|---|
| 2177 | /** | 
|---|
| 2178 | * prb_read_valid() - Non-blocking read of a requested record or (if gone) | 
|---|
| 2179 | *                    the next available record. | 
|---|
| 2180 | * | 
|---|
| 2181 | * @rb:  The ringbuffer to read from. | 
|---|
| 2182 | * @seq: The sequence number of the record to read. | 
|---|
| 2183 | * @r:   A record data buffer to store the read record to. | 
|---|
| 2184 | * | 
|---|
| 2185 | * This is the public function available to readers to read a record. | 
|---|
| 2186 | * | 
|---|
| 2187 | * The reader provides the @info and @text_buf buffers of @r to be | 
|---|
| 2188 | * filled in. Any of the buffer pointers can be set to NULL if the reader | 
|---|
| 2189 | * is not interested in that data. To ensure proper initialization of @r, | 
|---|
| 2190 | * prb_rec_init_rd() should be used. | 
|---|
| 2191 | * | 
|---|
| 2192 | * Context: Any context. | 
|---|
| 2193 | * Return: true if a record was read, otherwise false. | 
|---|
| 2194 | * | 
|---|
| 2195 | * On success, the reader must check r->info.seq to see which record was | 
|---|
| 2196 | * actually read. This allows the reader to detect dropped records. | 
|---|
| 2197 | * | 
|---|
| 2198 | * Failure means @seq refers to a record not yet available to the reader. | 
|---|
| 2199 | */ | 
|---|
| 2200 | bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, | 
|---|
| 2201 | struct printk_record *r) | 
|---|
| 2202 | { | 
|---|
| 2203 | return _prb_read_valid(rb, &seq, r, NULL); | 
|---|
| 2204 | } | 
|---|
| 2205 | EXPORT_SYMBOL_IF_KUNIT(prb_read_valid); | 
|---|
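
A minimal reader-side sketch is shown below, assuming an initialized ringbuffer rb; the 64-byte text buffer is an arbitrary size chosen for the example, and the loop simply prints every record it can read.

```c
struct printk_info info;
struct printk_record r;
char text_buf[64];
unsigned int len;
u64 seq = 0;

prb_rec_init_rd(&r, &info, text_buf, sizeof(text_buf));

while (prb_read_valid(rb, seq, &r)) {
	/* A jump in info.seq means records were dropped or overwritten. */
	if (info.seq != seq)
		pr_warn("lost %llu records\n", info.seq - seq);

	/* The stored text is not NUL-terminated; terminate the copy. */
	len = min_t(unsigned int, info.text_len, sizeof(text_buf) - 1);
	text_buf[len] = '\0';
	pr_info("%llu: %s\n", info.seq, text_buf);

	seq = info.seq + 1;
}
```
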
| 2206 |  | 
|---|
| 2207 | /** | 
|---|
| 2208 | * prb_read_valid_info() - Non-blocking read of meta data for a requested | 
|---|
| 2209 | *                         record or (if gone) the next available record. | 
|---|
| 2210 | * | 
|---|
| 2211 | * @rb:         The ringbuffer to read from. | 
|---|
| 2212 | * @seq:        The sequence number of the record to read. | 
|---|
| 2213 | * @info:       A buffer to store the read record meta data to. | 
|---|
| 2214 | * @line_count: A buffer to store the number of lines in the record text. | 
|---|
| 2215 | * | 
|---|
| 2216 | * This is the public function available to readers to read only the | 
|---|
| 2217 | * meta data of a record. | 
|---|
| 2218 | * | 
|---|
| 2219 | * The reader provides the @info, @line_count buffers to be filled in. | 
|---|
| 2220 | * Either of the buffer pointers can be set to NULL if the reader is not | 
|---|
| 2221 | * interested in that data. | 
|---|
| 2222 | * | 
|---|
| 2223 | * Context: Any context. | 
|---|
| 2224 | * Return: true if a record's meta data was read, otherwise false. | 
|---|
| 2225 | * | 
|---|
| 2226 | * On success, the reader must check info->seq to see which record meta data | 
|---|
| 2227 | * was actually read. This allows the reader to detect dropped records. | 
|---|
| 2228 | * | 
|---|
| 2229 | * Failure means @seq refers to a record not yet available to the reader. | 
|---|
| 2230 | */ | 
|---|
| 2231 | bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, | 
|---|
| 2232 | struct printk_info *info, unsigned int *line_count) | 
|---|
| 2233 | { | 
|---|
| 2234 | struct printk_record r; | 
|---|
| 2235 |  | 
|---|
| 2236 | prb_rec_init_rd(&r, info, NULL, 0); | 
|---|
| 2237 |  | 
|---|
| 2238 | return _prb_read_valid(rb, &seq, &r, line_count); | 
|---|
| 2239 | } | 
|---|
| 2240 |  | 
|---|
| 2241 | /** | 
|---|
| 2242 | * prb_first_valid_seq() - Get the sequence number of the oldest available | 
|---|
| 2243 | *                         record. | 
|---|
| 2244 | * | 
|---|
| 2245 | * @rb: The ringbuffer to get the sequence number from. | 
|---|
| 2246 | * | 
|---|
| 2247 | * This is the public function available to readers to see what the | 
|---|
| 2248 | * first/oldest valid sequence number is. | 
|---|
| 2249 | * | 
|---|
| 2250 | * This provides readers a starting point to begin iterating the ringbuffer. | 
|---|
| 2251 | * | 
|---|
| 2252 | * Context: Any context. | 
|---|
| 2253 | * Return: The sequence number of the first/oldest record or, if the | 
|---|
| 2254 | *         ringbuffer is empty, 0 is returned. | 
|---|
| 2255 | */ | 
|---|
| 2256 | u64 prb_first_valid_seq(struct printk_ringbuffer *rb) | 
|---|
| 2257 | { | 
|---|
| 2258 | u64 seq = 0; | 
|---|
| 2259 |  | 
|---|
| 2260 | if (!_prb_read_valid(rb, &seq, NULL, NULL)) | 
|---|
| 2261 | return 0; | 
|---|
| 2262 |  | 
|---|
| 2263 | return seq; | 
|---|
| 2264 | } | 
|---|
| 2265 |  | 
|---|
| 2266 | /** | 
|---|
| 2267 | * prb_next_seq() - Get the sequence number after the last available record. | 
|---|
| 2268 | * | 
|---|
| 2269 | * @rb:  The ringbuffer to get the sequence number from. | 
|---|
| 2270 | * | 
|---|
| 2271 | * This is the public function available to readers to see what the next | 
|---|
| 2272 | * newest sequence number available to readers will be. | 
|---|
| 2273 | * | 
|---|
| 2274 | * This provides readers a sequence number to jump to if all currently | 
|---|
| 2275 | * available records should be skipped. It is guaranteed that all records | 
|---|
| 2276 | * previous to the returned value have been finalized and are (or were) | 
|---|
| 2277 | * available to the reader. | 
|---|
| 2278 | * | 
|---|
| 2279 | * Context: Any context. | 
|---|
| 2280 | * Return: The sequence number of the next newest (not yet available) record | 
|---|
| 2281 | *         for readers. | 
|---|
| 2282 | */ | 
|---|
| 2283 | u64 prb_next_seq(struct printk_ringbuffer *rb) | 
|---|
| 2284 | { | 
|---|
| 2285 | u64 seq; | 
|---|
| 2286 |  | 
|---|
| 2287 | seq = desc_last_finalized_seq(rb); | 
|---|
| 2288 |  | 
|---|
| 2289 | /* | 
|---|
| 2290 | * Begin searching after the last finalized record. | 
|---|
| 2291 | * | 
|---|
| 2292 | * On 0, the search must begin at 0: because of hack#2 | 
|---|
| 2293 | * of the bootstrapping phase, it is not known whether a | 
|---|
| 2294 | * record at index 0 exists. | 
|---|
| 2295 | */ | 
|---|
| 2296 | if (seq != 0) | 
|---|
| 2297 | seq++; | 
|---|
| 2298 |  | 
|---|
| 2299 | /* | 
|---|
| 2300 | * The information about the last finalized @seq might be inaccurate. | 
|---|
| 2301 | * Search forward to find the current one. | 
|---|
| 2302 | */ | 
|---|
| 2303 | while (_prb_read_valid(rb, &seq, NULL, NULL)) | 
|---|
| 2304 | seq++; | 
|---|
| 2305 |  | 
|---|
| 2306 | return seq; | 
|---|
| 2307 | } | 
|---|
| 2308 |  | 
|---|
| 2309 | /** | 
|---|
| 2310 | * prb_init() - Initialize a ringbuffer to use provided external buffers. | 
|---|
| 2311 | * | 
|---|
| 2312 | * @rb:       The ringbuffer to initialize. | 
|---|
| 2313 | * @text_buf: The data buffer for text data. | 
|---|
| 2314 | * @textbits: The size of @text_buf as a power-of-2 value. | 
|---|
| 2315 | * @descs:    The descriptor buffer for ringbuffer records. | 
|---|
| 2316 | * @descbits: The count of @descs items as a power-of-2 value. | 
|---|
| 2317 | * @infos:    The printk_info buffer for ringbuffer records. | 
|---|
| 2318 | * | 
|---|
| 2319 | * This is the public function available to writers to setup a ringbuffer | 
|---|
| 2320 | * during runtime using provided buffers. | 
|---|
| 2321 | * | 
|---|
| 2322 | * This must match the initialization of DEFINE_PRINTKRB(). | 
|---|
| 2323 | * | 
|---|
| 2324 | * Context: Any context. | 
|---|
| 2325 | */ | 
|---|
| 2326 | void prb_init(struct printk_ringbuffer *rb, | 
|---|
| 2327 | char *text_buf, unsigned int textbits, | 
|---|
| 2328 | struct prb_desc *descs, unsigned int descbits, | 
|---|
| 2329 | struct printk_info *infos) | 
|---|
| 2330 | { | 
|---|
| 2331 | memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0])); | 
|---|
| 2332 | memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0])); | 
|---|
| 2333 |  | 
|---|
| 2334 | rb->desc_ring.count_bits = descbits; | 
|---|
| 2335 | rb->desc_ring.descs = descs; | 
|---|
| 2336 | rb->desc_ring.infos = infos; | 
|---|
| 2337 | atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); | 
|---|
| 2338 | atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); | 
|---|
| 2339 | atomic_long_set(&rb->desc_ring.last_finalized_seq, 0); | 
|---|
| 2340 |  | 
|---|
| 2341 | rb->text_data_ring.size_bits = textbits; | 
|---|
| 2342 | rb->text_data_ring.data = text_buf; | 
|---|
| 2343 | atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits)); | 
|---|
| 2344 | atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits)); | 
|---|
| 2345 |  | 
|---|
| 2346 | atomic_long_set(&rb->fail, 0); | 
|---|
| 2347 |  | 
|---|
| 2348 | atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); | 
|---|
| 2349 | descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS; | 
|---|
| 2350 | descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS; | 
|---|
| 2351 |  | 
|---|
| 2352 | infos[0].seq = -(u64)_DESCS_COUNT(descbits); | 
|---|
| 2353 | infos[_DESCS_COUNT(descbits) - 1].seq = 0; | 
|---|
| 2354 | } | 
|---|
| 2355 | EXPORT_SYMBOL_IF_KUNIT(prb_init); | 
|---|
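
A minimal setup sketch follows; the buffer sizes (2^10 bytes of text data, 2^4 descriptors) are assumptions chosen for the example, and the buffers must outlive the ringbuffer.

```c
static char text_buf[1 << 10];
static struct prb_desc descs[1 << 4];
static struct printk_info infos[1 << 4];
static struct printk_ringbuffer my_rb;

static void my_rb_setup(void)
{
	/* The bit counts must match the buffer sizes declared above. */
	prb_init(&my_rb, text_buf, 10, descs, 4, infos);
}
```
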
| 2356 |  | 
|---|
| 2357 | /** | 
|---|
| 2358 | * prb_record_text_space() - Query the full actual used ringbuffer space for | 
|---|
| 2359 | *                           the text data of a reserved entry. | 
|---|
| 2360 | * | 
|---|
| 2361 | * @e: The successfully reserved entry to query. | 
|---|
| 2362 | * | 
|---|
| 2363 | * This is the public function available to writers to see how much actual | 
|---|
| 2364 | * space is used in the ringbuffer to store the text data of the specified | 
|---|
| 2365 | * entry. | 
|---|
| 2366 | * | 
|---|
| 2367 | * This function is only valid if @e has been successfully reserved using | 
|---|
| 2368 | * prb_reserve(). | 
|---|
| 2369 | * | 
|---|
| 2370 | * Context: Any context. | 
|---|
| 2371 | * Return: The size in bytes used by the text data of the associated record. | 
|---|
| 2372 | */ | 
|---|
| 2373 | unsigned int prb_record_text_space(struct prb_reserved_entry *e) | 
|---|
| 2374 | { | 
|---|
| 2375 | return e->text_space; | 
|---|
| 2376 | } | 
|---|
| 2377 |  | 
|---|