rwsem.c source code [Linux/kernel/locking/rwsem.c]

1	// SPDX-License-Identifier: GPL-2.0
/* kernel/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <longman@redhat.com> and
 * Peter Zijlstra <peterz@infradead.org>.
 */
17
18	#include <linux/types.h>
19	#include <linux/kernel.h>
20	#include <linux/sched.h>
21	#include <linux/sched/rt.h>
22	#include <linux/sched/task.h>
23	#include <linux/sched/debug.h>
24	#include <linux/sched/wake_q.h>
25	#include <linux/sched/signal.h>
26	#include <linux/sched/clock.h>
27	#include <linux/export.h>
28	#include <linux/rwsem.h>
29	#include <linux/atomic.h>
30	#include <linux/hung_task.h>
31	#include <trace/events/lock.h>
32
33	#ifndef CONFIG_PREEMPT_RT
34	#include "lock_events.h"
35
/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - rwsem may be owned by readers (just a hint)
 *  - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
 * On unlock, the owner field will largely be left untouched. So
 * for a free or reader-owned rwsem, the owner value may contain
 * information about the last reader that acquired the rwsem.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 *
 * A fast path reader optimistic lock stealing is supported when the rwsem
 * is previously owned by a writer and the following conditions are met:
 *  - rwsem is not currently writer owned
 *  - the handoff isn't set.
 */
/* Flag bits kept in the two least significant bits of sem->owner. */
#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_NONSPINNABLE	(1UL << 1)
/* Mask covering both owner flag bits; the rest is the task pointer. */
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
67
/*
 * DEBUG_RWSEMS_WARN_ON(c, sem) - debug-only sanity check on an rwsem.
 *
 * With CONFIG_DEBUG_RWSEMS, warn once if condition @c holds (unless
 * debug_locks_silent is set), dumping the count, magic, owner, current
 * task and wait-list state of @sem, then call debug_locks_off().
 * Without CONFIG_DEBUG_RWSEMS the macro expands to nothing, so @c is
 * not evaluated.
 *
 * Every use of @sem is parenthesized so that an expression argument
 * expands correctly.
 */
#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
	if (!debug_locks_silent &&				\
	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
		#c, atomic_long_read(&(sem)->count),		\
		(unsigned long) (sem)->magic,			\
		atomic_long_read(&(sem)->owner), (long)current,	\
		list_empty(&(sem)->wait_list) ? "" : "not "))	\
			debug_locks_off();			\
	} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif
81
82	/*
83	* On 64-bit architectures, the bit definitions of the count are:
84	*
85	* Bit 0 - writer locked bit
86	* Bit 1 - waiters present bit
87	* Bit 2 - lock handoff bit
88	* Bits 3-7 - reserved
89	* Bits 8-62 - 55-bit reader count
90	* Bit 63 - read fail bit
91	*
92	* On 32-bit architectures, the bit definitions of the count are:
93	*
94	* Bit 0 - writer locked bit
95	* Bit 1 - waiters present bit
96	* Bit 2 - lock handoff bit
97	* Bits 3-7 - reserved
98	* Bits 8-30 - 23-bit reader count
99	* Bit 31 - read fail bit
100	*
101	* It is not likely that the most significant bit (read fail bit) will ever
102	* be set. This guard bit is still checked anyway in the down_read() fastpath
103	* just in case we need to use up more of the reader bits for other purpose
104	* in the future.
105	*
106	* atomic_long_fetch_add() is used to obtain reader lock, whereas
107	* atomic_long_cmpxchg() will be used to obtain writer lock.
108	*
109	* There are three places where the lock handoff bit may be set or cleared.
110	* 1) rwsem_mark_wake() for readers -- set, clear
111	* 2) rwsem_try_write_lock() for writers -- set, clear
112	* 3) rwsem_del_waiter() -- clear
113	*
114	* For all the above cases, wait_lock will be held. A writer must also
115	* be the first one in the wait_list to be eligible for setting the handoff
116	* bit. So concurrent setting/clearing of handoff bit is not possible.
117	*/
/* Flag bits in the low part of sem->count. */
#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
/* Most significant bit of the count word. */
#define RWSEM_FLAG_READFAIL	(1UL << (BITS_PER_LONG - 1))

/* The reader count occupies bit RWSEM_READER_SHIFT and everything above. */
#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
/* Any bit in this mask means the lock is held by a writer or by readers. */
#define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK | RWSEM_READER_MASK)
/* Any bit in this mask makes the reader fast path fail. */
#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK | RWSEM_FLAG_WAITERS | \
				 RWSEM_FLAG_HANDOFF | RWSEM_FLAG_READFAIL)
130
131	/*
132	* All writes to owner are protected by WRITE_ONCE() to make sure that
133	* store tearing can't happen as optimistic spinners may read and use
134	* the owner value concurrently without lock. Read from owner, however,
135	* may not need READ_ONCE() as long as the pointer value is only used
136	* for comparison and isn't being dereferenced.
137	*
138	* Both rwsem_{set,clear}_owner() functions should be in the same
139	* preempt disable section as the atomic op that changes sem->count.
140	*/
141	static inline void rwsem_set_owner(struct rw_semaphore *sem)
142	{
143	lockdep_assert_preemption_disabled();
144	atomic_long_set(v: &sem->owner, i: (long)current);
145	}
146
147	static inline void rwsem_clear_owner(struct rw_semaphore *sem)
148	{
149	lockdep_assert_preemption_disabled();
150	atomic_long_set(v: &sem->owner, i: `0`);
151	}
152
153	/*
154	* Test the flags in the owner field.
155	*/
156	static inline bool rwsem_test_oflags(struct rw_semaphore sem, long* flags)
157	{
158	return atomic_long_read(v: &sem->owner) & flags;
159	}
160
161	/*
162	* The task_struct pointer of the last owning reader will be left in
163	* the owner field.
164	*
165	* Note that the owner value just indicates the task has owned the rwsem
166	* previously, it may not be the real owner or one of the real owners
167	* anymore when that field is examined, so take it with a grain of salt.
168	*
169	* The reader non-spinnable bit is preserved.
170	*/
171	static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
172	struct task_struct *owner)
173	{
174	unsigned long val = (unsigned long)owner \| RWSEM_READER_OWNED \|
175	(atomic_long_read(v: &sem->owner) & RWSEM_NONSPINNABLE);
176
177	atomic_long_set(v: &sem->owner, i: val);
178	}
179
180	static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
181	{
182	__rwsem_set_reader_owned(sem, current);
183	}
184
185	#if defined(CONFIG_DEBUG_RWSEMS) \|\| defined(CONFIG_DETECT_HUNG_TASK_BLOCKER)
186	/*
187	* Return just the real task structure pointer of the owner
188	*/
189	struct task_struct rwsem_owner(struct* rw_semaphore *sem)
190	{
191	return (struct task_struct *)
192	(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
193	}
194
195	/*
196	* Return true if the rwsem is owned by a reader.
197	*/
198	bool is_rwsem_reader_owned(struct rw_semaphore *sem)
199	{
200	/*
201	* Check the count to see if it is write-locked.
202	*/
203	long count = atomic_long_read(&sem->count);
204
205	if (count & RWSEM_WRITER_MASK)
206	return false;
207	return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
208	}
209
210	/*
211	* With CONFIG_DEBUG_RWSEMS or CONFIG_DETECT_HUNG_TASK_BLOCKER configured,
212	* it will make sure that the owner field of a reader-owned rwsem either
213	* points to a real reader-owner(s) or gets cleared. The only exception is
214	* when the unlock is done by up_read_non_owner().
215	*/
216	static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
217	{
218	unsigned long val = atomic_long_read(&sem->owner);
219
220	while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
221	if (atomic_long_try_cmpxchg(&sem->owner, &val,
222	val & RWSEM_OWNER_FLAGS_MASK))
223	return;
224	}
225	}
226	#else
/* No-op variant used when neither debug option above is configured. */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { }
230	#endif
231
232	/*
233	* Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
234	* remains set. Otherwise, the operation will be aborted.
235	*/
236	static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
237	{
238	unsigned long owner = atomic_long_read(v: &sem->owner);
239
240	do {
241	if (!(owner & RWSEM_READER_OWNED))
242	break;
243	if (owner & RWSEM_NONSPINNABLE)
244	break;
245	} while (!atomic_long_try_cmpxchg(v: &sem->owner, old: &owner,
246	new: owner \| RWSEM_NONSPINNABLE));
247	}
248
249	static inline bool rwsem_read_trylock(struct rw_semaphore sem, long* *cntp)
250	{
251	*cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, v: &sem->count);
252
253	if (WARN_ON_ONCE(*cntp < `0`))
254	rwsem_set_nonspinnable(sem);
255
256	if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
257	rwsem_set_reader_owned(sem);
258	return true;
259	}
260
261	return false;
262	}
263
264	static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
265	{
266	long tmp = RWSEM_UNLOCKED_VALUE;
267
268	if (atomic_long_try_cmpxchg_acquire(v: &sem->count, old: &tmp, RWSEM_WRITER_LOCKED)) {
269	rwsem_set_owner(sem);
270	return true;
271	}
272
273	return false;
274	}
275
276	/*
277	* Return the real task structure pointer of the owner and the embedded
278	* flags in the owner. pflags must be non-NULL.
279	*/
280	static inline struct task_struct *
281	rwsem_owner_flags(struct rw_semaphore sem, unsigned* long *pflags)
282	{
283	unsigned long owner = atomic_long_read(v: &sem->owner);
284
285	*pflags = owner & RWSEM_OWNER_FLAGS_MASK;
286	return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
287	}
288
/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a readers owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after that.
 */
305
306	/*
307	* Initialize an rwsem:
308	*/
309	void __init_rwsem(struct rw_semaphore sem, const* char *name,
310	struct lock_class_key *key)
311	{
312	#ifdef CONFIG_DEBUG_LOCK_ALLOC
313	/*
314	* Make sure we are not reinitializing a held semaphore:
315	*/
316	debug_check_no_locks_freed((void )sem, sizeof(sem));
317	lockdep_init_map_wait(&sem->dep_map, name, key, `0`, LD_WAIT_SLEEP);
318	#endif
319	#ifdef CONFIG_DEBUG_RWSEMS
320	sem->magic = sem;
321	#endif
322	atomic_long_set(v: &sem->count, RWSEM_UNLOCKED_VALUE);
323	raw_spin_lock_init(&sem->wait_lock);
324	INIT_LIST_HEAD(list: &sem->wait_list);
325	atomic_long_set(v: &sem->owner, i: `0L`);
326	#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
327	osq_lock_init(lock: &sem->osq);
328	#endif
329	}
330	EXPORT_SYMBOL(__init_rwsem);
331
/* Kind of lock a queued waiter is waiting for. */
enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE = 0,
	RWSEM_WAITING_FOR_READ  = 1
};
336
337	struct rwsem_waiter {
338	struct list_head list;
339	struct task_struct *task;
340	enum rwsem_waiter_type type;
341	unsigned long timeout;
342	bool handoff_set;
343	};
344	#define rwsem_first_waiter(sem) \
345	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
346
/* Selects which waiters rwsem_mark_wake() is allowed to wake. */
enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};
352
/*
 * Minimum time, in jiffies, a waiter stays in the wait queue before the
 * handoff protocol is initiated. The typical HZ value is either 250 or
 * 1000, so this is at least 4ms, or 1 jiffy if a jiffy is longer than 4ms.
 */
#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)
359
/*
 * Upper bound on the number of waiting readers woken in one batch, even
 * when writers are also present in the queue. The cap limits the amount
 * of work the waking thread must do and also prevents any potential
 * counter overflow, however unlikely.
 */
#define MAX_READERS_WAKEUP	0x100
367
368	static inline void
369	rwsem_add_waiter(struct rw_semaphore sem, struct* rwsem_waiter *waiter)
370	{
371	lockdep_assert_held(&sem->wait_lock);
372	list_add_tail(new: &waiter->list, head: &sem->wait_list);
373	/ caller will set RWSEM_FLAG_WAITERS /
374	}
375
376	/*
377	* Remove a waiter from the wait_list and clear flags.
378	*
379	* Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
380	* this function. Modify with care.
381	*
382	* Return: true if wait_list isn't empty and false otherwise
383	*/
384	static inline bool
385	rwsem_del_waiter(struct rw_semaphore sem, struct* rwsem_waiter *waiter)
386	{
387	lockdep_assert_held(&sem->wait_lock);
388	list_del(entry: &waiter->list);
389	if (likely(!list_empty(&sem->wait_list)))
390	return true;
391
392	atomic_long_andnot(RWSEM_FLAG_HANDOFF \| RWSEM_FLAG_WAITERS, v: &sem->count);
393	return false;
394	}
395
396	/*
397	* handle the lock release when processes blocked on it that can now run
398	* - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
399	* have been set.
400	* - there must be someone on the queue
401	* - the wait_lock must be held by the caller
402	* - tasks are marked for wakeup, the caller must later invoke wake_up_q()
403	* to actually wakeup the blocked task(s) and drop the reference count,
404	* preferably when the wait_lock is released
405	* - woken process blocks are discarded from the list after having task zeroed
406	* - writers are only marked woken if downgrading is false
407	*
408	* Implies rwsem_del_waiter() for all woken readers.
409	*/
410	static void rwsem_mark_wake(struct rw_semaphore *sem,
411	enum rwsem_wake_type wake_type,
412	struct wake_q_head *wake_q)
413	{
414	struct rwsem_waiter waiter, tmp;
415	long oldcount, woken = `0`, adjustment = `0`;
416	struct list_head wlist;
417
418	lockdep_assert_held(&sem->wait_lock);
419
420	/*
421	* Take a peek at the queue head waiter such that we can determine
422	* the wakeup(s) to perform.
423	*/
424	waiter = rwsem_first_waiter(sem);
425
426	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
427	if (wake_type == RWSEM_WAKE_ANY) {
428	/*
429	* Mark writer at the front of the queue for wakeup.
430	* Until the task is actually later awoken later by
431	* the caller, other writers are able to steal it.
432	* Readers, on the other hand, will block as they
433	* will notice the queued writer.
434	*/
435	wake_q_add(head: wake_q, task: waiter->task);
436	lockevent_inc(rwsem_wake_writer);
437	}
438
439	return;
440	}
441
442	/*
443	* No reader wakeup if there are too many of them already.
444	*/
445	if (unlikely(atomic_long_read(&sem->count) < `0`))
446	return;
447
448	/*
449	* Writers might steal the lock before we grant it to the next reader.
450	* We prefer to do the first reader grant before counting readers
451	* so we can bail out early if a writer stole the lock.
452	*/
453	if (wake_type != RWSEM_WAKE_READ_OWNED) {
454	struct task_struct *owner;
455
456	adjustment = RWSEM_READER_BIAS;
457	oldcount = atomic_long_fetch_add(i: adjustment, v: &sem->count);
458	if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
459	/*
460	* When we've been waiting "too" long (for writers
461	* to give up the lock), request a HANDOFF to
462	* force the issue.
463	*/
464	if (time_after(jiffies, waiter->timeout)) {
465	if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
466	adjustment -= RWSEM_FLAG_HANDOFF;
467	lockevent_inc(rwsem_rlock_handoff);
468	}
469	waiter->handoff_set = true;
470	}
471
472	atomic_long_add(i: -adjustment, v: &sem->count);
473	return;
474	}
475	/*
476	* Set it to reader-owned to give spinners an early
477	* indication that readers now have the lock.
478	* The reader nonspinnable bit seen at slowpath entry of
479	* the reader is copied over.
480	*/
481	owner = waiter->task;
482	__rwsem_set_reader_owned(sem, owner);
483	}
484
485	/*
486	* Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
487	* queue. We know that the woken will be at least 1 as we accounted
488	* for above. Note we increment the 'active part' of the count by the
489	* number of readers before waking any processes up.
490	*
491	* This is an adaptation of the phase-fair R/W locks where at the
492	* reader phase (first waiter is a reader), all readers are eligible
493	* to acquire the lock at the same time irrespective of their order
494	* in the queue. The writers acquire the lock according to their
495	* order in the queue.
496	*
497	* We have to do wakeup in 2 passes to prevent the possibility that
498	* the reader count may be decremented before it is incremented. It
499	* is because the to-be-woken waiter may not have slept yet. So it
500	* may see waiter->task got cleared, finish its critical section and
501	* do an unlock before the reader count increment.
502	*
503	* 1) Collect the read-waiters in a separate list, count them and
504	* fully increment the reader count in rwsem.
505	* 2) For each waiters in the new list, clear waiter->task and
506	* put them into wake_q to be woken up later.
507	*/
508	INIT_LIST_HEAD(list: &wlist);
509	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
510	if (waiter->type == RWSEM_WAITING_FOR_WRITE)
511	continue;
512
513	woken++;
514	list_move_tail(list: &waiter->list, head: &wlist);
515
516	/*
517	* Limit # of readers that can be woken up per wakeup call.
518	*/
519	if (unlikely(woken >= MAX_READERS_WAKEUP))
520	break;
521	}
522
523	adjustment = woken * RWSEM_READER_BIAS - adjustment;
524	lockevent_cond_inc(rwsem_wake_reader, woken);
525
526	oldcount = atomic_long_read(v: &sem->count);
527	if (list_empty(head: &sem->wait_list)) {
528	/*
529	* Combined with list_move_tail() above, this implies
530	* rwsem_del_waiter().
531	*/
532	adjustment -= RWSEM_FLAG_WAITERS;
533	if (oldcount & RWSEM_FLAG_HANDOFF)
534	adjustment -= RWSEM_FLAG_HANDOFF;
535	} else if (woken) {
536	/*
537	* When we've woken a reader, we no longer need to force
538	* writers to give up the lock and we can clear HANDOFF.
539	*/
540	if (oldcount & RWSEM_FLAG_HANDOFF)
541	adjustment -= RWSEM_FLAG_HANDOFF;
542	}
543
544	if (adjustment)
545	atomic_long_add(i: adjustment, v: &sem->count);
546
547	/ 2nd pass /
548	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
549	struct task_struct *tsk;
550
551	tsk = waiter->task;
552	get_task_struct(t: tsk);
553
554	/*
555	* Ensure calling get_task_struct() before setting the reader
556	* waiter to nil such that rwsem_down_read_slowpath() cannot
557	* race with do_exit() by always holding a reference count
558	* to the task to wakeup.
559	*/
560	smp_store_release(&waiter->task, NULL);
561	/*
562	* Ensure issuing the wakeup (either by us or someone else)
563	* after setting the reader waiter to nil.
564	*/
565	wake_q_add_safe(head: wake_q, task: tsk);
566	}
567	}
568
569	/*
570	* Remove a waiter and try to wake up other waiters in the wait queue
571	* This function is called from the out_nolock path of both the reader and
572	* writer slowpaths with wait_lock held. It releases the wait_lock and
573	* optionally wake up waiters before it returns.
574	*/
575	static inline void
576	rwsem_del_wake_waiter(struct rw_semaphore sem, struct* rwsem_waiter *waiter,
577	struct wake_q_head *wake_q)
578	__releases(&sem->wait_lock)
579	{
580	bool first = rwsem_first_waiter(sem) == waiter;
581
582	wake_q_init(head: wake_q);
583
584	/*
585	* If the wait_list isn't empty and the waiter to be deleted is
586	* the first waiter, we wake up the remaining waiters as they may
587	* be eligible to acquire or spin on the lock.
588	*/
589	if (rwsem_del_waiter(sem, waiter) && first)
590	rwsem_mark_wake(sem, wake_type: RWSEM_WAKE_ANY, wake_q);
591	raw_spin_unlock_irq(&sem->wait_lock);
592	if (!wake_q_empty(head: wake_q))
593	wake_up_q(head: wake_q);
594	}
595
596	/*
597	* This function must be called with the sem->wait_lock held to prevent
598	* race conditions between checking the rwsem wait list and setting the
599	* sem->count accordingly.
600	*
601	* Implies rwsem_del_waiter() on success.
602	*/
603	static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
604	struct rwsem_waiter *waiter)
605	{
606	struct rwsem_waiter *first = rwsem_first_waiter(sem);
607	long count, new;
608
609	lockdep_assert_held(&sem->wait_lock);
610
611	count = atomic_long_read(v: &sem->count);
612	do {
613	bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
614
615	if (has_handoff) {
616	/*
617	* Honor handoff bit and yield only when the first
618	* waiter is the one that set it. Otherwisee, we
619	* still try to acquire the rwsem.
620	*/
621	if (first->handoff_set && (waiter != first))
622	return false;
623	}
624
625	new = count;
626
627	if (count & RWSEM_LOCK_MASK) {
628	/*
629	* A waiter (first or not) can set the handoff bit
630	* if it is an RT task or wait in the wait queue
631	* for too long.
632	*/
633	if (has_handoff \|\| (!rt_or_dl_task(p: waiter->task) &&
634	!time_after(jiffies, waiter->timeout)))
635	return false;
636
637	new \|= RWSEM_FLAG_HANDOFF;
638	} else {
639	new \|= RWSEM_WRITER_LOCKED;
640	new &= ~RWSEM_FLAG_HANDOFF;
641
642	if (list_is_singular(head: &sem->wait_list))
643	new &= ~RWSEM_FLAG_WAITERS;
644	}
645	} while (!atomic_long_try_cmpxchg_acquire(v: &sem->count, old: &count, new));
646
647	/*
648	* We have either acquired the lock with handoff bit cleared or set
649	* the handoff bit. Only the first waiter can have its handoff_set
650	* set here to enable optimistic spinning in slowpath loop.
651	*/
652	if (new & RWSEM_FLAG_HANDOFF) {
653	first->handoff_set = true;
654	lockevent_inc(rwsem_wlock_handoff);
655	return false;
656	}
657
658	/*
659	* Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
660	* success.
661	*/
662	list_del(entry: &waiter->list);
663	rwsem_set_owner(sem);
664	return true;
665	}
666
/*
 * The rwsem_spin_on_owner() function returns the following 4 values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader.
 *   OWNER_NONSPINNABLE:
 *		   when optimistic spinning has to stop because either the
 *		   owner stops running, is unknown, or its timeslice has
 *		   been used up.
 *
 * Each state is a distinct single-bit value.
 */
enum owner_state {
	OWNER_NULL		= 1 << 0,
	OWNER_WRITER		= 1 << 1,
	OWNER_READER		= 1 << 2,
	OWNER_NONSPINNABLE	= 1 << 3,
};
684
685	#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
686	/*
687	* Try to acquire write lock before the writer has been put on wait queue.
688	*/
689	static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
690	{
691	long count = atomic_long_read(v: &sem->count);
692
693	while (!(count & (RWSEM_LOCK_MASK\|RWSEM_FLAG_HANDOFF))) {
694	if (atomic_long_try_cmpxchg_acquire(v: &sem->count, old: &count,
695	new: count \| RWSEM_WRITER_LOCKED)) {
696	rwsem_set_owner(sem);
697	lockevent_inc(rwsem_opt_lock);
698	return true;
699	}
700	}
701	return false;
702	}
703
704	static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
705	{
706	struct task_struct *owner;
707	unsigned long flags;
708	bool ret = true;
709
710	if (need_resched()) {
711	lockevent_inc(rwsem_opt_fail);
712	return false;
713	}
714
715	/*
716	* Disable preemption is equal to the RCU read-side crital section,
717	* thus the task_strcut structure won't go away.
718	*/
719	owner = rwsem_owner_flags(sem, pflags: &flags);
720	/*
721	* Don't check the read-owner as the entry may be stale.
722	*/
723	if ((flags & RWSEM_NONSPINNABLE) \|\|
724	(owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
725	ret = false;
726
727	lockevent_cond_inc(rwsem_opt_fail, !ret);
728	return ret;
729	}
730
731	static inline enum owner_state
732	rwsem_owner_state(struct task_struct owner, unsigned* long flags)
733	{
734	if (flags & RWSEM_NONSPINNABLE)
735	return OWNER_NONSPINNABLE;
736
737	if (flags & RWSEM_READER_OWNED)
738	return OWNER_READER;
739
740	return owner ? OWNER_WRITER : OWNER_NULL;
741	}
742
743	static noinline enum owner_state
744	rwsem_spin_on_owner(struct rw_semaphore *sem)
745	{
746	struct task_struct new, owner;
747	unsigned long flags, new_flags;
748	enum owner_state state;
749
750	lockdep_assert_preemption_disabled();
751
752	owner = rwsem_owner_flags(sem, pflags: &flags);
753	state = rwsem_owner_state(owner, flags);
754	if (state != OWNER_WRITER)
755	return state;
756
757	for (;;) {
758	/*
759	* When a waiting writer set the handoff flag, it may spin
760	* on the owner as well. Once that writer acquires the lock,
761	* we can spin on it. So we don't need to quit even when the
762	* handoff bit is set.
763	*/
764	new = rwsem_owner_flags(sem, pflags: &new_flags);
765	if ((new != owner) \|\| (new_flags != flags)) {
766	state = rwsem_owner_state(owner: new, flags: new_flags);
767	break;
768	}
769
770	/*
771	* Ensure we emit the owner->on_cpu, dereference _after_
772	* checking sem->owner still matches owner, if that fails,
773	* owner might point to free()d memory, if it still matches,
774	* our spinning context already disabled preemption which is
775	* equal to RCU read-side crital section ensures the memory
776	* stays valid.
777	*/
778	barrier();
779
780	if (need_resched() \|\| !owner_on_cpu(owner)) {
781	state = OWNER_NONSPINNABLE;
782	break;
783	}
784
785	cpu_relax();
786	}
787
788	return state;
789	}
790
791	/*
792	* Calculate reader-owned rwsem spinning threshold for writer
793	*
794	* The more readers own the rwsem, the longer it will take for them to
795	* wind down and free the rwsem. So the empirical formula used to
796	* determine the actual spinning time limit here is:
797	*
798	* Spinning threshold = (10 + nr_readers/2)us
799	*
800	* The limit is capped to a maximum of 25us (30 readers). This is just
801	* a heuristic and is subjected to change in the future.
802	*/
803	static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
804	{
805	long count = atomic_long_read(v: &sem->count);
806	int readers = count >> RWSEM_READER_SHIFT;
807	u64 delta;
808
809	if (readers > `30`)
810	readers = `30`;
811	delta = (`20` + readers) * NSEC_PER_USEC / `2`;
812
813	return sched_clock() + delta;
814	}
815
816	static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
817	{
818	bool taken = false;
819	int prev_owner_state = OWNER_NULL;
820	int loop = `0`;
821	u64 rspin_threshold = `0`;
822
823	/ sem->wait_lock should not be held when doing optimistic spinning /
824	if (!osq_lock(lock: &sem->osq))
825	goto done;
826
827	/*
828	* Optimistically spin on the owner field and attempt to acquire the
829	* lock whenever the owner changes. Spinning will be stopped when:
830	* 1) the owning writer isn't running; or
831	* 2) readers own the lock and spinning time has exceeded limit.
832	*/
833	for (;;) {
834	enum owner_state owner_state;
835
836	owner_state = rwsem_spin_on_owner(sem);
837	if (owner_state == OWNER_NONSPINNABLE)
838	break;
839
840	/*
841	* Try to acquire the lock
842	*/
843	taken = rwsem_try_write_lock_unqueued(sem);
844
845	if (taken)
846	break;
847
848	/*
849	* Time-based reader-owned rwsem optimistic spinning
850	*/
851	if (owner_state == OWNER_READER) {
852	/*
853	* Re-initialize rspin_threshold every time when
854	* the owner state changes from non-reader to reader.
855	* This allows a writer to steal the lock in between
856	* 2 reader phases and have the threshold reset at
857	* the beginning of the 2nd reader phase.
858	*/
859	if (prev_owner_state != OWNER_READER) {
860	if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
861	break;
862	rspin_threshold = rwsem_rspin_threshold(sem);
863	loop = `0`;
864	}
865
866	/*
867	* Check time threshold once every 16 iterations to
868	* avoid calling sched_clock() too frequently so
869	* as to reduce the average latency between the times
870	* when the lock becomes free and when the spinner
871	* is ready to do a trylock.
872	*/
873	else if (!(++loop & `0xf`) && (sched_clock() > rspin_threshold)) {
874	rwsem_set_nonspinnable(sem);
875	lockevent_inc(rwsem_opt_nospin);
876	break;
877	}
878	}
879
880	/*
881	* An RT task cannot do optimistic spinning if it cannot
882	* be sure the lock holder is running or live-lock may
883	* happen if the current task and the lock holder happen
884	* to run in the same CPU. However, aborting optimistic
885	* spinning while a NULL owner is detected may miss some
886	* opportunity where spinning can continue without causing
887	* problem.
888	*
889	* There are 2 possible cases where an RT task may be able
890	* to continue spinning.
891	*
892	* 1) The lock owner is in the process of releasing the
893	* lock, sem->owner is cleared but the lock has not
894	* been released yet.
895	* 2) The lock was free and owner cleared, but another
896	* task just comes in and acquire the lock before
897	* we try to get it. The new owner may be a spinnable
898	* writer.
899	*
900	* To take advantage of two scenarios listed above, the RT
901	* task is made to retry one more time to see if it can
902	* acquire the lock or continue spinning on the new owning
903	* writer. Of course, if the time lag is long enough or the
904	* new owner is not a writer or spinnable, the RT task will
905	* quit spinning.
906	*
907	* If the owner is a writer, the need_resched() check is
908	* done inside rwsem_spin_on_owner(). If the owner is not
909	* a writer, need_resched() check needs to be done here.
910	*/
911	if (owner_state != OWNER_WRITER) {
912	if (need_resched())
913	break;
914	if (rt_or_dl_task(current) &&
915	(prev_owner_state != OWNER_WRITER))
916	break;
917	}
918	prev_owner_state = owner_state;
919
920	/*
921	* The cpu_relax() call is a compiler barrier which forces
922	* everything in this loop to be re-loaded. We don't need
923	* memory barriers as we'll eventually observe the right
924	* values at the cost of a few extra spins.
925	*/
926	cpu_relax();
927	}
928	osq_unlock(lock: &sem->osq);
929	done:
930	lockevent_cond_inc(rwsem_opt_fail, !taken);
931	return taken;
932	}
933
934	/*
935	* Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
936	* only be called when the reader count reaches 0.
937	*/
938	static inline void clear_nonspinnable(struct rw_semaphore *sem)
939	{
940	if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
941	atomic_long_andnot(RWSEM_NONSPINNABLE, v: &sem->owner);
942	}
943
944	#else
945	static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
946	{
947	return false;
948	}
949
950	static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
951	{
952	return false;
953	}
954
/* Without CONFIG_RWSEM_SPIN_ON_OWNER, this is a no-op. */
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
}
956
957	static inline enum owner_state
958	rwsem_spin_on_owner(struct rw_semaphore *sem)
959	{
960	return OWNER_NONSPINNABLE;
961	}
962	#endif
963
964	/*
965	* Prepare to wake up waiter(s) in the wait queue by putting them into the
966	* given wake_q if the rwsem lock owner isn't a writer. If rwsem is likely
967	* reader-owned, wake up read lock waiters in queue front or wake up any
968	* front waiter otherwise.
969
970	* This is being called from both reader and writer slow paths.
971	*/
972	static inline void rwsem_cond_wake_waiter(struct rw_semaphore sem, long* count,
973	struct wake_q_head *wake_q)
974	{
975	enum rwsem_wake_type wake_type;
976
977	if (count & RWSEM_WRITER_MASK)
978	return;
979
980	if (count & RWSEM_READER_MASK) {
981	wake_type = RWSEM_WAKE_READERS;
982	} else {
983	wake_type = RWSEM_WAKE_ANY;
984	clear_nonspinnable(sem);
985	}
986	rwsem_mark_wake(sem, wake_type, wake_q);
987	}
988
989	/*
990	* Wait for the read lock to be granted
991	*/
992	static struct rw_semaphore __sched *
993	rwsem_down_read_slowpath(struct rw_semaphore sem, long* count, unsigned int state)
994	{
995	long adjustment = -RWSEM_READER_BIAS;
996	long rcnt = (count >> RWSEM_READER_SHIFT);
997	struct rwsem_waiter waiter;
998	DEFINE_WAKE_Q(wake_q);
999
1000	/*
1001	* To prevent a constant stream of readers from starving a sleeping
1002	* writer, don't attempt optimistic lock stealing if the lock is
1003	* very likely owned by readers.
1004	*/
1005	if ((atomic_long_read(v: &sem->owner) & RWSEM_READER_OWNED) &&
1006	(rcnt > `1`) && !(count & RWSEM_WRITER_LOCKED))
1007	goto queue;
1008
1009	/*
1010	* Reader optimistic lock stealing.
1011	*/
1012	if (!(count & (RWSEM_WRITER_LOCKED \| RWSEM_FLAG_HANDOFF))) {
1013	rwsem_set_reader_owned(sem);
1014	lockevent_inc(rwsem_rlock_steal);
1015
1016	/*
1017	* Wake up other readers in the wait queue if it is
1018	* the first reader.
1019	*/
1020	if ((rcnt == `1`) && (count & RWSEM_FLAG_WAITERS)) {
1021	raw_spin_lock_irq(&sem->wait_lock);
1022	if (!list_empty(head: &sem->wait_list))
1023	rwsem_mark_wake(sem, wake_type: RWSEM_WAKE_READ_OWNED,
1024	wake_q: &wake_q);
1025	raw_spin_unlock_irq(&sem->wait_lock);
1026	wake_up_q(head: &wake_q);
1027	}
1028	return sem;
1029	}
1030
1031	queue:
1032	waiter.task = current;
1033	waiter.type = RWSEM_WAITING_FOR_READ;
1034	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1035	waiter.handoff_set = false;
1036
1037	raw_spin_lock_irq(&sem->wait_lock);
1038	if (list_empty(head: &sem->wait_list)) {
1039	/*
1040	* In case the wait queue is empty and the lock isn't owned
1041	* by a writer, this reader can exit the slowpath and return
1042	* immediately as its RWSEM_READER_BIAS has already been set
1043	* in the count.
1044	*/
1045	if (!(atomic_long_read(v: &sem->count) & RWSEM_WRITER_MASK)) {
1046	/ Provide lock ACQUIRE /
1047	smp_acquire__after_ctrl_dep();
1048	raw_spin_unlock_irq(&sem->wait_lock);
1049	rwsem_set_reader_owned(sem);
1050	lockevent_inc(rwsem_rlock_fast);
1051	return sem;
1052	}
1053	adjustment += RWSEM_FLAG_WAITERS;
1054	}
1055	rwsem_add_waiter(sem, waiter: &waiter);
1056
1057	/ we're now waiting on the lock, but no longer actively locking /
1058	count = atomic_long_add_return(i: adjustment, v: &sem->count);
1059
1060	rwsem_cond_wake_waiter(sem, count, wake_q: &wake_q);
1061	raw_spin_unlock_irq(&sem->wait_lock);
1062
1063	if (!wake_q_empty(head: &wake_q))
1064	wake_up_q(head: &wake_q);
1065
1066	trace_contention_begin(lock: sem, LCB_F_READ);
1067	set_current_state(state);
1068
1069	if (state == TASK_UNINTERRUPTIBLE)
1070	hung_task_set_blocker(lock: sem, BLOCKER_TYPE_RWSEM_READER);
1071
1072	/ wait to be given the lock /
1073	for (;;) {
1074	if (!smp_load_acquire(&waiter.task)) {
1075	/ Matches rwsem_mark_wake()'s smp_store_release(). /
1076	break;
1077	}
1078	if (signal_pending_state(state, current)) {
1079	raw_spin_lock_irq(&sem->wait_lock);
1080	if (waiter.task)
1081	goto out_nolock;
1082	raw_spin_unlock_irq(&sem->wait_lock);
1083	/ Ordered by sem->wait_lock against rwsem_mark_wake(). /
1084	break;
1085	}
1086	schedule_preempt_disabled();
1087	lockevent_inc(rwsem_sleep_reader);
1088	set_current_state(state);
1089	}
1090
1091	if (state == TASK_UNINTERRUPTIBLE)
1092	hung_task_clear_blocker();
1093
1094	__set_current_state(TASK_RUNNING);
1095	lockevent_inc(rwsem_rlock);
1096	trace_contention_end(lock: sem, ret: `0`);
1097	return sem;
1098
1099	out_nolock:
1100	rwsem_del_wake_waiter(sem, waiter: &waiter, wake_q: &wake_q);
1101	__set_current_state(TASK_RUNNING);
1102	lockevent_inc(rwsem_rlock_fail);
1103	trace_contention_end(lock: sem, ret: -EINTR);
1104	return ERR_PTR(error: -EINTR);
1105	}
1106
1107	/*
1108	* Wait until we successfully acquire the write lock
1109	*/
1110	static struct rw_semaphore __sched *
1111	rwsem_down_write_slowpath(struct rw_semaphore sem, int* state)
1112	{
1113	struct rwsem_waiter waiter;
1114	DEFINE_WAKE_Q(wake_q);
1115
1116	/ do optimistic spinning and steal lock if possible /
1117	if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
1118	/ rwsem_optimistic_spin() implies ACQUIRE on success /
1119	return sem;
1120	}
1121
1122	/*
1123	* Optimistic spinning failed, proceed to the slowpath
1124	* and block until we can acquire the sem.
1125	*/
1126	waiter.task = current;
1127	waiter.type = RWSEM_WAITING_FOR_WRITE;
1128	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1129	waiter.handoff_set = false;
1130
1131	raw_spin_lock_irq(&sem->wait_lock);
1132	rwsem_add_waiter(sem, waiter: &waiter);
1133
1134	/ we're now waiting on the lock /
1135	if (rwsem_first_waiter(sem) != &waiter) {
1136	rwsem_cond_wake_waiter(sem, count: atomic_long_read(v: &sem->count),
1137	wake_q: &wake_q);
1138	if (!wake_q_empty(head: &wake_q)) {
1139	/*
1140	* We want to minimize wait_lock hold time especially
1141	* when a large number of readers are to be woken up.
1142	*/
1143	raw_spin_unlock_irq(&sem->wait_lock);
1144	wake_up_q(head: &wake_q);
1145	raw_spin_lock_irq(&sem->wait_lock);
1146	}
1147	} else {
1148	atomic_long_or(RWSEM_FLAG_WAITERS, v: &sem->count);
1149	}
1150
1151	/ wait until we successfully acquire the lock /
1152	set_current_state(state);
1153	trace_contention_begin(lock: sem, LCB_F_WRITE);
1154
1155	if (state == TASK_UNINTERRUPTIBLE)
1156	hung_task_set_blocker(lock: sem, BLOCKER_TYPE_RWSEM_WRITER);
1157
1158	for (;;) {
1159	if (rwsem_try_write_lock(sem, waiter: &waiter)) {
1160	/ rwsem_try_write_lock() implies ACQUIRE on success /
1161	break;
1162	}
1163
1164	raw_spin_unlock_irq(&sem->wait_lock);
1165
1166	if (signal_pending_state(state, current))
1167	goto out_nolock;
1168
1169	/*
1170	* After setting the handoff bit and failing to acquire
1171	* the lock, attempt to spin on owner to accelerate lock
1172	* transfer. If the previous owner is a on-cpu writer and it
1173	* has just released the lock, OWNER_NULL will be returned.
1174	* In this case, we attempt to acquire the lock again
1175	* without sleeping.
1176	*/
1177	if (waiter.handoff_set) {
1178	enum owner_state owner_state;
1179
1180	owner_state = rwsem_spin_on_owner(sem);
1181	if (owner_state == OWNER_NULL)
1182	goto trylock_again;
1183	}
1184
1185	schedule_preempt_disabled();
1186	lockevent_inc(rwsem_sleep_writer);
1187	set_current_state(state);
1188	trylock_again:
1189	raw_spin_lock_irq(&sem->wait_lock);
1190	}
1191
1192	if (state == TASK_UNINTERRUPTIBLE)
1193	hung_task_clear_blocker();
1194
1195	__set_current_state(TASK_RUNNING);
1196	raw_spin_unlock_irq(&sem->wait_lock);
1197	lockevent_inc(rwsem_wlock);
1198	trace_contention_end(lock: sem, ret: `0`);
1199	return sem;
1200
1201	out_nolock:
1202	__set_current_state(TASK_RUNNING);
1203	raw_spin_lock_irq(&sem->wait_lock);
1204	rwsem_del_wake_waiter(sem, waiter: &waiter, wake_q: &wake_q);
1205	lockevent_inc(rwsem_wlock_fail);
1206	trace_contention_end(lock: sem, ret: -EINTR);
1207	return ERR_PTR(error: -EINTR);
1208	}
1209
1210	/*
1211	* handle waking up a waiter on the semaphore
1212	* - up_read/up_write has decremented the active part of count if we come here
1213	*/
1214	static struct rw_semaphore rwsem_wake(struct* rw_semaphore *sem)
1215	{
1216	unsigned long flags;
1217	DEFINE_WAKE_Q(wake_q);
1218
1219	raw_spin_lock_irqsave(&sem->wait_lock, flags);
1220
1221	if (!list_empty(head: &sem->wait_list))
1222	rwsem_mark_wake(sem, wake_type: RWSEM_WAKE_ANY, wake_q: &wake_q);
1223
1224	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1225	wake_up_q(head: &wake_q);
1226
1227	return sem;
1228	}
1229
1230	/*
1231	* downgrade a write lock into a read lock
1232	* - caller incremented waiting part of count and discovered it still negative
1233	* - just wake up any readers at the front of the queue
1234	*/
1235	static struct rw_semaphore rwsem_downgrade_wake(struct* rw_semaphore *sem)
1236	{
1237	unsigned long flags;
1238	DEFINE_WAKE_Q(wake_q);
1239
1240	raw_spin_lock_irqsave(&sem->wait_lock, flags);
1241
1242	if (!list_empty(head: &sem->wait_list))
1243	rwsem_mark_wake(sem, wake_type: RWSEM_WAKE_READ_OWNED, wake_q: &wake_q);
1244
1245	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1246	wake_up_q(head: &wake_q);
1247
1248	return sem;
1249	}
1250
1251	/*
1252	* lock for reading
1253	*/
1254	static __always_inline int __down_read_common(struct rw_semaphore sem, int* state)
1255	{
1256	int ret = `0`;
1257	long count;
1258
1259	preempt_disable();
1260	if (!rwsem_read_trylock(sem, cntp: &count)) {
1261	if (IS_ERR(ptr: rwsem_down_read_slowpath(sem, count, state))) {
1262	ret = -EINTR;
1263	goto out;
1264	}
1265	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1266	}
1267	out:
1268	preempt_enable();
1269	return ret;
1270	}
1271
1272	static __always_inline void __down_read(struct rw_semaphore *sem)
1273	{
1274	__down_read_common(sem, TASK_UNINTERRUPTIBLE);
1275	}
1276
1277	static __always_inline int __down_read_interruptible(struct rw_semaphore *sem)
1278	{
1279	return __down_read_common(sem, TASK_INTERRUPTIBLE);
1280	}
1281
1282	static __always_inline int __down_read_killable(struct rw_semaphore *sem)
1283	{
1284	return __down_read_common(sem, TASK_KILLABLE);
1285	}
1286
1287	static inline int __down_read_trylock(struct rw_semaphore *sem)
1288	{
1289	int ret = `0`;
1290	long tmp;
1291
1292	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1293
1294	preempt_disable();
1295	tmp = atomic_long_read(v: &sem->count);
1296	while (!(tmp & RWSEM_READ_FAILED_MASK)) {
1297	if (atomic_long_try_cmpxchg_acquire(v: &sem->count, old: &tmp,
1298	new: tmp + RWSEM_READER_BIAS)) {
1299	rwsem_set_reader_owned(sem);
1300	ret = `1`;
1301	break;
1302	}
1303	}
1304	preempt_enable();
1305	return ret;
1306	}
1307
1308	/*
1309	* lock for writing
1310	*/
1311	static __always_inline int __down_write_common(struct rw_semaphore sem, int* state)
1312	{
1313	int ret = `0`;
1314
1315	preempt_disable();
1316	if (unlikely(!rwsem_write_trylock(sem))) {
1317	if (IS_ERR(ptr: rwsem_down_write_slowpath(sem, state)))
1318	ret = -EINTR;
1319	}
1320	preempt_enable();
1321	return ret;
1322	}
1323
1324	static __always_inline void __down_write(struct rw_semaphore *sem)
1325	{
1326	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
1327	}
1328
1329	static __always_inline int __down_write_killable(struct rw_semaphore *sem)
1330	{
1331	return __down_write_common(sem, TASK_KILLABLE);
1332	}
1333
1334	static inline int __down_write_trylock(struct rw_semaphore *sem)
1335	{
1336	int ret;
1337
1338	preempt_disable();
1339	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1340	ret = rwsem_write_trylock(sem);
1341	preempt_enable();
1342
1343	return ret;
1344	}
1345
1346	/*
1347	* unlock after reading
1348	*/
1349	static inline void __up_read(struct rw_semaphore *sem)
1350	{
1351	long tmp;
1352
1353	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1354	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1355
1356	preempt_disable();
1357	rwsem_clear_reader_owned(sem);
1358	tmp = atomic_long_add_return_release(i: -RWSEM_READER_BIAS, v: &sem->count);
1359	DEBUG_RWSEMS_WARN_ON(tmp < `0`, sem);
1360	if (unlikely((tmp & (RWSEM_LOCK_MASK\|RWSEM_FLAG_WAITERS)) ==
1361	RWSEM_FLAG_WAITERS)) {
1362	clear_nonspinnable(sem);
1363	rwsem_wake(sem);
1364	}
1365	preempt_enable();
1366	}
1367
1368	/*
1369	* unlock after writing
1370	*/
1371	static inline void __up_write(struct rw_semaphore *sem)
1372	{
1373	long tmp;
1374
1375	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1376	/*
1377	* sem->owner may differ from current if the ownership is transferred
1378	* to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
1379	*/
1380	DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
1381	!rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
1382
1383	preempt_disable();
1384	rwsem_clear_owner(sem);
1385	tmp = atomic_long_fetch_add_release(i: -RWSEM_WRITER_LOCKED, v: &sem->count);
1386	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
1387	rwsem_wake(sem);
1388	preempt_enable();
1389	}
1390
1391	/*
1392	* downgrade write lock to read lock
1393	*/
1394	static inline void __downgrade_write(struct rw_semaphore *sem)
1395	{
1396	long tmp;
1397
1398	/*
1399	* When downgrading from exclusive to shared ownership,
1400	* anything inside the write-locked region cannot leak
1401	* into the read side. In contrast, anything in the
1402	* read-locked region is ok to be re-ordered into the
1403	* write side. As such, rely on RELEASE semantics.
1404	*/
1405	DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
1406	preempt_disable();
1407	tmp = atomic_long_fetch_add_release(
1408	i: -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, v: &sem->count);
1409	rwsem_set_reader_owned(sem);
1410	if (tmp & RWSEM_FLAG_WAITERS)
1411	rwsem_downgrade_wake(sem);
1412	preempt_enable();
1413	}
1414
1415	#else /* !CONFIG_PREEMPT_RT */
1416
1417	#define RT_MUTEX_BUILD_MUTEX
1418	#include "rtmutex.c"
1419
1420	#define rwbase_set_and_save_current_state(state) \
1421	set_current_state(state)
1422
1423	#define rwbase_restore_current_state() \
1424	__set_current_state(TASK_RUNNING)
1425
1426	#define rwbase_rtmutex_lock_state(rtm, state) \
1427	__rt_mutex_lock(rtm, state)
1428
1429	#define rwbase_rtmutex_slowlock_locked(rtm, state, wq) \
1430	__rt_mutex_slowlock_locked(rtm, NULL, state, wq)
1431
1432	#define rwbase_rtmutex_unlock(rtm) \
1433	__rt_mutex_unlock(rtm)
1434
1435	#define rwbase_rtmutex_trylock(rtm) \
1436	__rt_mutex_trylock(rtm)
1437
1438	#define rwbase_signal_pending_state(state, current) \
1439	signal_pending_state(state, current)
1440
1441	#define rwbase_pre_schedule() \
1442	rt_mutex_pre_schedule()
1443
1444	#define rwbase_schedule() \
1445	rt_mutex_schedule()
1446
1447	#define rwbase_post_schedule() \
1448	rt_mutex_post_schedule()
1449
1450	#include "rwbase_rt.c"
1451
1452	void __init_rwsem(struct rw_semaphore sem, const* char *name,
1453	struct lock_class_key *key)
1454	{
1455	init_rwbase_rt(&(sem)->rwbase);
1456
1457	#ifdef CONFIG_DEBUG_LOCK_ALLOC
1458	debug_check_no_locks_freed((void )sem, sizeof(sem));
1459	lockdep_init_map_wait(&sem->dep_map, name, key, `0`, LD_WAIT_SLEEP);
1460	#endif
1461	}
1462	EXPORT_SYMBOL(__init_rwsem);
1463
1464	static inline void __down_read(struct rw_semaphore *sem)
1465	{
1466	rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
1467	}
1468
1469	static inline int __down_read_interruptible(struct rw_semaphore *sem)
1470	{
1471	return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE);
1472	}
1473
1474	static inline int __down_read_killable(struct rw_semaphore *sem)
1475	{
1476	return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE);
1477	}
1478
1479	static inline int __down_read_trylock(struct rw_semaphore *sem)
1480	{
1481	return rwbase_read_trylock(&sem->rwbase);
1482	}
1483
1484	static inline void __up_read(struct rw_semaphore *sem)
1485	{
1486	rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
1487	}
1488
1489	static inline void __sched __down_write(struct rw_semaphore *sem)
1490	{
1491	rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
1492	}
1493
1494	static inline int __sched __down_write_killable(struct rw_semaphore *sem)
1495	{
1496	return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE);
1497	}
1498
1499	static inline int __down_write_trylock(struct rw_semaphore *sem)
1500	{
1501	return rwbase_write_trylock(&sem->rwbase);
1502	}
1503
1504	static inline void __up_write(struct rw_semaphore *sem)
1505	{
1506	rwbase_write_unlock(&sem->rwbase);
1507	}
1508
1509	static inline void __downgrade_write(struct rw_semaphore *sem)
1510	{
1511	rwbase_write_downgrade(&sem->rwbase);
1512	}
1513
/*
 * Debug stubs for the common API
 *
 * In this branch DEBUG_RWSEMS_WARN_ON() expands to nothing, so its
 * arguments are never evaluated.
 */
#define DEBUG_RWSEMS_WARN_ON(c, sem)
1516
/* PREEMPT_RT stub: no reader-owner information is recorded here. */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
	/* intentionally empty */
}
1521
1522	static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
1523	{
1524	int count = atomic_read(&sem->rwbase.readers);
1525
1526	return count < `0` && count != READER_BIAS;
1527	}
1528
1529	#endif /* CONFIG_PREEMPT_RT */
1530
1531	/*
1532	* lock for reading
1533	*/
1534	void __sched down_read(struct rw_semaphore *sem)
1535	{
1536	might_sleep();
1537	rwsem_acquire_read(&sem->dep_map, `0`, `0`, _RET_IP_);
1538
1539	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
1540	}
1541	EXPORT_SYMBOL(down_read);
1542
1543	int __sched down_read_interruptible(struct rw_semaphore *sem)
1544	{
1545	might_sleep();
1546	rwsem_acquire_read(&sem->dep_map, `0`, `0`, _RET_IP_);
1547
1548	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
1549	rwsem_release(&sem->dep_map, _RET_IP_);
1550	return -EINTR;
1551	}
1552
1553	return `0`;
1554	}
1555	EXPORT_SYMBOL(down_read_interruptible);
1556
1557	int __sched down_read_killable(struct rw_semaphore *sem)
1558	{
1559	might_sleep();
1560	rwsem_acquire_read(&sem->dep_map, `0`, `0`, _RET_IP_);
1561
1562	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
1563	rwsem_release(&sem->dep_map, _RET_IP_);
1564	return -EINTR;
1565	}
1566
1567	return `0`;
1568	}
1569	EXPORT_SYMBOL(down_read_killable);
1570
1571	/*
1572	* trylock for reading -- returns 1 if successful, 0 if contention
1573	*/
1574	int down_read_trylock(struct rw_semaphore *sem)
1575	{
1576	int ret = __down_read_trylock(sem);
1577
1578	if (ret == `1`)
1579	rwsem_acquire_read(&sem->dep_map, `0`, `1`, _RET_IP_);
1580	return ret;
1581	}
1582	EXPORT_SYMBOL(down_read_trylock);
1583
1584	/*
1585	* lock for writing
1586	*/
1587	void __sched down_write(struct rw_semaphore *sem)
1588	{
1589	might_sleep();
1590	rwsem_acquire(&sem->dep_map, `0`, `0`, _RET_IP_);
1591	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1592	}
1593	EXPORT_SYMBOL(down_write);
1594
1595	/*
1596	* lock for writing
1597	*/
1598	int __sched down_write_killable(struct rw_semaphore *sem)
1599	{
1600	might_sleep();
1601	rwsem_acquire(&sem->dep_map, `0`, `0`, _RET_IP_);
1602
1603	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1604	__down_write_killable)) {
1605	rwsem_release(&sem->dep_map, _RET_IP_);
1606	return -EINTR;
1607	}
1608
1609	return `0`;
1610	}
1611	EXPORT_SYMBOL(down_write_killable);
1612
1613	/*
1614	* trylock for writing -- returns 1 if successful, 0 if contention
1615	*/
1616	int down_write_trylock(struct rw_semaphore *sem)
1617	{
1618	int ret = __down_write_trylock(sem);
1619
1620	if (ret == `1`)
1621	rwsem_acquire(&sem->dep_map, `0`, `1`, _RET_IP_);
1622
1623	return ret;
1624	}
1625	EXPORT_SYMBOL(down_write_trylock);
1626
1627	/*
1628	* release a read lock
1629	*/
1630	void up_read(struct rw_semaphore *sem)
1631	{
1632	rwsem_release(&sem->dep_map, _RET_IP_);
1633	__up_read(sem);
1634	}
1635	EXPORT_SYMBOL(up_read);
1636
1637	/*
1638	* release a write lock
1639	*/
1640	void up_write(struct rw_semaphore *sem)
1641	{
1642	rwsem_release(&sem->dep_map, _RET_IP_);
1643	__up_write(sem);
1644	}
1645	EXPORT_SYMBOL(up_write);
1646
1647	/*
1648	* downgrade write lock to read lock
1649	*/
1650	void downgrade_write(struct rw_semaphore *sem)
1651	{
1652	lock_downgrade(&sem->dep_map, _RET_IP_);
1653	__downgrade_write(sem);
1654	}
1655	EXPORT_SYMBOL(downgrade_write);
1656
1657	#ifdef CONFIG_DEBUG_LOCK_ALLOC
1658
1659	void down_read_nested(struct rw_semaphore sem, int* subclass)
1660	{
1661	might_sleep();
1662	rwsem_acquire_read(&sem->dep_map, subclass, `0`, _RET_IP_);
1663	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
1664	}
1665	EXPORT_SYMBOL(down_read_nested);
1666
1667	int down_read_killable_nested(struct rw_semaphore sem, int* subclass)
1668	{
1669	might_sleep();
1670	rwsem_acquire_read(&sem->dep_map, subclass, `0`, _RET_IP_);
1671
1672	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
1673	rwsem_release(&sem->dep_map, _RET_IP_);
1674	return -EINTR;
1675	}
1676
1677	return `0`;
1678	}
1679	EXPORT_SYMBOL(down_read_killable_nested);
1680
1681	void _down_write_nest_lock(struct rw_semaphore sem, struct* lockdep_map *nest)
1682	{
1683	might_sleep();
1684	rwsem_acquire_nest(&sem->dep_map, `0`, `0`, nest, _RET_IP_);
1685	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1686	}
1687	EXPORT_SYMBOL(_down_write_nest_lock);
1688
1689	void down_read_non_owner(struct rw_semaphore *sem)
1690	{
1691	might_sleep();
1692	__down_read(sem);
1693	/*
1694	* The owner value for a reader-owned lock is mostly for debugging
1695	* purpose only and is not critical to the correct functioning of
1696	* rwsem. So it is perfectly fine to set it in a preempt-enabled
1697	* context here.
1698	*/
1699	__rwsem_set_reader_owned(sem, NULL);
1700	}
1701	EXPORT_SYMBOL(down_read_non_owner);
1702
1703	void down_write_nested(struct rw_semaphore sem, int* subclass)
1704	{
1705	might_sleep();
1706	rwsem_acquire(&sem->dep_map, subclass, `0`, _RET_IP_);
1707	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1708	}
1709	EXPORT_SYMBOL(down_write_nested);
1710
1711	int __sched down_write_killable_nested(struct rw_semaphore sem, int* subclass)
1712	{
1713	might_sleep();
1714	rwsem_acquire(&sem->dep_map, subclass, `0`, _RET_IP_);
1715
1716	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1717	__down_write_killable)) {
1718	rwsem_release(&sem->dep_map, _RET_IP_);
1719	return -EINTR;
1720	}
1721
1722	return `0`;
1723	}
1724	EXPORT_SYMBOL(down_write_killable_nested);
1725
/*
 * Release a read lock on @sem without touching the lockdep state.
 * A debug warning is raised if @sem does not look reader-owned.
 */
void up_read_non_owner(struct rw_semaphore *sem)
{
	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
	__up_read(sem);
}
EXPORT_SYMBOL(up_read_non_owner);
1732
1733	#endif
1734

Browse the source code of Linux/kernel/locking/rwsem.c