/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Queued spinlock defines
 *
 * This file contains macro definitions and functions shared between different
 * qspinlock slow path implementations.
 */
#ifndef __LINUX_QSPINLOCK_H
#define __LINUX_QSPINLOCK_H

#include <asm-generic/percpu.h>
#include <linux/percpu-defs.h>
#include <asm-generic/qspinlock.h>
#include <asm-generic/mcs_spinlock.h>

#define _Q_MAX_NODES	4

/*
 * The pending bit spinning loop count.
 * This heuristic is used to limit the number of lockword accesses
 * made by atomic_cond_read_relaxed when waiting for the lock to
 * transition out of the "== _Q_PENDING_VAL" state. We don't spin
 * indefinitely because there's no guarantee that we'll make forward
 * progress.
 */
#ifndef _Q_PENDING_LOOPS
#define _Q_PENDING_LOOPS	1
#endif
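
/*
 * For illustration only: a slow path typically bounds the wait on the
 * 0,1,0 -> 0,0,1 hand-over roughly like the sketch below (mirroring the
 * pattern used by queued_spin_lock_slowpath()), so that at most
 * _Q_PENDING_LOOPS extra lockword reads are issued:
 *
 *	if (val == _Q_PENDING_VAL) {
 *		int cnt = _Q_PENDING_LOOPS;
 *		val = atomic_cond_read_relaxed(&lock->val,
 *					       (VAL != _Q_PENDING_VAL) || !cnt--);
 *	}
 */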

/*
 * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
 * size and four of them will fit nicely in one 64-byte cacheline. For
 * pvqspinlock, however, we need more space for extra data. To accommodate
 * that, we insert two more long words to pad it up to 32 bytes. IOW, only
 * two of them can fit in a cacheline in this case. That is OK as it is rare
 * to have more than 2 levels of slowpath nesting in actual use. We don't
 * want to penalize pvqspinlocks to optimize for a rare case in native
 * qspinlocks.
 */
struct qnode {
	struct mcs_spinlock mcs;
#ifdef CONFIG_PARAVIRT_SPINLOCKS
	long reserved[2];
#endif
};
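
/*
 * A slow path implementation typically provides its own per-CPU array of
 * these nodes, one per nesting level (task, softirq, hardirq, NMI). A sketch
 * of such a declaration, mirroring the native qspinlock code:
 *
 *	static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[_Q_MAX_NODES]);
 */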

/*
 * We must be able to distinguish between no-tail and the tail at 0:0,
 * therefore increment the cpu number by one.
 */

static inline __pure u32 encode_tail(int cpu, int idx)
{
	u32 tail;

	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */

	return tail;
}
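
/*
 * Worked example: encode_tail(0, 0) yields 1 << _Q_TAIL_CPU_OFFSET, which is
 * non-zero and therefore distinguishable from the "no tail" value of 0;
 * decode_tail() below undoes the +1 to recover cpu 0, idx 0.
 */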

static inline __pure struct mcs_spinlock *decode_tail(u32 tail,
						       struct qnode __percpu *qnodes)
{
	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
}

static inline __pure
struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
{
	return &((struct qnode *)base + idx)->mcs;
}
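
/*
 * For illustration: a slow path typically picks its MCS node by bumping the
 * per-CPU nesting count and indexing into its qnode array, along the lines
 * of the native qspinlock code (qnodes, node, idx and tail below are the
 * caller's own data, not defined in this header):
 *
 *	node = this_cpu_ptr(&qnodes[0].mcs);
 *	idx  = node->count++;
 *	tail = encode_tail(smp_processor_id(), idx);
 *	node = grab_mcs_node(node, idx);
 */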

#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)

#if _Q_PENDING_BITS == 8
/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	WRITE_ONCE(lock->pending, 0);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 *
 * Lock stealing is not allowed if this function is used.
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
}

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail), which heads an address dependency
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	/*
	 * We can use relaxed semantics since the caller ensures that the
	 * MCS node is properly initialized before updating the tail.
	 */
	return (u32)xchg_relaxed(&lock->tail,
				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}

#else /* _Q_PENDING_BITS == 8 */

/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	atomic_andnot(_Q_PENDING_VAL, &lock->val);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
}

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail)
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	u32 old, new;

	old = atomic_read(&lock->val);
	do {
		new = (old & _Q_LOCKED_PENDING_MASK) | tail;
		/*
		 * We can use relaxed semantics since the caller ensures that
		 * the MCS node is properly initialized before updating the
		 * tail.
		 */
	} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));

	return old;
}
#endif /* _Q_PENDING_BITS == 8 */
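
/*
 * For illustration: "heads an address dependency" in the _Q_PENDING_BITS == 8
 * variant above refers to the way a slow path consumes the return value,
 * roughly as in queued_spin_lock_slowpath() (prev, node and qnodes are the
 * caller's own data):
 *
 *	old = xchg_tail(lock, tail);
 *	if (old & _Q_TAIL_MASK) {
 *		prev = decode_tail(old, qnodes);
 *		WRITE_ONCE(prev->next, node);
 *	}
 *
 * The previous tail code word is decoded into a node pointer whose
 * subsequent accesses depend on the value returned by the xchg.
 */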

/**
 * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
 * @lock : Pointer to queued spinlock structure
 * Return: The previous lock value
 *
 * *,*,* -> *,1,*
 */
#ifndef queued_fetch_set_pending_acquire
static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
{
	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
}
#endif
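
/*
 * For illustration: a slow path typically uses this for the 0,0,* -> 0,1,*
 * transition and, on observing contention, undoes its own pending bit and
 * queues instead, roughly as in queued_spin_lock_slowpath():
 *
 *	val = queued_fetch_set_pending_acquire(lock);
 *	if (unlikely(val & ~_Q_LOCKED_MASK)) {
 *		if (!(val & _Q_PENDING_MASK))
 *			clear_pending(lock);
 *		goto queue;
 *	}
 */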

/**
 * set_locked - Set the lock bit and own the lock
 * @lock: Pointer to queued spinlock structure
 *
 * *,*,0 -> *,0,1
 */
static __always_inline void set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
}
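
/*
 * For illustration: this is a plain store to the locked byte only. In
 * queued_spin_lock_slowpath() the queue head first waits for both the locked
 * and pending bits to clear, then either grabs the lock and clears the tail
 * in one go (uncontended) or merely sets the locked byte (contended),
 * roughly:
 *
 *	val = atomic_cond_read_acquire(&lock->val,
 *				       !(VAL & _Q_LOCKED_PENDING_MASK));
 *	if ((val & _Q_TAIL_MASK) == tail &&
 *	    atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
 *		goto release;
 *	set_locked(lock);
 */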

#endif /* __LINUX_QSPINLOCK_H */