// SPDX-License-Identifier: GPL-2.0
#include <linux/irq_work.h>
#include <linux/spinlock.h>
#include <linux/task_work.h>
#include <linux/resume_user_mode.h>

static struct callback_head work_exited; /* all we need is ->next == NULL */

#ifdef CONFIG_IRQ_WORK
static void task_work_set_notify_irq(struct irq_work *entry)
{
	test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
}
static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) =
	IRQ_WORK_INIT_HARD(task_work_set_notify_irq);
#endif

/**
 * task_work_add - ask the @task to execute @work->func()
 * @task: the task which should run the callback
 * @work: the callback to run
 * @notify: how to notify the targeted task
 *
 * Queue @work for task_work_run() below and notify the @task if @notify
 * is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT.
 *
 * @TWA_SIGNAL works like signals, in that it will interrupt the targeted
 * task and run the task_work, regardless of whether the task is currently
 * running in the kernel or userspace.
 * @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a
 * reschedule IPI to force the targeted task to reschedule and run task_work.
 * This can be advantageous if there's no strict requirement that the
 * task_work be run as soon as possible, just whenever the task enters the
 * kernel anyway.
 * @TWA_RESUME work is run only when the task exits the kernel and returns to
 * user mode, or before entering guest mode.
 * @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the
 * current @task and only if the current context is NMI.
 *
 * Fails if the @task is exiting/exited and thus it can't process this @work.
 * Otherwise @work->func() will be called when the @task goes through one of
 * the aforementioned transitions, or exits.
 *
 * If the targeted task is exiting, then an error is returned and the work item
 * is not queued. It's up to the caller to arrange for an alternative mechanism
 * in that case.
 *
 * Note: there is no ordering guarantee on works queued here. The task_work
 * list is LIFO.
 *
 * RETURNS:
 * 0 if the work was queued, -ESRCH if @task is already exiting/exited, or
 * -EINVAL if @TWA_NMI_CURRENT is used for a task other than current or
 * without CONFIG_IRQ_WORK.
 */
int task_work_add(struct task_struct *task, struct callback_head *work,
		  enum task_work_notify_mode notify)
{
	struct callback_head *head;

	if (notify == TWA_NMI_CURRENT) {
		if (WARN_ON_ONCE(task != current))
			return -EINVAL;
		if (!IS_ENABLED(CONFIG_IRQ_WORK))
			return -EINVAL;
	} else {
		kasan_record_aux_stack(work);
	}

	head = READ_ONCE(task->task_works);
	do {
		if (unlikely(head == &work_exited))
			return -ESRCH;
		work->next = head;
	} while (!try_cmpxchg(&task->task_works, &head, work));

	switch (notify) {
	case TWA_NONE:
		break;
	case TWA_RESUME:
		set_notify_resume(task);
		break;
	case TWA_SIGNAL:
		set_notify_signal(task);
		break;
	case TWA_SIGNAL_NO_IPI:
		__set_notify_signal(task);
		break;
#ifdef CONFIG_IRQ_WORK
	case TWA_NMI_CURRENT:
		irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume));
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		break;
	}

	return 0;
}
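
/*
 * Illustrative usage sketch, not part of the upstream file: all "my_*"
 * names are hypothetical and the block is compiled out with #if 0. It
 * shows the usual producer pattern: embed a callback_head in the
 * caller's own structure, queue it with task_work_add(), and recover
 * the structure in the callback via container_of(). Needs
 * <linux/slab.h> for the allocation helpers.
 */
#if 0
struct my_deferred {
	struct callback_head twork;
	int value;
};

static void my_deferred_func(struct callback_head *cb)
{
	struct my_deferred *d = container_of(cb, struct my_deferred, twork);

	/* Runs in the target task's context, on its way back to user mode. */
	pr_info("deferred value %d\n", d->value);
	kfree(d);
}

static int my_defer_to_task(struct task_struct *task, int value)
{
	struct my_deferred *d = kzalloc(sizeof(*d), GFP_KERNEL);
	int ret;

	if (!d)
		return -ENOMEM;
	d->value = value;
	init_task_work(&d->twork, my_deferred_func);
	/* -ESRCH if @task is already past task_work_run() on its exit path. */
	ret = task_work_add(task, &d->twork, TWA_RESUME);
	if (ret)
		kfree(d);
	return ret;
}
#endif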

/**
 * task_work_cancel_match - cancel a pending work added by task_work_add()
 * @task: the task which should execute the work
 * @match: match function to call
 * @data: data to be passed in to match function
 *
 * RETURNS:
 * The found work or NULL if not found.
 */
struct callback_head *
task_work_cancel_match(struct task_struct *task,
		       bool (*match)(struct callback_head *, void *data),
		       void *data)
{
	struct callback_head **pprev = &task->task_works;
	struct callback_head *work;
	unsigned long flags;

	if (likely(!task_work_pending(task)))
		return NULL;
	/*
	 * If cmpxchg() fails we continue without updating pprev.
	 * Either we raced with task_work_add() which added the
	 * new entry before this work, we will find it again. Or
	 * we raced with task_work_run(), *pprev == NULL/exited.
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);
	work = READ_ONCE(*pprev);
	while (work) {
		if (!match(work, data)) {
			pprev = &work->next;
			work = READ_ONCE(*pprev);
		} else if (try_cmpxchg(pprev, &work, work->next))
			break;
	}
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	return work;
}
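
/*
 * Illustrative sketch, compiled out and with hypothetical "my_*" names:
 * a caller that tags its work items can cancel one specific item by
 * matching on data stored next to the callback_head rather than on
 * ->func. The cancelled work is returned, so its memory is owned by the
 * caller again.
 */
#if 0
struct my_tagged_work {
	struct callback_head twork;
	unsigned long tag;
};

static bool my_tag_match(struct callback_head *cb, void *data)
{
	return container_of(cb, struct my_tagged_work, twork)->tag ==
	       (unsigned long)data;
}

static struct my_tagged_work *my_cancel_by_tag(struct task_struct *task,
					       unsigned long tag)
{
	struct callback_head *cb;

	cb = task_work_cancel_match(task, my_tag_match, (void *)tag);
	return cb ? container_of(cb, struct my_tagged_work, twork) : NULL;
}
#endif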

static bool task_work_func_match(struct callback_head *cb, void *data)
{
	return cb->func == data;
}

/**
 * task_work_cancel_func - cancel a pending work matching a function added by task_work_add()
 * @task: the task which should execute the func's work
 * @func: identifies the func to match with a work to remove
 *
 * Find the last queued pending work with ->func == @func and remove
 * it from queue.
 *
 * RETURNS:
 * The found work or NULL if not found.
 */
struct callback_head *
task_work_cancel_func(struct task_struct *task, task_work_func_t func)
{
	return task_work_cancel_match(task, task_work_func_match, func);
}
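
/*
 * Illustrative sketch (compiled out, hypothetical names): cancelling by
 * function pointer is the common teardown pattern when a caller only
 * queues one kind of work. Each cancelled callback_head is handed back,
 * so we free it ourselves; this assumes the work was allocated as a
 * bare callback_head.
 */
#if 0
static void my_retry_func(struct callback_head *cb)
{
	kfree(cb);	/* normal path: the callback frees its own work */
}

static void my_teardown(struct task_struct *task)
{
	struct callback_head *cb;

	while ((cb = task_work_cancel_func(task, my_retry_func)))
		kfree(cb);
}
#endif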

static bool task_work_match(struct callback_head *cb, void *data)
{
	return cb == data;
}

/**
 * task_work_cancel - cancel a pending work added by task_work_add()
 * @task: the task which should execute the work
 * @cb: the callback to remove if queued
 *
 * Remove a callback from a task's queue if queued.
 *
 * RETURNS:
 * True if the callback was queued and got cancelled, false otherwise.
 */
bool task_work_cancel(struct task_struct *task, struct callback_head *cb)
{
	struct callback_head *ret;

	ret = task_work_cancel_match(task, task_work_match, cb);

	return ret == cb;
}
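
/*
 * Illustrative sketch (compiled out, hypothetical names) of the boolean
 * contract: if task_work_cancel() returns true the work never ran and
 * ownership is back with the caller, so it may be freed; if it returns
 * false the callback already ran (or is about to) and must be left to
 * clean up after itself.
 */
#if 0
static void my_undo_func(struct callback_head *cb)
{
	kfree(cb);	/* normal path: the callback owns and frees the work */
}

static int my_queue_then_abort(struct task_struct *task)
{
	struct callback_head *cb = kzalloc(sizeof(*cb), GFP_KERNEL);
	int ret;

	if (!cb)
		return -ENOMEM;
	init_task_work(cb, my_undo_func);
	ret = task_work_add(task, cb, TWA_RESUME);
	if (ret) {
		kfree(cb);
		return ret;
	}
	/* ... a later step fails, so try to take the work back ... */
	if (task_work_cancel(task, cb))
		kfree(cb);
	return 0;
}
#endif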

/**
 * task_work_run - execute the works added by task_work_add()
 *
 * Flush the pending works. Should be used by the core kernel code.
 * Called before the task returns to user mode or stops, or when
 * it exits. In the latter case task_work_add() can no longer add
 * new work after task_work_run() returns.
 */
void task_work_run(void)
{
	struct task_struct *task = current;
	struct callback_head *work, *head, *next;

	for (;;) {
		/*
		 * work->func() can do task_work_add(), do not set
		 * work_exited unless the list is empty.
		 */
		work = READ_ONCE(task->task_works);
		do {
			head = NULL;
			if (!work) {
				if (task->flags & PF_EXITING)
					head = &work_exited;
				else
					break;
			}
		} while (!try_cmpxchg(&task->task_works, &work, head));

		if (!work)
			break;
		/*
		 * Synchronize with task_work_cancel_match(). It cannot remove
		 * the first entry == work, cmpxchg(task_works) must fail.
		 * But it can remove another entry from the ->next list.
		 */
		raw_spin_lock_irq(&task->pi_lock);
		raw_spin_unlock_irq(&task->pi_lock);

		do {
			next = work->next;
			work->func(work);
			work = next;
			cond_resched();
		} while (work);
	}
}
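
/*
 * Illustrative sketch (compiled out, simplified): task_work_run() is
 * meant for core kernel code, not producers. The list is flushed on the
 * way back to user mode (see resume_user_mode_work() in
 * <linux/resume_user_mode.h>) and from the exit path via
 * exit_task_work(). The resume-side check looks roughly like this:
 */
#if 0
static void my_resume_user_mode_sketch(void)
{
	clear_thread_flag(TIF_NOTIFY_RESUME);
	if (unlikely(task_work_pending(current)))
		task_work_run();
}
#endif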