1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
4  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5  */
6 
7 #include "rxe.h"
8 
/* work queue used by rxe_sched_task() to run tasks later; it is
 * created by rxe_alloc_wq() and released by rxe_destroy_wq().
 */
static struct workqueue_struct *rxe_wq;
10 
rxe_alloc_wq(void)11 int rxe_alloc_wq(void)
12 {
13 	rxe_wq = alloc_workqueue("rxe_wq", WQ_UNBOUND, WQ_MAX_ACTIVE);
14 	if (!rxe_wq)
15 		return -ENOMEM;
16 
17 	return 0;
18 }
19 
/* destroy the work queue stored in rxe_wq. rxe_wq itself is
 * not reset here.
 */
void rxe_destroy_wq(void)
{
	destroy_workqueue(rxe_wq);
}
24 
25 /* Check if task is idle i.e. not running, not scheduled in
26  * work queue and not draining. If so move to busy to
27  * reserve a slot in do_task() by setting to busy and taking
28  * a qp reference to cover the gap from now until the task finishes.
29  * state will move out of busy if task returns a non zero value
30  * in do_task(). If state is already busy it is raised to armed
31  * to indicate to do_task that additional pass should be made
32  * over the task.
33  * Context: caller should hold task->lock.
34  * Returns: true if state transitioned from idle to busy else false.
35  */
__reserve_if_idle(struct rxe_task * task)36 static bool __reserve_if_idle(struct rxe_task *task)
37 {
38 	WARN_ON(rxe_read(task->qp) <= 0);
39 
40 	if (task->state == TASK_STATE_IDLE) {
41 		rxe_get(task->qp);
42 		task->state = TASK_STATE_BUSY;
43 		task->num_sched++;
44 		return true;
45 	}
46 
47 	if (task->state == TASK_STATE_BUSY)
48 		task->state = TASK_STATE_ARMED;
49 
50 	return false;
51 }
52 
53 /* check if task is idle or drained and not currently
54  * scheduled in the work queue. This routine is
55  * called by rxe_cleanup_task or rxe_disable_task to
56  * see if the queue is empty.
57  * Context: caller should hold task->lock.
58  * Returns true if done else false.
59  */
__is_done(struct rxe_task * task)60 static bool __is_done(struct rxe_task *task)
61 {
62 	if (work_pending(&task->work))
63 		return false;
64 
65 	if (task->state == TASK_STATE_IDLE ||
66 	    task->state == TASK_STATE_DRAINED) {
67 		return true;
68 	}
69 
70 	return false;
71 }
72 
73 /* a locked version of __is_done */
is_done(struct rxe_task * task)74 static bool is_done(struct rxe_task *task)
75 {
76 	unsigned long flags;
77 	int done;
78 
79 	spin_lock_irqsave(&task->lock, flags);
80 	done = __is_done(task);
81 	spin_unlock_irqrestore(&task->lock, flags);
82 
83 	return done;
84 }
85 
/* do_task is a wrapper for the three tasks (requester,
 * completer, responder) and calls them in a loop until
 * they return a non-zero value. It is called either
 * directly by rxe_run_task or indirectly if rxe_sched_task
 * schedules the task. They must call __reserve_if_idle to
 * move the task to busy before calling or scheduling.
 * The task can also be moved to drained or invalid
 * by calls to rxe_cleanup_task or rxe_disable_task.
 * In that case tasks which get here are not executed but
 * just flushed. The tasks are designed to look to see if
 * there is work to do and then do part of it before returning
 * here with a return value of zero until all the work
 * has been consumed then it returns a non-zero value.
 * The number of times the task can be run is limited by
 * max iterations so one task cannot hold the cpu forever.
 * If the limit is hit and work remains the task is rescheduled.
 *
 * On every path the qp reference taken by __reserve_if_idle
 * is dropped before returning. The last value returned by
 * task->func is stored in task->ret.
 */
static void do_task(struct rxe_task *task)
{
	unsigned int iterations;
	unsigned long flags;
	int resched = 0;
	int cont;
	int ret;

	WARN_ON(rxe_read(task->qp) <= 0);

	/* task was drained or invalidated before it got to run:
	 * flush it by dropping the reference and counting it as
	 * done without calling task->func.
	 * NOTE(review): rxe_put() is called while task->lock is
	 * still held and task is touched afterwards; confirm this
	 * can never be the last qp reference.
	 */
	spin_lock_irqsave(&task->lock, flags);
	if (task->state >= TASK_STATE_DRAINED) {
		rxe_put(task->qp);
		task->num_done++;
		spin_unlock_irqrestore(&task->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&task->lock, flags);

	/* outer loop: one pass per reservation, repeated while
	 * someone armed the task during the previous pass.
	 */
	do {
		iterations = RXE_MAX_ITERATIONS;
		cont = 0;

		/* task->func is called without the lock held. Because
		 * the counter is tested after the call, func may run up
		 * to RXE_MAX_ITERATIONS + 1 times per pass.
		 */
		do {
			ret = task->func(task->qp);
		} while (ret == 0 && iterations-- > 0);

		spin_lock_irqsave(&task->lock, flags);
		/* we're not done yet but we ran out of iterations.
		 * yield the cpu and reschedule the task. state goes
		 * back to idle so that rxe_sched_task() below can
		 * reserve the task again.
		 * NOTE(review): this overwrites the state whatever it
		 * was, including draining; confirm that is intended.
		 */
		if (!ret) {
			task->state = TASK_STATE_IDLE;
			resched = 1;
			goto exit;
		}

		/* func reported completion (non-zero): decide the next
		 * state from what happened to the task while it ran.
		 */
		switch (task->state) {
		case TASK_STATE_BUSY:
			task->state = TASK_STATE_IDLE;
			break;

		/* someone tried to schedule the task while we
		 * were running, keep going
		 */
		case TASK_STATE_ARMED:
			task->state = TASK_STATE_BUSY;
			cont = 1;
			break;

		/* rxe_cleanup_task or rxe_disable_task is waiting
		 * for us, tell it we have finished
		 */
		case TASK_STATE_DRAINING:
			task->state = TASK_STATE_DRAINED;
			break;

		default:
			WARN_ON(1);
			rxe_dbg_qp(task->qp, "unexpected task state = %d",
				   task->state);
			task->state = TASK_STATE_IDLE;
		}

exit:
		/* this reservation is finished unless we are going
		 * around again; the counters are expected to match
		 * here and a mismatch is reported.
		 */
		if (!cont) {
			task->num_done++;
			if (WARN_ON(task->num_done != task->num_sched))
				rxe_dbg_qp(
					task->qp,
					"%ld tasks scheduled, %ld tasks done",
					task->num_sched, task->num_done);
		}
		spin_unlock_irqrestore(&task->lock, flags);
	} while (cont);

	task->ret = ret;

	/* reschedule before dropping our reference so that the new
	 * reservation takes its own qp reference first.
	 */
	if (resched)
		rxe_sched_task(task);

	rxe_put(task->qp);
}
183 
184 /* wrapper around do_task to fix argument for work queue */
do_work(struct work_struct * work)185 static void do_work(struct work_struct *work)
186 {
187 	do_task(container_of(work, struct rxe_task, work));
188 }
189 
rxe_init_task(struct rxe_task * task,struct rxe_qp * qp,int (* func)(struct rxe_qp *))190 int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
191 		  int (*func)(struct rxe_qp *))
192 {
193 	WARN_ON(rxe_read(qp) <= 0);
194 
195 	task->qp = qp;
196 	task->func = func;
197 	task->state = TASK_STATE_IDLE;
198 	spin_lock_init(&task->lock);
199 	INIT_WORK(&task->work, do_work);
200 
201 	return 0;
202 }
203 
204 /* rxe_cleanup_task is only called from rxe_do_qp_cleanup in
205  * process context. The qp is already completed with no
206  * remaining references. Once the queue is drained the
207  * task is moved to invalid and returns. The qp cleanup
208  * code then calls the task functions directly without
209  * using the task struct to drain any late arriving packets
210  * or work requests.
211  */
rxe_cleanup_task(struct rxe_task * task)212 void rxe_cleanup_task(struct rxe_task *task)
213 {
214 	unsigned long flags;
215 
216 	spin_lock_irqsave(&task->lock, flags);
217 	if (!__is_done(task) && task->state < TASK_STATE_DRAINED) {
218 		task->state = TASK_STATE_DRAINING;
219 	} else {
220 		task->state = TASK_STATE_INVALID;
221 		spin_unlock_irqrestore(&task->lock, flags);
222 		return;
223 	}
224 	spin_unlock_irqrestore(&task->lock, flags);
225 
226 	/* now the task cannot be scheduled or run just wait
227 	 * for the previously scheduled tasks to finish.
228 	 */
229 	while (!is_done(task))
230 		cond_resched();
231 
232 	spin_lock_irqsave(&task->lock, flags);
233 	task->state = TASK_STATE_INVALID;
234 	spin_unlock_irqrestore(&task->lock, flags);
235 }
236 
237 /* run the task inline if it is currently idle
238  * cannot call do_task holding the lock
239  */
rxe_run_task(struct rxe_task * task)240 void rxe_run_task(struct rxe_task *task)
241 {
242 	unsigned long flags;
243 	bool run;
244 
245 	WARN_ON(rxe_read(task->qp) <= 0);
246 
247 	spin_lock_irqsave(&task->lock, flags);
248 	run = __reserve_if_idle(task);
249 	spin_unlock_irqrestore(&task->lock, flags);
250 
251 	if (run)
252 		do_task(task);
253 }
254 
255 /* schedule the task to run later as a work queue entry.
256  * the queue_work call can be called holding
257  * the lock.
258  */
rxe_sched_task(struct rxe_task * task)259 void rxe_sched_task(struct rxe_task *task)
260 {
261 	unsigned long flags;
262 
263 	WARN_ON(rxe_read(task->qp) <= 0);
264 
265 	spin_lock_irqsave(&task->lock, flags);
266 	if (__reserve_if_idle(task))
267 		queue_work(rxe_wq, &task->work);
268 	spin_unlock_irqrestore(&task->lock, flags);
269 }
270 
271 /* rxe_disable/enable_task are only called from
272  * rxe_modify_qp in process context. Task is moved
273  * to the drained state by do_task.
274  */
rxe_disable_task(struct rxe_task * task)275 void rxe_disable_task(struct rxe_task *task)
276 {
277 	unsigned long flags;
278 
279 	WARN_ON(rxe_read(task->qp) <= 0);
280 
281 	spin_lock_irqsave(&task->lock, flags);
282 	if (!__is_done(task) && task->state < TASK_STATE_DRAINED) {
283 		task->state = TASK_STATE_DRAINING;
284 	} else {
285 		task->state = TASK_STATE_DRAINED;
286 		spin_unlock_irqrestore(&task->lock, flags);
287 		return;
288 	}
289 	spin_unlock_irqrestore(&task->lock, flags);
290 
291 	while (!is_done(task))
292 		cond_resched();
293 
294 	spin_lock_irqsave(&task->lock, flags);
295 	task->state = TASK_STATE_DRAINED;
296 	spin_unlock_irqrestore(&task->lock, flags);
297 }
298 
rxe_enable_task(struct rxe_task * task)299 void rxe_enable_task(struct rxe_task *task)
300 {
301 	unsigned long flags;
302 
303 	WARN_ON(rxe_read(task->qp) <= 0);
304 
305 	spin_lock_irqsave(&task->lock, flags);
306 	if (task->state == TASK_STATE_INVALID) {
307 		spin_unlock_irqrestore(&task->lock, flags);
308 		return;
309 	}
310 
311 	task->state = TASK_STATE_IDLE;
312 	spin_unlock_irqrestore(&task->lock, flags);
313 }
314