xref: /openbmc/linux/drivers/accel/habanalabs/common/hw_queue.c (revision 9a87ffc99ec8eb8d35eed7c4f816d75f5cc9662e)
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */
7*e65e175bSOded Gabbay 
8*e65e175bSOded Gabbay #include "habanalabs.h"
9*e65e175bSOded Gabbay 
10*e65e175bSOded Gabbay #include <linux/slab.h>
11*e65e175bSOded Gabbay 
12*e65e175bSOded Gabbay /*
13*e65e175bSOded Gabbay  * hl_queue_add_ptr - add to pi or ci and checks if it wraps around
14*e65e175bSOded Gabbay  *
15*e65e175bSOded Gabbay  * @ptr: the current pi/ci value
16*e65e175bSOded Gabbay  * @val: the amount to add
17*e65e175bSOded Gabbay  *
18*e65e175bSOded Gabbay  * Add val to ptr. It can go until twice the queue length.
19*e65e175bSOded Gabbay  */
hl_hw_queue_add_ptr(u32 ptr,u16 val)20*e65e175bSOded Gabbay inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
21*e65e175bSOded Gabbay {
22*e65e175bSOded Gabbay 	ptr += val;
23*e65e175bSOded Gabbay 	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
24*e65e175bSOded Gabbay 	return ptr;
25*e65e175bSOded Gabbay }
/*
 * queue_ci_get - read a queue's consumer index in wrapped form
 *
 * @ci: the atomic consumer index counter
 * @queue_len: the queue length used to build the wrap mask
 *
 * Returns the current counter value masked with ((queue_len << 1) - 1).
 */
static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
{
	u32 wrap_mask = (queue_len << 1) - 1;

	return atomic_read(ci) & wrap_mask;
}
30*e65e175bSOded Gabbay 
/*
 * queue_free_slots - compute how many entries are free in a queue
 *
 * @q: pointer to hl_hw_queue structure
 * @queue_len: the queue length
 *
 * The distance between pi and the wrapped ci is computed as a signed value.
 * A non-negative distance yields (queue_len - distance); a negative one
 * yields (|distance| - queue_len).
 */
static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
	int ci = queue_ci_get(&q->ci, queue_len);
	int delta = q->pi - ci;

	if (delta < 0)
		return abs(delta) - queue_len;

	return queue_len - delta;
}
40*e65e175bSOded Gabbay 
hl_hw_queue_update_ci(struct hl_cs * cs)41*e65e175bSOded Gabbay void hl_hw_queue_update_ci(struct hl_cs *cs)
42*e65e175bSOded Gabbay {
43*e65e175bSOded Gabbay 	struct hl_device *hdev = cs->ctx->hdev;
44*e65e175bSOded Gabbay 	struct hl_hw_queue *q;
45*e65e175bSOded Gabbay 	int i;
46*e65e175bSOded Gabbay 
47*e65e175bSOded Gabbay 	if (hdev->disabled)
48*e65e175bSOded Gabbay 		return;
49*e65e175bSOded Gabbay 
50*e65e175bSOded Gabbay 	q = &hdev->kernel_queues[0];
51*e65e175bSOded Gabbay 
52*e65e175bSOded Gabbay 	/* There are no internal queues if H/W queues are being used */
53*e65e175bSOded Gabbay 	if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
54*e65e175bSOded Gabbay 		return;
55*e65e175bSOded Gabbay 
56*e65e175bSOded Gabbay 	/* We must increment CI for every queue that will never get a
57*e65e175bSOded Gabbay 	 * completion, there are 2 scenarios this can happen:
58*e65e175bSOded Gabbay 	 * 1. All queues of a non completion CS will never get a completion.
59*e65e175bSOded Gabbay 	 * 2. Internal queues never gets completion.
60*e65e175bSOded Gabbay 	 */
61*e65e175bSOded Gabbay 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
62*e65e175bSOded Gabbay 		if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
63*e65e175bSOded Gabbay 			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
64*e65e175bSOded Gabbay 	}
65*e65e175bSOded Gabbay }
66*e65e175bSOded Gabbay 
67*e65e175bSOded Gabbay /*
68*e65e175bSOded Gabbay  * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
69*e65e175bSOded Gabbay  *                                H/W queue.
70*e65e175bSOded Gabbay  * @hdev: pointer to habanalabs device structure
71*e65e175bSOded Gabbay  * @q: pointer to habanalabs queue structure
72*e65e175bSOded Gabbay  * @ctl: BD's control word
73*e65e175bSOded Gabbay  * @len: BD's length
74*e65e175bSOded Gabbay  * @ptr: BD's pointer
75*e65e175bSOded Gabbay  *
76*e65e175bSOded Gabbay  * This function assumes there is enough space on the queue to submit a new
77*e65e175bSOded Gabbay  * BD to it. It initializes the next BD and calls the device specific
78*e65e175bSOded Gabbay  * function to set the pi (and doorbell)
79*e65e175bSOded Gabbay  *
80*e65e175bSOded Gabbay  * This function must be called when the scheduler mutex is taken
81*e65e175bSOded Gabbay  *
82*e65e175bSOded Gabbay  */
hl_hw_queue_submit_bd(struct hl_device * hdev,struct hl_hw_queue * q,u32 ctl,u32 len,u64 ptr)83*e65e175bSOded Gabbay void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
84*e65e175bSOded Gabbay 		u32 ctl, u32 len, u64 ptr)
85*e65e175bSOded Gabbay {
86*e65e175bSOded Gabbay 	struct hl_bd *bd;
87*e65e175bSOded Gabbay 
88*e65e175bSOded Gabbay 	bd = q->kernel_address;
89*e65e175bSOded Gabbay 	bd += hl_pi_2_offset(q->pi);
90*e65e175bSOded Gabbay 	bd->ctl = cpu_to_le32(ctl);
91*e65e175bSOded Gabbay 	bd->len = cpu_to_le32(len);
92*e65e175bSOded Gabbay 	bd->ptr = cpu_to_le64(ptr);
93*e65e175bSOded Gabbay 
94*e65e175bSOded Gabbay 	q->pi = hl_queue_inc_ptr(q->pi);
95*e65e175bSOded Gabbay 	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
96*e65e175bSOded Gabbay }
97*e65e175bSOded Gabbay 
98*e65e175bSOded Gabbay /*
99*e65e175bSOded Gabbay  * ext_queue_sanity_checks - perform some sanity checks on external queue
100*e65e175bSOded Gabbay  *
101*e65e175bSOded Gabbay  * @hdev              : pointer to hl_device structure
102*e65e175bSOded Gabbay  * @q                 :	pointer to hl_hw_queue structure
103*e65e175bSOded Gabbay  * @num_of_entries    : how many entries to check for space
104*e65e175bSOded Gabbay  * @reserve_cq_entry  :	whether to reserve an entry in the cq
105*e65e175bSOded Gabbay  *
106*e65e175bSOded Gabbay  * H/W queues spinlock should be taken before calling this function
107*e65e175bSOded Gabbay  *
108*e65e175bSOded Gabbay  * Perform the following:
109*e65e175bSOded Gabbay  * - Make sure we have enough space in the h/w queue
110*e65e175bSOded Gabbay  * - Make sure we have enough space in the completion queue
111*e65e175bSOded Gabbay  * - Reserve space in the completion queue (needs to be reversed if there
112*e65e175bSOded Gabbay  *   is a failure down the road before the actual submission of work). Only
113*e65e175bSOded Gabbay  *   do this action if reserve_cq_entry is true
114*e65e175bSOded Gabbay  *
115*e65e175bSOded Gabbay  */
ext_queue_sanity_checks(struct hl_device * hdev,struct hl_hw_queue * q,int num_of_entries,bool reserve_cq_entry)116*e65e175bSOded Gabbay static int ext_queue_sanity_checks(struct hl_device *hdev,
117*e65e175bSOded Gabbay 				struct hl_hw_queue *q, int num_of_entries,
118*e65e175bSOded Gabbay 				bool reserve_cq_entry)
119*e65e175bSOded Gabbay {
120*e65e175bSOded Gabbay 	atomic_t *free_slots =
121*e65e175bSOded Gabbay 			&hdev->completion_queue[q->cq_id].free_slots_cnt;
122*e65e175bSOded Gabbay 	int free_slots_cnt;
123*e65e175bSOded Gabbay 
124*e65e175bSOded Gabbay 	/* Check we have enough space in the queue */
125*e65e175bSOded Gabbay 	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
126*e65e175bSOded Gabbay 
127*e65e175bSOded Gabbay 	if (free_slots_cnt < num_of_entries) {
128*e65e175bSOded Gabbay 		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
129*e65e175bSOded Gabbay 			q->hw_queue_id, num_of_entries);
130*e65e175bSOded Gabbay 		return -EAGAIN;
131*e65e175bSOded Gabbay 	}
132*e65e175bSOded Gabbay 
133*e65e175bSOded Gabbay 	if (reserve_cq_entry) {
134*e65e175bSOded Gabbay 		/*
135*e65e175bSOded Gabbay 		 * Check we have enough space in the completion queue
136*e65e175bSOded Gabbay 		 * Add -1 to counter (decrement) unless counter was already 0
137*e65e175bSOded Gabbay 		 * In that case, CQ is full so we can't submit a new CB because
138*e65e175bSOded Gabbay 		 * we won't get ack on its completion
139*e65e175bSOded Gabbay 		 * atomic_add_unless will return 0 if counter was already 0
140*e65e175bSOded Gabbay 		 */
141*e65e175bSOded Gabbay 		if (atomic_add_negative(num_of_entries * -1, free_slots)) {
142*e65e175bSOded Gabbay 			dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
143*e65e175bSOded Gabbay 				num_of_entries, q->hw_queue_id);
144*e65e175bSOded Gabbay 			atomic_add(num_of_entries, free_slots);
145*e65e175bSOded Gabbay 			return -EAGAIN;
146*e65e175bSOded Gabbay 		}
147*e65e175bSOded Gabbay 	}
148*e65e175bSOded Gabbay 
149*e65e175bSOded Gabbay 	return 0;
150*e65e175bSOded Gabbay }
151*e65e175bSOded Gabbay 
152*e65e175bSOded Gabbay /*
153*e65e175bSOded Gabbay  * int_queue_sanity_checks - perform some sanity checks on internal queue
154*e65e175bSOded Gabbay  *
155*e65e175bSOded Gabbay  * @hdev              : pointer to hl_device structure
156*e65e175bSOded Gabbay  * @q                 :	pointer to hl_hw_queue structure
157*e65e175bSOded Gabbay  * @num_of_entries    : how many entries to check for space
158*e65e175bSOded Gabbay  *
159*e65e175bSOded Gabbay  * H/W queues spinlock should be taken before calling this function
160*e65e175bSOded Gabbay  *
161*e65e175bSOded Gabbay  * Perform the following:
162*e65e175bSOded Gabbay  * - Make sure we have enough space in the h/w queue
163*e65e175bSOded Gabbay  *
164*e65e175bSOded Gabbay  */
int_queue_sanity_checks(struct hl_device * hdev,struct hl_hw_queue * q,int num_of_entries)165*e65e175bSOded Gabbay static int int_queue_sanity_checks(struct hl_device *hdev,
166*e65e175bSOded Gabbay 					struct hl_hw_queue *q,
167*e65e175bSOded Gabbay 					int num_of_entries)
168*e65e175bSOded Gabbay {
169*e65e175bSOded Gabbay 	int free_slots_cnt;
170*e65e175bSOded Gabbay 
171*e65e175bSOded Gabbay 	if (num_of_entries > q->int_queue_len) {
172*e65e175bSOded Gabbay 		dev_err(hdev->dev,
173*e65e175bSOded Gabbay 			"Cannot populate queue %u with %u jobs\n",
174*e65e175bSOded Gabbay 			q->hw_queue_id, num_of_entries);
175*e65e175bSOded Gabbay 		return -ENOMEM;
176*e65e175bSOded Gabbay 	}
177*e65e175bSOded Gabbay 
178*e65e175bSOded Gabbay 	/* Check we have enough space in the queue */
179*e65e175bSOded Gabbay 	free_slots_cnt = queue_free_slots(q, q->int_queue_len);
180*e65e175bSOded Gabbay 
181*e65e175bSOded Gabbay 	if (free_slots_cnt < num_of_entries) {
182*e65e175bSOded Gabbay 		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
183*e65e175bSOded Gabbay 			q->hw_queue_id, num_of_entries);
184*e65e175bSOded Gabbay 		return -EAGAIN;
185*e65e175bSOded Gabbay 	}
186*e65e175bSOded Gabbay 
187*e65e175bSOded Gabbay 	return 0;
188*e65e175bSOded Gabbay }
189*e65e175bSOded Gabbay 
190*e65e175bSOded Gabbay /*
191*e65e175bSOded Gabbay  * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
192*e65e175bSOded Gabbay  * @hdev: Pointer to hl_device structure.
193*e65e175bSOded Gabbay  * @q: Pointer to hl_hw_queue structure.
194*e65e175bSOded Gabbay  * @num_of_entries: How many entries to check for space.
195*e65e175bSOded Gabbay  *
196*e65e175bSOded Gabbay  * Notice: We do not reserve queue entries so this function mustn't be called
197*e65e175bSOded Gabbay  *         more than once per CS for the same queue
198*e65e175bSOded Gabbay  *
199*e65e175bSOded Gabbay  */
hw_queue_sanity_checks(struct hl_device * hdev,struct hl_hw_queue * q,int num_of_entries)200*e65e175bSOded Gabbay static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
201*e65e175bSOded Gabbay 					int num_of_entries)
202*e65e175bSOded Gabbay {
203*e65e175bSOded Gabbay 	int free_slots_cnt;
204*e65e175bSOded Gabbay 
205*e65e175bSOded Gabbay 	/* Check we have enough space in the queue */
206*e65e175bSOded Gabbay 	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
207*e65e175bSOded Gabbay 
208*e65e175bSOded Gabbay 	if (free_slots_cnt < num_of_entries) {
209*e65e175bSOded Gabbay 		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
210*e65e175bSOded Gabbay 			q->hw_queue_id, num_of_entries);
211*e65e175bSOded Gabbay 		return -EAGAIN;
212*e65e175bSOded Gabbay 	}
213*e65e175bSOded Gabbay 
214*e65e175bSOded Gabbay 	return 0;
215*e65e175bSOded Gabbay }
216*e65e175bSOded Gabbay 
217*e65e175bSOded Gabbay /*
218*e65e175bSOded Gabbay  * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
219*e65e175bSOded Gabbay  *
220*e65e175bSOded Gabbay  * @hdev: pointer to hl_device structure
221*e65e175bSOded Gabbay  * @hw_queue_id: Queue's type
222*e65e175bSOded Gabbay  * @cb_size: size of CB
223*e65e175bSOded Gabbay  * @cb_ptr: pointer to CB location
224*e65e175bSOded Gabbay  *
225*e65e175bSOded Gabbay  * This function sends a single CB, that must NOT generate a completion entry.
226*e65e175bSOded Gabbay  * Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()'
227*e65e175bSOded Gabbay  */
hl_hw_queue_send_cb_no_cmpl(struct hl_device * hdev,u32 hw_queue_id,u32 cb_size,u64 cb_ptr)228*e65e175bSOded Gabbay int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
229*e65e175bSOded Gabbay 				u32 cb_size, u64 cb_ptr)
230*e65e175bSOded Gabbay {
231*e65e175bSOded Gabbay 	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
232*e65e175bSOded Gabbay 	int rc = 0;
233*e65e175bSOded Gabbay 
234*e65e175bSOded Gabbay 	hdev->asic_funcs->hw_queues_lock(hdev);
235*e65e175bSOded Gabbay 
236*e65e175bSOded Gabbay 	if (hdev->disabled) {
237*e65e175bSOded Gabbay 		rc = -EPERM;
238*e65e175bSOded Gabbay 		goto out;
239*e65e175bSOded Gabbay 	}
240*e65e175bSOded Gabbay 
241*e65e175bSOded Gabbay 	/*
242*e65e175bSOded Gabbay 	 * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
243*e65e175bSOded Gabbay 	 * type only on init phase, when the queues are empty and being tested,
244*e65e175bSOded Gabbay 	 * so there is no need for sanity checks.
245*e65e175bSOded Gabbay 	 */
246*e65e175bSOded Gabbay 	if (q->queue_type != QUEUE_TYPE_HW) {
247*e65e175bSOded Gabbay 		rc = ext_queue_sanity_checks(hdev, q, 1, false);
248*e65e175bSOded Gabbay 		if (rc)
249*e65e175bSOded Gabbay 			goto out;
250*e65e175bSOded Gabbay 	}
251*e65e175bSOded Gabbay 
252*e65e175bSOded Gabbay 	hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
253*e65e175bSOded Gabbay 
254*e65e175bSOded Gabbay out:
255*e65e175bSOded Gabbay 	hdev->asic_funcs->hw_queues_unlock(hdev);
256*e65e175bSOded Gabbay 
257*e65e175bSOded Gabbay 	return rc;
258*e65e175bSOded Gabbay }
259*e65e175bSOded Gabbay 
260*e65e175bSOded Gabbay /*
261*e65e175bSOded Gabbay  * ext_queue_schedule_job - submit a JOB to an external queue
262*e65e175bSOded Gabbay  *
263*e65e175bSOded Gabbay  * @job: pointer to the job that needs to be submitted to the queue
264*e65e175bSOded Gabbay  *
265*e65e175bSOded Gabbay  * This function must be called when the scheduler mutex is taken
266*e65e175bSOded Gabbay  *
267*e65e175bSOded Gabbay  */
ext_queue_schedule_job(struct hl_cs_job * job)268*e65e175bSOded Gabbay static void ext_queue_schedule_job(struct hl_cs_job *job)
269*e65e175bSOded Gabbay {
270*e65e175bSOded Gabbay 	struct hl_device *hdev = job->cs->ctx->hdev;
271*e65e175bSOded Gabbay 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
272*e65e175bSOded Gabbay 	struct hl_cq_entry cq_pkt;
273*e65e175bSOded Gabbay 	struct hl_cq *cq;
274*e65e175bSOded Gabbay 	u64 cq_addr;
275*e65e175bSOded Gabbay 	struct hl_cb *cb;
276*e65e175bSOded Gabbay 	u32 ctl;
277*e65e175bSOded Gabbay 	u32 len;
278*e65e175bSOded Gabbay 	u64 ptr;
279*e65e175bSOded Gabbay 
280*e65e175bSOded Gabbay 	/*
281*e65e175bSOded Gabbay 	 * Update the JOB ID inside the BD CTL so the device would know what
282*e65e175bSOded Gabbay 	 * to write in the completion queue
283*e65e175bSOded Gabbay 	 */
284*e65e175bSOded Gabbay 	ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);
285*e65e175bSOded Gabbay 
286*e65e175bSOded Gabbay 	cb = job->patched_cb;
287*e65e175bSOded Gabbay 	len = job->job_cb_size;
288*e65e175bSOded Gabbay 	ptr = cb->bus_address;
289*e65e175bSOded Gabbay 
290*e65e175bSOded Gabbay 	/* Skip completion flow in case this is a non completion CS */
291*e65e175bSOded Gabbay 	if (!cs_needs_completion(job->cs))
292*e65e175bSOded Gabbay 		goto submit_bd;
293*e65e175bSOded Gabbay 
294*e65e175bSOded Gabbay 	cq_pkt.data = cpu_to_le32(
295*e65e175bSOded Gabbay 			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
296*e65e175bSOded Gabbay 				& CQ_ENTRY_SHADOW_INDEX_MASK) |
297*e65e175bSOded Gabbay 			FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
298*e65e175bSOded Gabbay 			FIELD_PREP(CQ_ENTRY_READY_MASK, 1));
299*e65e175bSOded Gabbay 
300*e65e175bSOded Gabbay 	/*
301*e65e175bSOded Gabbay 	 * No need to protect pi_offset because scheduling to the
302*e65e175bSOded Gabbay 	 * H/W queues is done under the scheduler mutex
303*e65e175bSOded Gabbay 	 *
304*e65e175bSOded Gabbay 	 * No need to check if CQ is full because it was already
305*e65e175bSOded Gabbay 	 * checked in ext_queue_sanity_checks
306*e65e175bSOded Gabbay 	 */
307*e65e175bSOded Gabbay 	cq = &hdev->completion_queue[q->cq_id];
308*e65e175bSOded Gabbay 	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
309*e65e175bSOded Gabbay 
310*e65e175bSOded Gabbay 	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
311*e65e175bSOded Gabbay 						job->user_cb_size,
312*e65e175bSOded Gabbay 						cq_addr,
313*e65e175bSOded Gabbay 						le32_to_cpu(cq_pkt.data),
314*e65e175bSOded Gabbay 						q->msi_vec,
315*e65e175bSOded Gabbay 						job->contains_dma_pkt);
316*e65e175bSOded Gabbay 
317*e65e175bSOded Gabbay 	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
318*e65e175bSOded Gabbay 
319*e65e175bSOded Gabbay 	cq->pi = hl_cq_inc_ptr(cq->pi);
320*e65e175bSOded Gabbay 
321*e65e175bSOded Gabbay submit_bd:
322*e65e175bSOded Gabbay 	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
323*e65e175bSOded Gabbay }
324*e65e175bSOded Gabbay 
325*e65e175bSOded Gabbay /*
326*e65e175bSOded Gabbay  * int_queue_schedule_job - submit a JOB to an internal queue
327*e65e175bSOded Gabbay  *
328*e65e175bSOded Gabbay  * @job: pointer to the job that needs to be submitted to the queue
329*e65e175bSOded Gabbay  *
330*e65e175bSOded Gabbay  * This function must be called when the scheduler mutex is taken
331*e65e175bSOded Gabbay  *
332*e65e175bSOded Gabbay  */
int_queue_schedule_job(struct hl_cs_job * job)333*e65e175bSOded Gabbay static void int_queue_schedule_job(struct hl_cs_job *job)
334*e65e175bSOded Gabbay {
335*e65e175bSOded Gabbay 	struct hl_device *hdev = job->cs->ctx->hdev;
336*e65e175bSOded Gabbay 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
337*e65e175bSOded Gabbay 	struct hl_bd bd;
338*e65e175bSOded Gabbay 	__le64 *pi;
339*e65e175bSOded Gabbay 
340*e65e175bSOded Gabbay 	bd.ctl = 0;
341*e65e175bSOded Gabbay 	bd.len = cpu_to_le32(job->job_cb_size);
342*e65e175bSOded Gabbay 
343*e65e175bSOded Gabbay 	if (job->is_kernel_allocated_cb)
344*e65e175bSOded Gabbay 		/* bus_address is actually a mmu mapped address
345*e65e175bSOded Gabbay 		 * allocated from an internal pool
346*e65e175bSOded Gabbay 		 */
347*e65e175bSOded Gabbay 		bd.ptr = cpu_to_le64(job->user_cb->bus_address);
348*e65e175bSOded Gabbay 	else
349*e65e175bSOded Gabbay 		bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
350*e65e175bSOded Gabbay 
351*e65e175bSOded Gabbay 	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);
352*e65e175bSOded Gabbay 
353*e65e175bSOded Gabbay 	q->pi++;
354*e65e175bSOded Gabbay 	q->pi &= ((q->int_queue_len << 1) - 1);
355*e65e175bSOded Gabbay 
356*e65e175bSOded Gabbay 	hdev->asic_funcs->pqe_write(hdev, pi, &bd);
357*e65e175bSOded Gabbay 
358*e65e175bSOded Gabbay 	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
359*e65e175bSOded Gabbay }
360*e65e175bSOded Gabbay 
361*e65e175bSOded Gabbay /*
362*e65e175bSOded Gabbay  * hw_queue_schedule_job - submit a JOB to a H/W queue
363*e65e175bSOded Gabbay  *
364*e65e175bSOded Gabbay  * @job: pointer to the job that needs to be submitted to the queue
365*e65e175bSOded Gabbay  *
366*e65e175bSOded Gabbay  * This function must be called when the scheduler mutex is taken
367*e65e175bSOded Gabbay  *
368*e65e175bSOded Gabbay  */
hw_queue_schedule_job(struct hl_cs_job * job)369*e65e175bSOded Gabbay static void hw_queue_schedule_job(struct hl_cs_job *job)
370*e65e175bSOded Gabbay {
371*e65e175bSOded Gabbay 	struct hl_device *hdev = job->cs->ctx->hdev;
372*e65e175bSOded Gabbay 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
373*e65e175bSOded Gabbay 	u64 ptr;
374*e65e175bSOded Gabbay 	u32 offset, ctl, len;
375*e65e175bSOded Gabbay 
376*e65e175bSOded Gabbay 	/*
377*e65e175bSOded Gabbay 	 * Upon PQE completion, COMP_DATA is used as the write data to the
378*e65e175bSOded Gabbay 	 * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
379*e65e175bSOded Gabbay 	 * write address offset in the SM block (QMAN LBW message).
380*e65e175bSOded Gabbay 	 * The write address offset is calculated as "COMP_OFFSET << 2".
381*e65e175bSOded Gabbay 	 */
382*e65e175bSOded Gabbay 	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
383*e65e175bSOded Gabbay 	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
384*e65e175bSOded Gabbay 		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
385*e65e175bSOded Gabbay 
386*e65e175bSOded Gabbay 	len = job->job_cb_size;
387*e65e175bSOded Gabbay 
388*e65e175bSOded Gabbay 	/*
389*e65e175bSOded Gabbay 	 * A patched CB is created only if a user CB was allocated by driver and
390*e65e175bSOded Gabbay 	 * MMU is disabled. If MMU is enabled, the user CB should be used
391*e65e175bSOded Gabbay 	 * instead. If the user CB wasn't allocated by driver, assume that it
392*e65e175bSOded Gabbay 	 * holds an address.
393*e65e175bSOded Gabbay 	 */
394*e65e175bSOded Gabbay 	if (job->patched_cb)
395*e65e175bSOded Gabbay 		ptr = job->patched_cb->bus_address;
396*e65e175bSOded Gabbay 	else if (job->is_kernel_allocated_cb)
397*e65e175bSOded Gabbay 		ptr = job->user_cb->bus_address;
398*e65e175bSOded Gabbay 	else
399*e65e175bSOded Gabbay 		ptr = (u64) (uintptr_t) job->user_cb;
400*e65e175bSOded Gabbay 
401*e65e175bSOded Gabbay 	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
402*e65e175bSOded Gabbay }
403*e65e175bSOded Gabbay 
init_signal_cs(struct hl_device * hdev,struct hl_cs_job * job,struct hl_cs_compl * cs_cmpl)404*e65e175bSOded Gabbay static int init_signal_cs(struct hl_device *hdev,
405*e65e175bSOded Gabbay 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
406*e65e175bSOded Gabbay {
407*e65e175bSOded Gabbay 	struct hl_sync_stream_properties *prop;
408*e65e175bSOded Gabbay 	struct hl_hw_sob *hw_sob;
409*e65e175bSOded Gabbay 	u32 q_idx;
410*e65e175bSOded Gabbay 	int rc = 0;
411*e65e175bSOded Gabbay 
412*e65e175bSOded Gabbay 	q_idx = job->hw_queue_id;
413*e65e175bSOded Gabbay 	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
414*e65e175bSOded Gabbay 	hw_sob = &prop->hw_sob[prop->curr_sob_offset];
415*e65e175bSOded Gabbay 
416*e65e175bSOded Gabbay 	cs_cmpl->hw_sob = hw_sob;
417*e65e175bSOded Gabbay 	cs_cmpl->sob_val = prop->next_sob_val;
418*e65e175bSOded Gabbay 
419*e65e175bSOded Gabbay 	dev_dbg(hdev->dev,
420*e65e175bSOded Gabbay 		"generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
421*e65e175bSOded Gabbay 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
422*e65e175bSOded Gabbay 		cs_cmpl->cs_seq);
423*e65e175bSOded Gabbay 
424*e65e175bSOded Gabbay 	/* we set an EB since we must make sure all oeprations are done
425*e65e175bSOded Gabbay 	 * when sending the signal
426*e65e175bSOded Gabbay 	 */
427*e65e175bSOded Gabbay 	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
428*e65e175bSOded Gabbay 				cs_cmpl->hw_sob->sob_id, 0, true);
429*e65e175bSOded Gabbay 
430*e65e175bSOded Gabbay 	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
431*e65e175bSOded Gabbay 								false);
432*e65e175bSOded Gabbay 
433*e65e175bSOded Gabbay 	job->cs->sob_addr_offset = hw_sob->sob_addr;
434*e65e175bSOded Gabbay 	job->cs->initial_sob_count = prop->next_sob_val - 1;
435*e65e175bSOded Gabbay 
436*e65e175bSOded Gabbay 	return rc;
437*e65e175bSOded Gabbay }
438*e65e175bSOded Gabbay 
hl_hw_queue_encaps_sig_set_sob_info(struct hl_device * hdev,struct hl_cs * cs,struct hl_cs_job * job,struct hl_cs_compl * cs_cmpl)439*e65e175bSOded Gabbay void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
440*e65e175bSOded Gabbay 			struct hl_cs *cs, struct hl_cs_job *job,
441*e65e175bSOded Gabbay 			struct hl_cs_compl *cs_cmpl)
442*e65e175bSOded Gabbay {
443*e65e175bSOded Gabbay 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
444*e65e175bSOded Gabbay 	u32 offset = 0;
445*e65e175bSOded Gabbay 
446*e65e175bSOded Gabbay 	cs_cmpl->hw_sob = handle->hw_sob;
447*e65e175bSOded Gabbay 
448*e65e175bSOded Gabbay 	/* Note that encaps_sig_wait_offset was validated earlier in the flow
449*e65e175bSOded Gabbay 	 * for offset value which exceeds the max reserved signal count.
450*e65e175bSOded Gabbay 	 * always decrement 1 of the offset since when the user
451*e65e175bSOded Gabbay 	 * set offset 1 for example he mean to wait only for the first
452*e65e175bSOded Gabbay 	 * signal only, which will be pre_sob_val, and if he set offset 2
453*e65e175bSOded Gabbay 	 * then the value required is (pre_sob_val + 1) and so on...
454*e65e175bSOded Gabbay 	 * if user set wait offset to 0, then treat it as legacy wait cs,
455*e65e175bSOded Gabbay 	 * wait for the next signal.
456*e65e175bSOded Gabbay 	 */
457*e65e175bSOded Gabbay 	if (job->encaps_sig_wait_offset)
458*e65e175bSOded Gabbay 		offset = job->encaps_sig_wait_offset - 1;
459*e65e175bSOded Gabbay 
460*e65e175bSOded Gabbay 	cs_cmpl->sob_val = handle->pre_sob_val + offset;
461*e65e175bSOded Gabbay }
462*e65e175bSOded Gabbay 
init_wait_cs(struct hl_device * hdev,struct hl_cs * cs,struct hl_cs_job * job,struct hl_cs_compl * cs_cmpl)463*e65e175bSOded Gabbay static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
464*e65e175bSOded Gabbay 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
465*e65e175bSOded Gabbay {
466*e65e175bSOded Gabbay 	struct hl_gen_wait_properties wait_prop;
467*e65e175bSOded Gabbay 	struct hl_sync_stream_properties *prop;
468*e65e175bSOded Gabbay 	struct hl_cs_compl *signal_cs_cmpl;
469*e65e175bSOded Gabbay 	u32 q_idx;
470*e65e175bSOded Gabbay 
471*e65e175bSOded Gabbay 	q_idx = job->hw_queue_id;
472*e65e175bSOded Gabbay 	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
473*e65e175bSOded Gabbay 
474*e65e175bSOded Gabbay 	signal_cs_cmpl = container_of(cs->signal_fence,
475*e65e175bSOded Gabbay 					struct hl_cs_compl,
476*e65e175bSOded Gabbay 					base_fence);
477*e65e175bSOded Gabbay 
478*e65e175bSOded Gabbay 	if (cs->encaps_signals) {
479*e65e175bSOded Gabbay 		/* use the encaps signal handle stored earlier in the flow
480*e65e175bSOded Gabbay 		 * and set the SOB information from the encaps
481*e65e175bSOded Gabbay 		 * signals handle
482*e65e175bSOded Gabbay 		 */
483*e65e175bSOded Gabbay 		hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);
484*e65e175bSOded Gabbay 
485*e65e175bSOded Gabbay 		dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
486*e65e175bSOded Gabbay 				cs->encaps_sig_hdl->q_idx,
487*e65e175bSOded Gabbay 				cs->encaps_sig_hdl->cs_seq,
488*e65e175bSOded Gabbay 				cs_cmpl->sob_val,
489*e65e175bSOded Gabbay 				job->encaps_sig_wait_offset);
490*e65e175bSOded Gabbay 	} else {
491*e65e175bSOded Gabbay 		/* Copy the SOB id and value of the signal CS */
492*e65e175bSOded Gabbay 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
493*e65e175bSOded Gabbay 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
494*e65e175bSOded Gabbay 	}
495*e65e175bSOded Gabbay 
496*e65e175bSOded Gabbay 	/* check again if the signal cs already completed.
497*e65e175bSOded Gabbay 	 * if yes then don't send any wait cs since the hw_sob
498*e65e175bSOded Gabbay 	 * could be in reset already. if signal is not completed
499*e65e175bSOded Gabbay 	 * then get refcount to hw_sob to prevent resetting the sob
500*e65e175bSOded Gabbay 	 * while wait cs is not submitted.
501*e65e175bSOded Gabbay 	 * note that this check is protected by two locks,
502*e65e175bSOded Gabbay 	 * hw queue lock and completion object lock,
503*e65e175bSOded Gabbay 	 * and the same completion object lock also protects
504*e65e175bSOded Gabbay 	 * the hw_sob reset handler function.
505*e65e175bSOded Gabbay 	 * The hw_queue lock prevent out of sync of hw_sob
506*e65e175bSOded Gabbay 	 * refcount value, changed by signal/wait flows.
507*e65e175bSOded Gabbay 	 */
508*e65e175bSOded Gabbay 	spin_lock(&signal_cs_cmpl->lock);
509*e65e175bSOded Gabbay 
510*e65e175bSOded Gabbay 	if (completion_done(&cs->signal_fence->completion)) {
511*e65e175bSOded Gabbay 		spin_unlock(&signal_cs_cmpl->lock);
512*e65e175bSOded Gabbay 		return -EINVAL;
513*e65e175bSOded Gabbay 	}
514*e65e175bSOded Gabbay 
515*e65e175bSOded Gabbay 	kref_get(&cs_cmpl->hw_sob->kref);
516*e65e175bSOded Gabbay 
517*e65e175bSOded Gabbay 	spin_unlock(&signal_cs_cmpl->lock);
518*e65e175bSOded Gabbay 
519*e65e175bSOded Gabbay 	dev_dbg(hdev->dev,
520*e65e175bSOded Gabbay 		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
521*e65e175bSOded Gabbay 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
522*e65e175bSOded Gabbay 		prop->base_mon_id, q_idx, cs->sequence);
523*e65e175bSOded Gabbay 
524*e65e175bSOded Gabbay 	wait_prop.data = (void *) job->patched_cb;
525*e65e175bSOded Gabbay 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
526*e65e175bSOded Gabbay 	wait_prop.sob_mask = 0x1;
527*e65e175bSOded Gabbay 	wait_prop.sob_val = cs_cmpl->sob_val;
528*e65e175bSOded Gabbay 	wait_prop.mon_id = prop->base_mon_id;
529*e65e175bSOded Gabbay 	wait_prop.q_idx = q_idx;
530*e65e175bSOded Gabbay 	wait_prop.size = 0;
531*e65e175bSOded Gabbay 
532*e65e175bSOded Gabbay 	hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
533*e65e175bSOded Gabbay 
534*e65e175bSOded Gabbay 	mb();
535*e65e175bSOded Gabbay 	hl_fence_put(cs->signal_fence);
536*e65e175bSOded Gabbay 	cs->signal_fence = NULL;
537*e65e175bSOded Gabbay 
538*e65e175bSOded Gabbay 	return 0;
539*e65e175bSOded Gabbay }
540*e65e175bSOded Gabbay 
541*e65e175bSOded Gabbay /*
542*e65e175bSOded Gabbay  * init_signal_wait_cs - initialize a signal/wait CS
543*e65e175bSOded Gabbay  * @cs: pointer to the signal/wait CS
544*e65e175bSOded Gabbay  *
545*e65e175bSOded Gabbay  * H/W queues spinlock should be taken before calling this function
546*e65e175bSOded Gabbay  */
init_signal_wait_cs(struct hl_cs * cs)547*e65e175bSOded Gabbay static int init_signal_wait_cs(struct hl_cs *cs)
548*e65e175bSOded Gabbay {
549*e65e175bSOded Gabbay 	struct hl_ctx *ctx = cs->ctx;
550*e65e175bSOded Gabbay 	struct hl_device *hdev = ctx->hdev;
551*e65e175bSOded Gabbay 	struct hl_cs_job *job;
552*e65e175bSOded Gabbay 	struct hl_cs_compl *cs_cmpl =
553*e65e175bSOded Gabbay 			container_of(cs->fence, struct hl_cs_compl, base_fence);
554*e65e175bSOded Gabbay 	int rc = 0;
555*e65e175bSOded Gabbay 
556*e65e175bSOded Gabbay 	/* There is only one job in a signal/wait CS */
557*e65e175bSOded Gabbay 	job = list_first_entry(&cs->job_list, struct hl_cs_job,
558*e65e175bSOded Gabbay 				cs_node);
559*e65e175bSOded Gabbay 
560*e65e175bSOded Gabbay 	if (cs->type & CS_TYPE_SIGNAL)
561*e65e175bSOded Gabbay 		rc = init_signal_cs(hdev, job, cs_cmpl);
562*e65e175bSOded Gabbay 	else if (cs->type & CS_TYPE_WAIT)
563*e65e175bSOded Gabbay 		rc = init_wait_cs(hdev, cs, job, cs_cmpl);
564*e65e175bSOded Gabbay 
565*e65e175bSOded Gabbay 	return rc;
566*e65e175bSOded Gabbay }
567*e65e175bSOded Gabbay 
encaps_sig_first_staged_cs_handler(struct hl_device * hdev,struct hl_cs * cs)568*e65e175bSOded Gabbay static int encaps_sig_first_staged_cs_handler
569*e65e175bSOded Gabbay 			(struct hl_device *hdev, struct hl_cs *cs)
570*e65e175bSOded Gabbay {
571*e65e175bSOded Gabbay 	struct hl_cs_compl *cs_cmpl =
572*e65e175bSOded Gabbay 			container_of(cs->fence,
573*e65e175bSOded Gabbay 					struct hl_cs_compl, base_fence);
574*e65e175bSOded Gabbay 	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
575*e65e175bSOded Gabbay 	struct hl_encaps_signals_mgr *mgr;
576*e65e175bSOded Gabbay 	int rc = 0;
577*e65e175bSOded Gabbay 
578*e65e175bSOded Gabbay 	mgr = &cs->ctx->sig_mgr;
579*e65e175bSOded Gabbay 
580*e65e175bSOded Gabbay 	spin_lock(&mgr->lock);
581*e65e175bSOded Gabbay 	encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
582*e65e175bSOded Gabbay 	if (encaps_sig_hdl) {
583*e65e175bSOded Gabbay 		/*
584*e65e175bSOded Gabbay 		 * Set handler CS sequence,
585*e65e175bSOded Gabbay 		 * the CS which contains the encapsulated signals.
586*e65e175bSOded Gabbay 		 */
587*e65e175bSOded Gabbay 		encaps_sig_hdl->cs_seq = cs->sequence;
588*e65e175bSOded Gabbay 		/* store the handle and set encaps signal indication,
589*e65e175bSOded Gabbay 		 * to be used later in cs_do_release to put the last
590*e65e175bSOded Gabbay 		 * reference to encaps signals handlers.
591*e65e175bSOded Gabbay 		 */
592*e65e175bSOded Gabbay 		cs_cmpl->encaps_signals = true;
593*e65e175bSOded Gabbay 		cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;
594*e65e175bSOded Gabbay 
595*e65e175bSOded Gabbay 		/* set hw_sob pointer in completion object
596*e65e175bSOded Gabbay 		 * since it's used in cs_do_release flow to put
597*e65e175bSOded Gabbay 		 * refcount to sob
598*e65e175bSOded Gabbay 		 */
599*e65e175bSOded Gabbay 		cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
600*e65e175bSOded Gabbay 		cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
601*e65e175bSOded Gabbay 						encaps_sig_hdl->count;
602*e65e175bSOded Gabbay 
603*e65e175bSOded Gabbay 		dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
604*e65e175bSOded Gabbay 				cs->sequence, encaps_sig_hdl->id,
605*e65e175bSOded Gabbay 				encaps_sig_hdl->count,
606*e65e175bSOded Gabbay 				encaps_sig_hdl->q_idx,
607*e65e175bSOded Gabbay 				cs_cmpl->hw_sob->sob_id,
608*e65e175bSOded Gabbay 				cs_cmpl->sob_val);
609*e65e175bSOded Gabbay 
610*e65e175bSOded Gabbay 	} else {
611*e65e175bSOded Gabbay 		dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
612*e65e175bSOded Gabbay 				cs->encaps_sig_hdl_id);
613*e65e175bSOded Gabbay 		rc = -EINVAL;
614*e65e175bSOded Gabbay 	}
615*e65e175bSOded Gabbay 
616*e65e175bSOded Gabbay 	spin_unlock(&mgr->lock);
617*e65e175bSOded Gabbay 
618*e65e175bSOded Gabbay 	return rc;
619*e65e175bSOded Gabbay }
620*e65e175bSOded Gabbay 
621*e65e175bSOded Gabbay /*
622*e65e175bSOded Gabbay  * hl_hw_queue_schedule_cs - schedule a command submission
623*e65e175bSOded Gabbay  * @cs: pointer to the CS
624*e65e175bSOded Gabbay  */
hl_hw_queue_schedule_cs(struct hl_cs * cs)625*e65e175bSOded Gabbay int hl_hw_queue_schedule_cs(struct hl_cs *cs)
626*e65e175bSOded Gabbay {
627*e65e175bSOded Gabbay 	enum hl_device_status status;
628*e65e175bSOded Gabbay 	struct hl_cs_counters_atomic *cntr;
629*e65e175bSOded Gabbay 	struct hl_ctx *ctx = cs->ctx;
630*e65e175bSOded Gabbay 	struct hl_device *hdev = ctx->hdev;
631*e65e175bSOded Gabbay 	struct hl_cs_job *job, *tmp;
632*e65e175bSOded Gabbay 	struct hl_hw_queue *q;
633*e65e175bSOded Gabbay 	int rc = 0, i, cq_cnt;
634*e65e175bSOded Gabbay 	bool first_entry;
635*e65e175bSOded Gabbay 	u32 max_queues;
636*e65e175bSOded Gabbay 
637*e65e175bSOded Gabbay 	cntr = &hdev->aggregated_cs_counters;
638*e65e175bSOded Gabbay 
639*e65e175bSOded Gabbay 	hdev->asic_funcs->hw_queues_lock(hdev);
640*e65e175bSOded Gabbay 
641*e65e175bSOded Gabbay 	if (!hl_device_operational(hdev, &status)) {
642*e65e175bSOded Gabbay 		atomic64_inc(&cntr->device_in_reset_drop_cnt);
643*e65e175bSOded Gabbay 		atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
644*e65e175bSOded Gabbay 		dev_err(hdev->dev,
645*e65e175bSOded Gabbay 			"device is %s, CS rejected!\n", hdev->status[status]);
646*e65e175bSOded Gabbay 		rc = -EPERM;
647*e65e175bSOded Gabbay 		goto out;
648*e65e175bSOded Gabbay 	}
649*e65e175bSOded Gabbay 
650*e65e175bSOded Gabbay 	max_queues = hdev->asic_prop.max_queues;
651*e65e175bSOded Gabbay 
652*e65e175bSOded Gabbay 	q = &hdev->kernel_queues[0];
653*e65e175bSOded Gabbay 	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
654*e65e175bSOded Gabbay 		if (cs->jobs_in_queue_cnt[i]) {
655*e65e175bSOded Gabbay 			switch (q->queue_type) {
656*e65e175bSOded Gabbay 			case QUEUE_TYPE_EXT:
657*e65e175bSOded Gabbay 				rc = ext_queue_sanity_checks(hdev, q,
658*e65e175bSOded Gabbay 						cs->jobs_in_queue_cnt[i],
659*e65e175bSOded Gabbay 						cs_needs_completion(cs) ?
660*e65e175bSOded Gabbay 								true : false);
661*e65e175bSOded Gabbay 				break;
662*e65e175bSOded Gabbay 			case QUEUE_TYPE_INT:
663*e65e175bSOded Gabbay 				rc = int_queue_sanity_checks(hdev, q,
664*e65e175bSOded Gabbay 						cs->jobs_in_queue_cnt[i]);
665*e65e175bSOded Gabbay 				break;
666*e65e175bSOded Gabbay 			case QUEUE_TYPE_HW:
667*e65e175bSOded Gabbay 				rc = hw_queue_sanity_checks(hdev, q,
668*e65e175bSOded Gabbay 						cs->jobs_in_queue_cnt[i]);
669*e65e175bSOded Gabbay 				break;
670*e65e175bSOded Gabbay 			default:
671*e65e175bSOded Gabbay 				dev_err(hdev->dev, "Queue type %d is invalid\n",
672*e65e175bSOded Gabbay 					q->queue_type);
673*e65e175bSOded Gabbay 				rc = -EINVAL;
674*e65e175bSOded Gabbay 				break;
675*e65e175bSOded Gabbay 			}
676*e65e175bSOded Gabbay 
677*e65e175bSOded Gabbay 			if (rc) {
678*e65e175bSOded Gabbay 				atomic64_inc(
679*e65e175bSOded Gabbay 					&ctx->cs_counters.queue_full_drop_cnt);
680*e65e175bSOded Gabbay 				atomic64_inc(&cntr->queue_full_drop_cnt);
681*e65e175bSOded Gabbay 				goto unroll_cq_resv;
682*e65e175bSOded Gabbay 			}
683*e65e175bSOded Gabbay 
684*e65e175bSOded Gabbay 			if (q->queue_type == QUEUE_TYPE_EXT)
685*e65e175bSOded Gabbay 				cq_cnt++;
686*e65e175bSOded Gabbay 		}
687*e65e175bSOded Gabbay 	}
688*e65e175bSOded Gabbay 
689*e65e175bSOded Gabbay 	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
690*e65e175bSOded Gabbay 		rc = init_signal_wait_cs(cs);
691*e65e175bSOded Gabbay 		if (rc)
692*e65e175bSOded Gabbay 			goto unroll_cq_resv;
693*e65e175bSOded Gabbay 	} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
694*e65e175bSOded Gabbay 		rc = hdev->asic_funcs->collective_wait_init_cs(cs);
695*e65e175bSOded Gabbay 		if (rc)
696*e65e175bSOded Gabbay 			goto unroll_cq_resv;
697*e65e175bSOded Gabbay 	}
698*e65e175bSOded Gabbay 
699*e65e175bSOded Gabbay 	rc = hdev->asic_funcs->pre_schedule_cs(cs);
700*e65e175bSOded Gabbay 	if (rc) {
701*e65e175bSOded Gabbay 		dev_err(hdev->dev,
702*e65e175bSOded Gabbay 			"Failed in pre-submission operations of CS %d.%llu\n",
703*e65e175bSOded Gabbay 			ctx->asid, cs->sequence);
704*e65e175bSOded Gabbay 		goto unroll_cq_resv;
705*e65e175bSOded Gabbay 	}
706*e65e175bSOded Gabbay 
707*e65e175bSOded Gabbay 	hdev->shadow_cs_queue[cs->sequence &
708*e65e175bSOded Gabbay 				(hdev->asic_prop.max_pending_cs - 1)] = cs;
709*e65e175bSOded Gabbay 
710*e65e175bSOded Gabbay 	if (cs->encaps_signals && cs->staged_first) {
711*e65e175bSOded Gabbay 		rc = encaps_sig_first_staged_cs_handler(hdev, cs);
712*e65e175bSOded Gabbay 		if (rc)
713*e65e175bSOded Gabbay 			goto unroll_cq_resv;
714*e65e175bSOded Gabbay 	}
715*e65e175bSOded Gabbay 
716*e65e175bSOded Gabbay 	spin_lock(&hdev->cs_mirror_lock);
717*e65e175bSOded Gabbay 
718*e65e175bSOded Gabbay 	/* Verify staged CS exists and add to the staged list */
719*e65e175bSOded Gabbay 	if (cs->staged_cs && !cs->staged_first) {
720*e65e175bSOded Gabbay 		struct hl_cs *staged_cs;
721*e65e175bSOded Gabbay 
722*e65e175bSOded Gabbay 		staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
723*e65e175bSOded Gabbay 		if (!staged_cs) {
724*e65e175bSOded Gabbay 			dev_err(hdev->dev,
725*e65e175bSOded Gabbay 				"Cannot find staged submission sequence %llu",
726*e65e175bSOded Gabbay 				cs->staged_sequence);
727*e65e175bSOded Gabbay 			rc = -EINVAL;
728*e65e175bSOded Gabbay 			goto unlock_cs_mirror;
729*e65e175bSOded Gabbay 		}
730*e65e175bSOded Gabbay 
731*e65e175bSOded Gabbay 		if (is_staged_cs_last_exists(hdev, staged_cs)) {
732*e65e175bSOded Gabbay 			dev_err(hdev->dev,
733*e65e175bSOded Gabbay 				"Staged submission sequence %llu already submitted",
734*e65e175bSOded Gabbay 				cs->staged_sequence);
735*e65e175bSOded Gabbay 			rc = -EINVAL;
736*e65e175bSOded Gabbay 			goto unlock_cs_mirror;
737*e65e175bSOded Gabbay 		}
738*e65e175bSOded Gabbay 
739*e65e175bSOded Gabbay 		list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
740*e65e175bSOded Gabbay 
741*e65e175bSOded Gabbay 		/* update stream map of the first CS */
742*e65e175bSOded Gabbay 		if (hdev->supports_wait_for_multi_cs)
743*e65e175bSOded Gabbay 			staged_cs->fence->stream_master_qid_map |=
744*e65e175bSOded Gabbay 					cs->fence->stream_master_qid_map;
745*e65e175bSOded Gabbay 	}
746*e65e175bSOded Gabbay 
747*e65e175bSOded Gabbay 	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
748*e65e175bSOded Gabbay 
749*e65e175bSOded Gabbay 	/* Queue TDR if the CS is the first entry and if timeout is wanted */
750*e65e175bSOded Gabbay 	first_entry = list_first_entry(&hdev->cs_mirror_list,
751*e65e175bSOded Gabbay 					struct hl_cs, mirror_node) == cs;
752*e65e175bSOded Gabbay 	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
753*e65e175bSOded Gabbay 				first_entry && cs_needs_timeout(cs)) {
754*e65e175bSOded Gabbay 		cs->tdr_active = true;
755*e65e175bSOded Gabbay 		schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies);
756*e65e175bSOded Gabbay 
757*e65e175bSOded Gabbay 	}
758*e65e175bSOded Gabbay 
759*e65e175bSOded Gabbay 	spin_unlock(&hdev->cs_mirror_lock);
760*e65e175bSOded Gabbay 
761*e65e175bSOded Gabbay 	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
762*e65e175bSOded Gabbay 		switch (job->queue_type) {
763*e65e175bSOded Gabbay 		case QUEUE_TYPE_EXT:
764*e65e175bSOded Gabbay 			ext_queue_schedule_job(job);
765*e65e175bSOded Gabbay 			break;
766*e65e175bSOded Gabbay 		case QUEUE_TYPE_INT:
767*e65e175bSOded Gabbay 			int_queue_schedule_job(job);
768*e65e175bSOded Gabbay 			break;
769*e65e175bSOded Gabbay 		case QUEUE_TYPE_HW:
770*e65e175bSOded Gabbay 			hw_queue_schedule_job(job);
771*e65e175bSOded Gabbay 			break;
772*e65e175bSOded Gabbay 		default:
773*e65e175bSOded Gabbay 			break;
774*e65e175bSOded Gabbay 		}
775*e65e175bSOded Gabbay 
776*e65e175bSOded Gabbay 	cs->submitted = true;
777*e65e175bSOded Gabbay 
778*e65e175bSOded Gabbay 	goto out;
779*e65e175bSOded Gabbay 
780*e65e175bSOded Gabbay unlock_cs_mirror:
781*e65e175bSOded Gabbay 	spin_unlock(&hdev->cs_mirror_lock);
782*e65e175bSOded Gabbay unroll_cq_resv:
783*e65e175bSOded Gabbay 	q = &hdev->kernel_queues[0];
784*e65e175bSOded Gabbay 	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
785*e65e175bSOded Gabbay 		if ((q->queue_type == QUEUE_TYPE_EXT) &&
786*e65e175bSOded Gabbay 						(cs->jobs_in_queue_cnt[i])) {
787*e65e175bSOded Gabbay 			atomic_t *free_slots =
788*e65e175bSOded Gabbay 				&hdev->completion_queue[i].free_slots_cnt;
789*e65e175bSOded Gabbay 			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
790*e65e175bSOded Gabbay 			cq_cnt--;
791*e65e175bSOded Gabbay 		}
792*e65e175bSOded Gabbay 	}
793*e65e175bSOded Gabbay 
794*e65e175bSOded Gabbay out:
795*e65e175bSOded Gabbay 	hdev->asic_funcs->hw_queues_unlock(hdev);
796*e65e175bSOded Gabbay 
797*e65e175bSOded Gabbay 	return rc;
798*e65e175bSOded Gabbay }
799*e65e175bSOded Gabbay 
800*e65e175bSOded Gabbay /*
801*e65e175bSOded Gabbay  * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
802*e65e175bSOded Gabbay  *
803*e65e175bSOded Gabbay  * @hdev: pointer to hl_device structure
804*e65e175bSOded Gabbay  * @hw_queue_id: which queue to increment its ci
805*e65e175bSOded Gabbay  */
hl_hw_queue_inc_ci_kernel(struct hl_device * hdev,u32 hw_queue_id)806*e65e175bSOded Gabbay void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
807*e65e175bSOded Gabbay {
808*e65e175bSOded Gabbay 	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
809*e65e175bSOded Gabbay 
810*e65e175bSOded Gabbay 	atomic_inc(&q->ci);
811*e65e175bSOded Gabbay }
812*e65e175bSOded Gabbay 
/*
 * ext_and_cpu_queue_init - common initialization of external and CPU queues
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue queue structure
 * @is_cpu_queue: true to take the queue memory from the CPU accessible DMA
 *                pool, false to use a zeroed coherent DMA allocation
 *
 * Allocates the queue memory and the shadow queue array, then sets pi and ci
 * to 0. If the shadow queue allocation fails, the queue memory is released.
 *
 * Returns 0 on success, -ENOMEM if one of the allocations failed
 */
static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
					bool is_cpu_queue)
{
	void *queue_mem;

	if (is_cpu_queue)
		queue_mem = hl_cpu_accessible_dma_pool_alloc(hdev, HL_QUEUE_SIZE_IN_BYTES,
								&q->bus_address);
	else
		queue_mem = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
							&q->bus_address,
							GFP_KERNEL | __GFP_ZERO);
	if (!queue_mem)
		return -ENOMEM;

	q->kernel_address = queue_mem;

	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL);
	if (q->shadow_queue) {
		/* Read/write pointers begin at the start of the queue */
		q->pi = 0;
		atomic_set(&q->ci, 0);

		return 0;
	}

	dev_err(hdev->dev,
		"Failed to allocate shadow queue for H/W queue %d\n",
		q->hw_queue_id);

	/* Give the queue memory back to the allocator it came from */
	if (is_cpu_queue)
		hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
	else
		hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
						q->bus_address);

	return -ENOMEM;
}
853*e65e175bSOded Gabbay 
int_queue_init(struct hl_device * hdev,struct hl_hw_queue * q)854*e65e175bSOded Gabbay static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
855*e65e175bSOded Gabbay {
856*e65e175bSOded Gabbay 	void *p;
857*e65e175bSOded Gabbay 
858*e65e175bSOded Gabbay 	p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
859*e65e175bSOded Gabbay 					&q->bus_address, &q->int_queue_len);
860*e65e175bSOded Gabbay 	if (!p) {
861*e65e175bSOded Gabbay 		dev_err(hdev->dev,
862*e65e175bSOded Gabbay 			"Failed to get base address for internal queue %d\n",
863*e65e175bSOded Gabbay 			q->hw_queue_id);
864*e65e175bSOded Gabbay 		return -EFAULT;
865*e65e175bSOded Gabbay 	}
866*e65e175bSOded Gabbay 
867*e65e175bSOded Gabbay 	q->kernel_address = p;
868*e65e175bSOded Gabbay 	q->pi = 0;
869*e65e175bSOded Gabbay 	atomic_set(&q->ci, 0);
870*e65e175bSOded Gabbay 
871*e65e175bSOded Gabbay 	return 0;
872*e65e175bSOded Gabbay }
873*e65e175bSOded Gabbay 
cpu_queue_init(struct hl_device * hdev,struct hl_hw_queue * q)874*e65e175bSOded Gabbay static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
875*e65e175bSOded Gabbay {
876*e65e175bSOded Gabbay 	return ext_and_cpu_queue_init(hdev, q, true);
877*e65e175bSOded Gabbay }
878*e65e175bSOded Gabbay 
ext_queue_init(struct hl_device * hdev,struct hl_hw_queue * q)879*e65e175bSOded Gabbay static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
880*e65e175bSOded Gabbay {
881*e65e175bSOded Gabbay 	return ext_and_cpu_queue_init(hdev, q, false);
882*e65e175bSOded Gabbay }
883*e65e175bSOded Gabbay 
hw_queue_init(struct hl_device * hdev,struct hl_hw_queue * q)884*e65e175bSOded Gabbay static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
885*e65e175bSOded Gabbay {
886*e65e175bSOded Gabbay 	void *p;
887*e65e175bSOded Gabbay 
888*e65e175bSOded Gabbay 	p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address,
889*e65e175bSOded Gabbay 					GFP_KERNEL | __GFP_ZERO);
890*e65e175bSOded Gabbay 	if (!p)
891*e65e175bSOded Gabbay 		return -ENOMEM;
892*e65e175bSOded Gabbay 
893*e65e175bSOded Gabbay 	q->kernel_address = p;
894*e65e175bSOded Gabbay 
895*e65e175bSOded Gabbay 	/* Make sure read/write pointers are initialized to start of queue */
896*e65e175bSOded Gabbay 	atomic_set(&q->ci, 0);
897*e65e175bSOded Gabbay 	q->pi = 0;
898*e65e175bSOded Gabbay 
899*e65e175bSOded Gabbay 	return 0;
900*e65e175bSOded Gabbay }
901*e65e175bSOded Gabbay 
/*
 * sync_stream_queue_init - initialize the sync stream properties of a queue
 *
 * @hdev: pointer to hl_device structure
 * @q_idx: index of the queue in hdev->kernel_queues
 *
 * Reserves collective monitors for collective master/slave queues and, for
 * queues that support sync stream, assigns the base SOB/monitor ids and
 * initializes every reserved hw_sob object of the queue.
 */
static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *queue = &hdev->kernel_queues[q_idx];
	struct hl_sync_stream_properties *ssp = &queue->sync_stream_prop;
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	int idx, mon_offset, stream_idx;

	/*
	 * 'collective_mon_idx' serves as a running index for reserving
	 * monitors for collective master/slave queues:
	 * a collective master queue gets 2 reserved monitors,
	 * a collective slave queue gets 1 reserved monitor
	 */
	if (queue->collective_mode == HL_COLLECTIVE_MASTER) {
		mon_offset = hdev->collective_mon_idx;
		hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS;

		/* two consecutive monitors for the collective master queue */
		ssp->collective_mstr_mon_id[0] =
			asic->collective_first_mon + mon_offset;
		ssp->collective_mstr_mon_id[1] =
			asic->collective_first_mon + mon_offset + 1;
	} else if (queue->collective_mode == HL_COLLECTIVE_SLAVE) {
		mon_offset = hdev->collective_mon_idx++;

		/* a single monitor for the collective slave queue */
		ssp->collective_slave_mon_id =
			asic->collective_first_mon + mon_offset;
	}

	/* Nothing more to set up for queues without sync stream support */
	if (!queue->supports_sync_stream)
		return;

	stream_idx = hdev->sync_stream_queue_idx++;

	ssp->base_sob_id = asic->sync_stream_first_sob +
			(stream_idx * HL_RSVD_SOBS);
	ssp->base_mon_id = asic->sync_stream_first_mon +
			(stream_idx * HL_RSVD_MONS);
	ssp->curr_sob_offset = 0;
	ssp->next_sob_val = 1;

	for (idx = 0 ; idx < HL_RSVD_SOBS ; idx++) {
		struct hl_hw_sob *hw_sob = &ssp->hw_sob[idx];

		hw_sob->hdev = hdev;
		hw_sob->sob_id = ssp->base_sob_id + idx;
		hw_sob->sob_addr =
			hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
		hw_sob->q_idx = q_idx;
		kref_init(&hw_sob->kref);
	}
}
960*e65e175bSOded Gabbay 
/*
 * sync_stream_queue_reset - reset the sync stream state of a queue
 *
 * @hdev: pointer to hl_device structure
 * @q_idx: index of the queue in hdev->kernel_queues
 *
 * Re-initializes the refcount of the currently used SOB and moves the queue
 * back to SOB offset 0 with next SOB value 1.
 */
static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_sync_stream_properties *ssp;
	struct hl_hw_sob *curr_sob;

	ssp = &hdev->kernel_queues[q_idx].sync_stream_prop;
	curr_sob = &ssp->hw_sob[ssp->curr_sob_offset];

	/*
	 * A stuck CS may have left the refcnt bigger than 1, so it is
	 * re-initialized rather than put.
	 */
	kref_init(&curr_sob->kref);

	ssp->next_sob_val = 1;
	ssp->curr_sob_offset = 0;
}
974*e65e175bSOded Gabbay 
975*e65e175bSOded Gabbay /*
976*e65e175bSOded Gabbay  * queue_init - main initialization function for H/W queue object
977*e65e175bSOded Gabbay  *
978*e65e175bSOded Gabbay  * @hdev: pointer to hl_device device structure
979*e65e175bSOded Gabbay  * @q: pointer to hl_hw_queue queue structure
980*e65e175bSOded Gabbay  * @hw_queue_id: The id of the H/W queue
981*e65e175bSOded Gabbay  *
982*e65e175bSOded Gabbay  * Allocate dma-able memory for the queue and initialize fields
983*e65e175bSOded Gabbay  * Returns 0 on success
984*e65e175bSOded Gabbay  */
queue_init(struct hl_device * hdev,struct hl_hw_queue * q,u32 hw_queue_id)985*e65e175bSOded Gabbay static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
986*e65e175bSOded Gabbay 			u32 hw_queue_id)
987*e65e175bSOded Gabbay {
988*e65e175bSOded Gabbay 	int rc;
989*e65e175bSOded Gabbay 
990*e65e175bSOded Gabbay 	q->hw_queue_id = hw_queue_id;
991*e65e175bSOded Gabbay 
992*e65e175bSOded Gabbay 	switch (q->queue_type) {
993*e65e175bSOded Gabbay 	case QUEUE_TYPE_EXT:
994*e65e175bSOded Gabbay 		rc = ext_queue_init(hdev, q);
995*e65e175bSOded Gabbay 		break;
996*e65e175bSOded Gabbay 	case QUEUE_TYPE_INT:
997*e65e175bSOded Gabbay 		rc = int_queue_init(hdev, q);
998*e65e175bSOded Gabbay 		break;
999*e65e175bSOded Gabbay 	case QUEUE_TYPE_CPU:
1000*e65e175bSOded Gabbay 		rc = cpu_queue_init(hdev, q);
1001*e65e175bSOded Gabbay 		break;
1002*e65e175bSOded Gabbay 	case QUEUE_TYPE_HW:
1003*e65e175bSOded Gabbay 		rc = hw_queue_init(hdev, q);
1004*e65e175bSOded Gabbay 		break;
1005*e65e175bSOded Gabbay 	case QUEUE_TYPE_NA:
1006*e65e175bSOded Gabbay 		q->valid = 0;
1007*e65e175bSOded Gabbay 		return 0;
1008*e65e175bSOded Gabbay 	default:
1009*e65e175bSOded Gabbay 		dev_crit(hdev->dev, "wrong queue type %d during init\n",
1010*e65e175bSOded Gabbay 			q->queue_type);
1011*e65e175bSOded Gabbay 		rc = -EINVAL;
1012*e65e175bSOded Gabbay 		break;
1013*e65e175bSOded Gabbay 	}
1014*e65e175bSOded Gabbay 
1015*e65e175bSOded Gabbay 	sync_stream_queue_init(hdev, q->hw_queue_id);
1016*e65e175bSOded Gabbay 
1017*e65e175bSOded Gabbay 	if (rc)
1018*e65e175bSOded Gabbay 		return rc;
1019*e65e175bSOded Gabbay 
1020*e65e175bSOded Gabbay 	q->valid = 1;
1021*e65e175bSOded Gabbay 
1022*e65e175bSOded Gabbay 	return 0;
1023*e65e175bSOded Gabbay }
1024*e65e175bSOded Gabbay 
1025*e65e175bSOded Gabbay /*
1026*e65e175bSOded Gabbay  * hw_queue_fini - destroy queue
1027*e65e175bSOded Gabbay  *
1028*e65e175bSOded Gabbay  * @hdev: pointer to hl_device device structure
1029*e65e175bSOded Gabbay  * @q: pointer to hl_hw_queue queue structure
1030*e65e175bSOded Gabbay  *
1031*e65e175bSOded Gabbay  * Free the queue memory
1032*e65e175bSOded Gabbay  */
queue_fini(struct hl_device * hdev,struct hl_hw_queue * q)1033*e65e175bSOded Gabbay static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
1034*e65e175bSOded Gabbay {
1035*e65e175bSOded Gabbay 	if (!q->valid)
1036*e65e175bSOded Gabbay 		return;
1037*e65e175bSOded Gabbay 
1038*e65e175bSOded Gabbay 	/*
1039*e65e175bSOded Gabbay 	 * If we arrived here, there are no jobs waiting on this queue
1040*e65e175bSOded Gabbay 	 * so we can safely remove it.
1041*e65e175bSOded Gabbay 	 * This is because this function can only called when:
1042*e65e175bSOded Gabbay 	 * 1. Either a context is deleted, which only can occur if all its
1043*e65e175bSOded Gabbay 	 *    jobs were finished
1044*e65e175bSOded Gabbay 	 * 2. A context wasn't able to be created due to failure or timeout,
1045*e65e175bSOded Gabbay 	 *    which means there are no jobs on the queue yet
1046*e65e175bSOded Gabbay 	 *
1047*e65e175bSOded Gabbay 	 * The only exception are the queues of the kernel context, but
1048*e65e175bSOded Gabbay 	 * if they are being destroyed, it means that the entire module is
1049*e65e175bSOded Gabbay 	 * being removed. If the module is removed, it means there is no open
1050*e65e175bSOded Gabbay 	 * user context. It also means that if a job was submitted by
1051*e65e175bSOded Gabbay 	 * the kernel driver (e.g. context creation), the job itself was
1052*e65e175bSOded Gabbay 	 * released by the kernel driver when a timeout occurred on its
1053*e65e175bSOded Gabbay 	 * Completion. Thus, we don't need to release it again.
1054*e65e175bSOded Gabbay 	 */
1055*e65e175bSOded Gabbay 
1056*e65e175bSOded Gabbay 	if (q->queue_type == QUEUE_TYPE_INT)
1057*e65e175bSOded Gabbay 		return;
1058*e65e175bSOded Gabbay 
1059*e65e175bSOded Gabbay 	kfree(q->shadow_queue);
1060*e65e175bSOded Gabbay 
1061*e65e175bSOded Gabbay 	if (q->queue_type == QUEUE_TYPE_CPU)
1062*e65e175bSOded Gabbay 		hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
1063*e65e175bSOded Gabbay 	else
1064*e65e175bSOded Gabbay 		hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
1065*e65e175bSOded Gabbay 						q->bus_address);
1066*e65e175bSOded Gabbay }
1067*e65e175bSOded Gabbay 
hl_hw_queues_create(struct hl_device * hdev)1068*e65e175bSOded Gabbay int hl_hw_queues_create(struct hl_device *hdev)
1069*e65e175bSOded Gabbay {
1070*e65e175bSOded Gabbay 	struct asic_fixed_properties *asic = &hdev->asic_prop;
1071*e65e175bSOded Gabbay 	struct hl_hw_queue *q;
1072*e65e175bSOded Gabbay 	int i, rc, q_ready_cnt;
1073*e65e175bSOded Gabbay 
1074*e65e175bSOded Gabbay 	hdev->kernel_queues = kcalloc(asic->max_queues,
1075*e65e175bSOded Gabbay 				sizeof(*hdev->kernel_queues), GFP_KERNEL);
1076*e65e175bSOded Gabbay 
1077*e65e175bSOded Gabbay 	if (!hdev->kernel_queues) {
1078*e65e175bSOded Gabbay 		dev_err(hdev->dev, "Not enough memory for H/W queues\n");
1079*e65e175bSOded Gabbay 		return -ENOMEM;
1080*e65e175bSOded Gabbay 	}
1081*e65e175bSOded Gabbay 
1082*e65e175bSOded Gabbay 	/* Initialize the H/W queues */
1083*e65e175bSOded Gabbay 	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
1084*e65e175bSOded Gabbay 			i < asic->max_queues ; i++, q_ready_cnt++, q++) {
1085*e65e175bSOded Gabbay 
1086*e65e175bSOded Gabbay 		q->queue_type = asic->hw_queues_props[i].type;
1087*e65e175bSOded Gabbay 		q->supports_sync_stream =
1088*e65e175bSOded Gabbay 				asic->hw_queues_props[i].supports_sync_stream;
1089*e65e175bSOded Gabbay 		q->collective_mode = asic->hw_queues_props[i].collective_mode;
1090*e65e175bSOded Gabbay 		rc = queue_init(hdev, q, i);
1091*e65e175bSOded Gabbay 		if (rc) {
1092*e65e175bSOded Gabbay 			dev_err(hdev->dev,
1093*e65e175bSOded Gabbay 				"failed to initialize queue %d\n", i);
1094*e65e175bSOded Gabbay 			goto release_queues;
1095*e65e175bSOded Gabbay 		}
1096*e65e175bSOded Gabbay 	}
1097*e65e175bSOded Gabbay 
1098*e65e175bSOded Gabbay 	return 0;
1099*e65e175bSOded Gabbay 
1100*e65e175bSOded Gabbay release_queues:
1101*e65e175bSOded Gabbay 	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
1102*e65e175bSOded Gabbay 		queue_fini(hdev, q);
1103*e65e175bSOded Gabbay 
1104*e65e175bSOded Gabbay 	kfree(hdev->kernel_queues);
1105*e65e175bSOded Gabbay 
1106*e65e175bSOded Gabbay 	return rc;
1107*e65e175bSOded Gabbay }
1108*e65e175bSOded Gabbay 
hl_hw_queues_destroy(struct hl_device * hdev)1109*e65e175bSOded Gabbay void hl_hw_queues_destroy(struct hl_device *hdev)
1110*e65e175bSOded Gabbay {
1111*e65e175bSOded Gabbay 	struct hl_hw_queue *q;
1112*e65e175bSOded Gabbay 	u32 max_queues = hdev->asic_prop.max_queues;
1113*e65e175bSOded Gabbay 	int i;
1114*e65e175bSOded Gabbay 
1115*e65e175bSOded Gabbay 	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
1116*e65e175bSOded Gabbay 		queue_fini(hdev, q);
1117*e65e175bSOded Gabbay 
1118*e65e175bSOded Gabbay 	kfree(hdev->kernel_queues);
1119*e65e175bSOded Gabbay }
1120*e65e175bSOded Gabbay 
/*
 * hl_hw_queue_reset - reset the pi/ci of the H/W queues
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true if this is part of a hard reset
 *
 * Sets pi and ci of every valid queue to 0 and resets the sync stream state
 * of queues that support it. CPU queues are only reset on hard reset.
 */
void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
	u32 idx, num_queues = hdev->asic_prop.max_queues;

	for (idx = 0 ; idx < num_queues ; idx++) {
		struct hl_hw_queue *queue = &hdev->kernel_queues[idx];

		if (!queue->valid)
			continue;

		/* CPU queues are left as they are unless this is a hard reset */
		if (!hard_reset && queue->queue_type == QUEUE_TYPE_CPU)
			continue;

		atomic_set(&queue->ci, 0);
		queue->pi = 0;

		if (queue->supports_sync_stream)
			sync_stream_queue_reset(hdev, queue->hw_queue_id);
	}
}
1138