// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"

#include <linux/slab.h>

/*
 * hl_hw_queue_add_ptr - add a value to a pi or ci and check if it wraps around
 *
 * @ptr: the current pi/ci value
 * @val: the amount to add
 *
 * Add val to ptr. The result can go up to twice the queue length.
 */
inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
{
	ptr += val;
	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
	return ptr;
}
static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
{
	return atomic_read(ci) & ((queue_len << 1) - 1);
}

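/*
 * The pi/ci counters run over a range of twice the queue length, which makes
 * a full queue (pi == ci + queue_len) distinguishable from an empty one
 * (pi == ci). For example, with a queue length of 4 the counters run over
 * 0..7, so pi = 5 and ci = 1 means 4 entries are in use and none are free.
 */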
static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));

	if (delta >= 0)
		return (queue_len - delta);
	else
		return (abs(delta) - queue_len);
}

void hl_hw_queue_update_ci(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_hw_queue *q;
	int i;

	if (hdev->disabled)
		return;

	q = &hdev->kernel_queues[0];

	/* There are no internal queues if H/W queues are being used */
	if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
		return;

	/* We must increment CI for every queue that will never get a
	 * completion. There are two scenarios in which this can happen:
	 * 1. All queues of a CS that doesn't need a completion will never
	 *    get one.
	 * 2. Internal queues never get a completion.
	 */
	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
		if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
	}
}

/*
 * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or an
 *                           H/W queue.
 * @hdev: pointer to habanalabs device structure
 * @q: pointer to habanalabs queue structure
 * @ctl: BD's control word
 * @len: BD's length
 * @ptr: BD's pointer
 *
 * This function assumes there is enough space in the queue to submit a new
 * BD to it. It initializes the next BD and calls the device-specific
 * function to set the pi (and ring the doorbell).
 *
 * This function must be called when the scheduler mutex is taken.
 */
void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
		u32 ctl, u32 len, u64 ptr)
{
	struct hl_bd *bd;

	bd = q->kernel_address;
	bd += hl_pi_2_offset(q->pi);
	bd->ctl = cpu_to_le32(ctl);
	bd->len = cpu_to_le32(len);
	bd->ptr = cpu_to_le64(ptr);

	q->pi = hl_queue_inc_ptr(q->pi);
	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * ext_queue_sanity_checks - perform some sanity checks on an external queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 * @reserve_cq_entry: whether to reserve an entry in the cq
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 * - Make sure we have enough space in the completion queue
 * - Reserve space in the completion queue (needs to be reversed if there
 *   is a failure down the road before the actual submission of work). Only
 *   do this action if reserve_cq_entry is true
 *
 */
static int ext_queue_sanity_checks(struct hl_device *hdev,
					struct hl_hw_queue *q, int num_of_entries,
					bool reserve_cq_entry)
{
	atomic_t *free_slots =
			&hdev->completion_queue[q->cq_id].free_slots_cnt;
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	if (reserve_cq_entry) {
		/*
		 * Check we have enough space in the completion queue.
		 * Subtract num_of_entries from the free-slots counter; if the
		 * result goes negative, the CQ is full, so we can't submit a
		 * new CB because we won't get an ack on its completion.
		 * In that case, undo the subtraction and bail out.
		 */
		if (atomic_add_negative(num_of_entries * -1, free_slots)) {
			dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
				num_of_entries, q->hw_queue_id);
			atomic_add(num_of_entries, free_slots);
			return -EAGAIN;
		}
	}

	return 0;
}

/*
 * int_queue_sanity_checks - perform some sanity checks on an internal queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 *
 */
static int int_queue_sanity_checks(struct hl_device *hdev,
					struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	if (num_of_entries > q->int_queue_len) {
		dev_err(hdev->dev,
			"Cannot populate queue %u with %u jobs\n",
			q->hw_queue_id, num_of_entries);
		return -ENOMEM;
	}

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, q->int_queue_len);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

/*
 * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
 * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
 * Notice: We do not reserve queue entries so this function mustn't be called
 *         more than once per CS for the same queue
 *
 */
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

/*
 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: the ID of the queue to send the CB to
 * @cb_size: size of CB
 * @cb_ptr: pointer to CB location
 *
 * This function sends a single CB, that must NOT generate a completion entry.
 * Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()'
 */
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
				u32 cb_size, u64 cb_ptr)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
	int rc = 0;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (hdev->disabled) {
		rc = -EPERM;
		goto out;
	}

	/*
	 * hl_hw_queue_send_cb_no_cmpl() is called for queues of the H/W queue
	 * type only during the init phase, when the queues are empty and being
	 * tested, so there is no need for sanity checks.
	 */
	if (q->queue_type != QUEUE_TYPE_HW) {
		rc = ext_queue_sanity_checks(hdev, q, 1, false);
		if (rc)
			goto out;
	}

	hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

/*
 * ext_queue_schedule_job - submit a JOB to an external queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void ext_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_cq_entry cq_pkt;
	struct hl_cq *cq;
	u64 cq_addr;
	struct hl_cb *cb;
	u32 ctl;
	u32 len;
	u64 ptr;

	/*
	 * Update the JOB ID inside the BD CTL so the device would know what
	 * to write in the completion queue
	 */
	ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);

	cb = job->patched_cb;
	len = job->job_cb_size;
	ptr = cb->bus_address;

	/* Skip completion flow in case this is a non completion CS */
	if (!cs_needs_completion(job->cs))
		goto submit_bd;

	cq_pkt.data = cpu_to_le32(
			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
				& CQ_ENTRY_SHADOW_INDEX_MASK) |
			FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
			FIELD_PREP(CQ_ENTRY_READY_MASK, 1));

	/*
	 * No need to protect pi_offset because scheduling to the
	 * H/W queues is done under the scheduler mutex
	 *
	 * No need to check if CQ is full because it was already
	 * checked in ext_queue_sanity_checks
	 */
	cq = &hdev->completion_queue[q->cq_id];
	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);

	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
						job->user_cb_size,
						cq_addr,
						le32_to_cpu(cq_pkt.data),
						q->msi_vec,
						job->contains_dma_pkt);

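	/*
	 * Remember which job sits at this shadow index, so the completion
	 * flow can map the shadow index reported back in the CQ entry to the
	 * job that finished.
	 */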
	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;

	cq->pi = hl_cq_inc_ptr(cq->pi);

submit_bd:
	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

/*
 * int_queue_schedule_job - submit a JOB to an internal queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void int_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_bd bd;
	__le64 *pi;

	bd.ctl = 0;
	bd.len = cpu_to_le32(job->job_cb_size);

	if (job->is_kernel_allocated_cb)
		/* bus_address is actually an MMU-mapped address
		 * allocated from an internal pool
		 */
		bd.ptr = cpu_to_le64(job->user_cb->bus_address);
	else
		bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);

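	/*
	 * The pi counter wraps at twice the queue length (see
	 * queue_free_slots()), so only its low bits are used to index into
	 * the queue when writing the BD.
	 */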
	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);

	q->pi++;
	q->pi &= ((q->int_queue_len << 1) - 1);

	hdev->asic_funcs->pqe_write(hdev, pi, &bd);

	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * hw_queue_schedule_job - submit a JOB to a H/W queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void hw_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	u64 ptr;
	u32 offset, ctl, len;

	/*
	 * Upon PQE completion, COMP_DATA is used as the write data to the
	 * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
	 * write address offset in the SM block (QMAN LBW message).
	 * The write address offset is calculated as "COMP_OFFSET << 2".
	 */
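	/*
	 * The offset below is derived from the CS sequence the same way
	 * hl_hw_queue_schedule_cs() indexes hdev->shadow_cs_queue, which
	 * presumably lets the completion be matched back to its CS.
	 */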
	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);

	len = job->job_cb_size;

	/*
	 * A patched CB is created only if a user CB was allocated by the
	 * driver and the MMU is disabled. If the MMU is enabled, the user CB
	 * should be used instead. If the user CB wasn't allocated by the
	 * driver, assume that it holds an address.
	 */
	if (job->patched_cb)
		ptr = job->patched_cb->bus_address;
	else if (job->is_kernel_allocated_cb)
		ptr = job->user_cb->bus_address;
	else
		ptr = (u64) (uintptr_t) job->user_cb;

	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

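/*
 * init_signal_cs - prepare a signal CS: pick the queue's current SOB, record
 * it and the expected signal value in the CS completion object, generate the
 * signal CB and handle SOB wraparound.
 */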
static int init_signal_cs(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_sync_stream_properties *prop;
	struct hl_hw_sob *hw_sob;
	u32 q_idx;
	int rc = 0;

	q_idx = job->hw_queue_id;
	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
	hw_sob = &prop->hw_sob[prop->curr_sob_offset];

	cs_cmpl->hw_sob = hw_sob;
	cs_cmpl->sob_val = prop->next_sob_val;

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
		cs_cmpl->cs_seq);

	/* we set an EB since we must make sure all operations are done
	 * when sending the signal
	 */
	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
				cs_cmpl->hw_sob->sob_id, 0, true);

	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
								false);

	job->cs->sob_addr_offset = hw_sob->sob_addr;
	job->cs->initial_sob_count = prop->next_sob_val - 1;

	return rc;
}

void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
			struct hl_cs *cs, struct hl_cs_job *job,
			struct hl_cs_compl *cs_cmpl)
{
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	u32 offset = 0;

	cs_cmpl->hw_sob = handle->hw_sob;

	/* Note that encaps_sig_wait_offset was validated earlier in the flow,
	 * to make sure it doesn't exceed the max reserved signal count.
	 * Always decrement the offset by 1, since when the user sets offset 1,
	 * for example, he means to wait only for the first signal, which will
	 * be pre_sob_val, and if he sets offset 2 then the value required is
	 * (pre_sob_val + 1), and so on...
	 * If the user sets the wait offset to 0, treat it as a legacy wait CS,
	 * i.e. wait for the next signal.
	 */
	if (job->encaps_sig_wait_offset)
		offset = job->encaps_sig_wait_offset - 1;

	cs_cmpl->sob_val = handle->pre_sob_val + offset;
}

static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct hl_cs_compl *signal_cs_cmpl;
	u32 q_idx;

	q_idx = job->hw_queue_id;
	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	signal_cs_cmpl = container_of(cs->signal_fence,
					struct hl_cs_compl,
					base_fence);

	if (cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);

		dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
				cs->encaps_sig_hdl->q_idx,
				cs->encaps_sig_hdl->cs_seq,
				cs_cmpl->sob_val,
				job->encaps_sig_wait_offset);
	} else {
		/* Copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* Check again if the signal CS has already completed.
	 * If it has, don't send any wait CS, since the hw_sob could already
	 * be in reset. If the signal has not completed, take a refcount on
	 * the hw_sob to prevent resetting the SOB while the wait CS is not
	 * yet submitted.
	 * Note that this check is protected by two locks:
	 * the hw_queue lock and the completion object lock. The same
	 * completion object lock also protects the hw_sob reset handler
	 * function.
	 * The hw_queue lock prevents the hw_sob refcount value, which is
	 * changed by the signal/wait flows, from going out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}

	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	dev_dbg(hdev->dev,
		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->base_mon_id, q_idx, cs->sequence);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->base_mon_id;
	wait_prop.q_idx = q_idx;
	wait_prop.size = 0;

	hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

/*
 * init_signal_wait_cs - initialize a signal/wait CS
 * @cs: pointer to the signal/wait CS
 *
 * H/W queues spinlock should be taken before calling this function
 */
static int init_signal_wait_cs(struct hl_cs *cs)
{
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job;
	struct hl_cs_compl *cs_cmpl =
			container_of(cs->fence, struct hl_cs_compl, base_fence);
	int rc = 0;

	/* There is only one job in a signal/wait CS */
	job = list_first_entry(&cs->job_list, struct hl_cs_job,
				cs_node);

	if (cs->type & CS_TYPE_SIGNAL)
		rc = init_signal_cs(hdev, job, cs_cmpl);
	else if (cs->type & CS_TYPE_WAIT)
		rc = init_wait_cs(hdev, cs, job, cs_cmpl);

	return rc;
}

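/*
 * encaps_sig_first_staged_cs_handler - bind the first CS of a staged
 * submission to its encapsulated-signals handle: look the handle up by id,
 * record the CS sequence in it and store the handle and its SOB information
 * in the CS completion object, to be released later in cs_do_release.
 */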
static int encaps_sig_first_staged_cs_handler
			(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_compl *cs_cmpl =
			container_of(cs->fence,
					struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
	struct hl_encaps_signals_mgr *mgr;
	int rc = 0;

	mgr = &cs->ctx->sig_mgr;

	spin_lock(&mgr->lock);
	encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
	if (encaps_sig_hdl) {
		/*
		 * Set handler CS sequence,
		 * the CS which contains the encapsulated signals.
		 */
		encaps_sig_hdl->cs_seq = cs->sequence;
		/* store the handle and set encaps signal indication,
		 * to be used later in cs_do_release to put the last
		 * reference to encaps signals handlers.
		 */
		cs_cmpl->encaps_signals = true;
		cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;

		/* set hw_sob pointer in completion object
		 * since it's used in cs_do_release flow to put
		 * refcount to sob
		 */
		cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
		cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
						encaps_sig_hdl->count;

		dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
				cs->sequence, encaps_sig_hdl->id,
				encaps_sig_hdl->count,
				encaps_sig_hdl->q_idx,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);

	} else {
		dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
				cs->encaps_sig_hdl_id);
		rc = -EINVAL;
	}

	spin_unlock(&mgr->lock);

	return rc;
}

/*
 * hl_hw_queue_schedule_cs - schedule a command submission
 * @cs: pointer to the CS
 */
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{
	enum hl_device_status status;
	struct hl_cs_counters_atomic *cntr;
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job, *tmp;
	struct hl_hw_queue *q;
	int rc = 0, i, cq_cnt;
	bool first_entry;
	u32 max_queues;

	cntr = &hdev->aggregated_cs_counters;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (!hl_device_operational(hdev, &status)) {
		atomic64_inc(&cntr->device_in_reset_drop_cnt);
		atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
		dev_err(hdev->dev,
			"device is %s, CS rejected!\n", hdev->status[status]);
		rc = -EPERM;
		goto out;
	}

	max_queues = hdev->asic_prop.max_queues;

	q = &hdev->kernel_queues[0];
	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
		if (cs->jobs_in_queue_cnt[i]) {
			switch (q->queue_type) {
			case QUEUE_TYPE_EXT:
				rc = ext_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i],
						cs_needs_completion(cs) ?
								true : false);
				break;
			case QUEUE_TYPE_INT:
				rc = int_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			case QUEUE_TYPE_HW:
				rc = hw_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			default:
				dev_err(hdev->dev, "Queue type %d is invalid\n",
					q->queue_type);
				rc = -EINVAL;
				break;
			}

			if (rc) {
				atomic64_inc(
					&ctx->cs_counters.queue_full_drop_cnt);
				atomic64_inc(&cntr->queue_full_drop_cnt);
				goto unroll_cq_resv;
			}

			if (q->queue_type == QUEUE_TYPE_EXT)
				cq_cnt++;
		}
	}

	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
		rc = init_signal_wait_cs(cs);
		if (rc)
			goto unroll_cq_resv;
	} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
		rc = hdev->asic_funcs->collective_wait_init_cs(cs);
		if (rc)
			goto unroll_cq_resv;
	}

	rc = hdev->asic_funcs->pre_schedule_cs(cs);
	if (rc) {
		dev_err(hdev->dev,
			"Failed in pre-submission operations of CS %d.%llu\n",
			ctx->asid, cs->sequence);
		goto unroll_cq_resv;
	}

	hdev->shadow_cs_queue[cs->sequence &
				(hdev->asic_prop.max_pending_cs - 1)] = cs;

	if (cs->encaps_signals && cs->staged_first) {
		rc = encaps_sig_first_staged_cs_handler(hdev, cs);
		if (rc)
			goto unroll_cq_resv;
	}

	spin_lock(&hdev->cs_mirror_lock);

	/* Verify staged CS exists and add to the staged list */
	if (cs->staged_cs && !cs->staged_first) {
		struct hl_cs *staged_cs;

		staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (!staged_cs) {
			dev_err(hdev->dev,
				"Cannot find staged submission sequence %llu",
				cs->staged_sequence);
			rc = -EINVAL;
			goto unlock_cs_mirror;
		}

		if (is_staged_cs_last_exists(hdev, staged_cs)) {
			dev_err(hdev->dev,
				"Staged submission sequence %llu already submitted",
				cs->staged_sequence);
			rc = -EINVAL;
			goto unlock_cs_mirror;
		}

		list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);

		/* update stream map of the first CS */
		if (hdev->supports_wait_for_multi_cs)
			staged_cs->fence->stream_master_qid_map |=
					cs->fence->stream_master_qid_map;
	}

	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);

	/* Queue TDR if the CS is the first entry and if timeout is wanted */
	first_entry = list_first_entry(&hdev->cs_mirror_list,
					struct hl_cs, mirror_node) == cs;
	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
				first_entry && cs_needs_timeout(cs)) {
		cs->tdr_active = true;
		schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		switch (job->queue_type) {
		case QUEUE_TYPE_EXT:
			ext_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_INT:
			int_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_HW:
			hw_queue_schedule_job(job);
			break;
		default:
			break;
		}

	cs->submitted = true;

	goto out;

unlock_cs_mirror:
	spin_unlock(&hdev->cs_mirror_lock);
unroll_cq_resv:
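	/* Return the CQ entries that were reserved in ext_queue_sanity_checks()
	 * for the external queues that passed the checks before the failure.
	 */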
	q = &hdev->kernel_queues[0];
	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
		if ((q->queue_type == QUEUE_TYPE_EXT) &&
						(cs->jobs_in_queue_cnt[i])) {
			atomic_t *free_slots =
				&hdev->completion_queue[i].free_slots_cnt;
			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
			cq_cnt--;
		}
	}

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

/*
 * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: which queue to increment its ci
 */
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];

	atomic_inc(&q->ci);
}

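/*
 * External and CPU queues share the same init flow; the only difference is
 * where the ring buffer memory comes from: the CPU-accessible DMA pool for
 * the CPU queue, and regular coherent DMA memory otherwise.
 */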
static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
					bool is_cpu_queue)
{
	void *p;
	int rc;

	if (is_cpu_queue)
		p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address);
	else
		p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL);
	if (!q->shadow_queue) {
		dev_err(hdev->dev,
			"Failed to allocate shadow queue for H/W queue %d\n",
			q->hw_queue_id);
		rc = -ENOMEM;
		goto free_queue;
	}

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;

free_queue:
	if (is_cpu_queue)
		hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
	else
		hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
						q->bus_address);

	return rc;
}

static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
					&q->bus_address, &q->int_queue_len);
	if (!p) {
		dev_err(hdev->dev,
			"Failed to get base address for internal queue %d\n",
			q->hw_queue_id);
		return -EFAULT;
	}

	q->kernel_address = p;
	q->pi = 0;
	atomic_set(&q->ci, 0);

	return 0;
}

static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, true);
}

static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, false);
}

static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address,
					GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;
}

static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct hl_sync_stream_properties *sync_stream_prop;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_hw_sob *hw_sob;
	int sob, reserved_mon_idx, queue_idx;

	sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	/* We use 'collective_mon_idx' as a running index in order to reserve
	 * monitors for collective master/slave queues.
	 * collective master queue gets 2 reserved monitors
	 * collective slave queue gets 1 reserved monitor
	 */
	if (hdev->kernel_queues[q_idx].collective_mode ==
			HL_COLLECTIVE_MASTER) {
		reserved_mon_idx = hdev->collective_mon_idx;

		/* reserve the first monitor for collective master queue */
		sync_stream_prop->collective_mstr_mon_id[0] =
			prop->collective_first_mon + reserved_mon_idx;

		/* reserve the second monitor for collective master queue */
		sync_stream_prop->collective_mstr_mon_id[1] =
			prop->collective_first_mon + reserved_mon_idx + 1;

		hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS;
	} else if (hdev->kernel_queues[q_idx].collective_mode ==
			HL_COLLECTIVE_SLAVE) {
		reserved_mon_idx = hdev->collective_mon_idx++;

		/* reserve a monitor for collective slave queue */
		sync_stream_prop->collective_slave_mon_id =
			prop->collective_first_mon + reserved_mon_idx;
	}

	if (!hdev->kernel_queues[q_idx].supports_sync_stream)
		return;

	queue_idx = hdev->sync_stream_queue_idx++;

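	/* Each sync-stream queue is assigned its own contiguous block of
	 * HL_RSVD_SOBS SOBs and HL_RSVD_MONS monitors, carved out of the
	 * ranges reserved in the ASIC properties.
	 */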
	sync_stream_prop->base_sob_id = prop->sync_stream_first_sob +
			(queue_idx * HL_RSVD_SOBS);
	sync_stream_prop->base_mon_id = prop->sync_stream_first_mon +
			(queue_idx * HL_RSVD_MONS);
	sync_stream_prop->next_sob_val = 1;
	sync_stream_prop->curr_sob_offset = 0;

	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
		hw_sob = &sync_stream_prop->hw_sob[sob];
		hw_sob->hdev = hdev;
		hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
		hw_sob->sob_addr =
			hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
		hw_sob->q_idx = q_idx;
		kref_init(&hw_sob->kref);
	}
}

static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_sync_stream_properties *prop =
			&hdev->kernel_queues[q_idx].sync_stream_prop;

	/*
	 * In case we got here due to a stuck CS, the refcnt might be bigger
	 * than 1 and therefore we reset it.
	 */
	kref_init(&prop->hw_sob[prop->curr_sob_offset].kref);
	prop->curr_sob_offset = 0;
	prop->next_sob_val = 1;
}

/*
 * queue_init - main initialization function for H/W queue object
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 * @hw_queue_id: The id of the H/W queue
 *
 * Allocate dma-able memory for the queue and initialize fields
 * Returns 0 on success
 */
static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
			u32 hw_queue_id)
{
	int rc;

	q->hw_queue_id = hw_queue_id;

	switch (q->queue_type) {
	case QUEUE_TYPE_EXT:
		rc = ext_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_INT:
		rc = int_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_CPU:
		rc = cpu_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_HW:
		rc = hw_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_NA:
		q->valid = 0;
		return 0;
	default:
		dev_crit(hdev->dev, "wrong queue type %d during init\n",
			q->queue_type);
		rc = -EINVAL;
		break;
	}

	sync_stream_queue_init(hdev, q->hw_queue_id);

	if (rc)
		return rc;

	q->valid = 1;

	return 0;
}

/*
 * queue_fini - destroy queue
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 *
 * Free the queue memory
 */
static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
{
	if (!q->valid)
		return;

	/*
	 * If we arrived here, there are no jobs waiting on this queue
	 * so we can safely remove it.
	 * This is because this function can only be called when:
	 * 1. Either a context is deleted, which can only occur if all its
	 *    jobs were finished
	 * 2. A context wasn't able to be created due to failure or timeout,
	 *    which means there are no jobs on the queue yet
	 *
	 * The only exception are the queues of the kernel context, but
	 * if they are being destroyed, it means that the entire module is
	 * being removed. If the module is removed, it means there is no open
	 * user context. It also means that if a job was submitted by
	 * the kernel driver (e.g. context creation), the job itself was
	 * released by the kernel driver when a timeout occurred on its
	 * Completion. Thus, we don't need to release it again.
	 */

	if (q->queue_type == QUEUE_TYPE_INT)
		return;

	kfree(q->shadow_queue);

	if (q->queue_type == QUEUE_TYPE_CPU)
		hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
	else
		hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
						q->bus_address);
}

int hl_hw_queues_create(struct hl_device *hdev)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hl_hw_queue *q;
	int i, rc, q_ready_cnt;

	hdev->kernel_queues = kcalloc(asic->max_queues,
				sizeof(*hdev->kernel_queues), GFP_KERNEL);

	if (!hdev->kernel_queues) {
		dev_err(hdev->dev, "Not enough memory for H/W queues\n");
		return -ENOMEM;
	}

	/* Initialize the H/W queues */
	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
			i < asic->max_queues ; i++, q_ready_cnt++, q++) {

		q->queue_type = asic->hw_queues_props[i].type;
		q->supports_sync_stream =
				asic->hw_queues_props[i].supports_sync_stream;
		q->collective_mode = asic->hw_queues_props[i].collective_mode;
		rc = queue_init(hdev, q, i);
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize queue %d\n", i);
			goto release_queues;
		}
	}

	return 0;

release_queues:
	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);

	return rc;
}

void hl_hw_queues_destroy(struct hl_device *hdev)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);
}

void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

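	/* Skip queues that were never initialized and, unless this is a hard
	 * reset, the CPU queue, which presumably must stay in sync with the
	 * device CPU across a soft reset.
	 */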
	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
		if ((!q->valid) ||
			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
			continue;
		q->pi = 0;
		atomic_set(&q->ci, 0);

		if (q->supports_sync_stream)
			sync_stream_queue_reset(hdev, q->hw_queue_id);
	}
}