xref: /openbmc/linux/drivers/ufs/core/ufs-mcq.c (revision 128f20c0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2022 Qualcomm Innovation Center. All rights reserved.
4  *
5  * Authors:
6  *	Asutosh Das <quic_asutoshd@quicinc.com>
7  *	Can Guo <quic_cang@quicinc.com>
8  */
9 
10 #include <asm/unaligned.h>
11 #include <linux/dma-mapping.h>
12 #include <linux/module.h>
13 #include <linux/platform_device.h>
14 #include "ufshcd-priv.h"
15 
/* Field of hba->mcq_capabilities holding the 0 based count of supported queues */
#define MAX_QUEUE_SUP			GENMASK(7, 0)

/* Lower bounds enforced by the queue count module parameter setters */
#define UFS_MCQ_MIN_RW_QUEUES		2
#define UFS_MCQ_MIN_READ_QUEUES		0
#define UFS_MCQ_MIN_POLL_QUEUES		0

/* Hardware queues set aside for device commands (uhq[0]) */
#define UFS_MCQ_NUM_DEV_CMD_QUEUES	1
/* Number of entries given to the device command queue */
#define MAX_DEV_CMD_ENTRIES		2

/* Bit positions used when composing the SQ/CQ attribute registers */
#define QUEUE_EN_OFFSET			31
#define QUEUE_ID_OFFSET			16

/* Max Active Commands field of REG_UFS_MCQ_CFG */
#define MCQ_CFG_MAC_MASK		GENMASK(16, 8)
/* Distance between the configuration register sets of two queues */
#define MCQ_QCFG_SIZE			0x40
/* Multiplier turning a number of queue entries into the queue size value */
#define MCQ_ENTRY_SIZE_IN_DWORD		8
/* Bits 63:7 of a CQE's command descriptor base address */
#define CQE_UCD_BA			GENMASK_ULL(63, 7)
29 
30 static int rw_queue_count_set(const char *val, const struct kernel_param *kp)
31 {
32 	return param_set_uint_minmax(val, kp, UFS_MCQ_MIN_RW_QUEUES,
33 				     num_possible_cpus());
34 }
35 
36 static const struct kernel_param_ops rw_queue_count_ops = {
37 	.set = rw_queue_count_set,
38 	.get = param_get_uint,
39 };
40 
41 static unsigned int rw_queues;
42 module_param_cb(rw_queues, &rw_queue_count_ops, &rw_queues, 0644);
43 MODULE_PARM_DESC(rw_queues,
44 		 "Number of interrupt driven I/O queues used for rw. Default value is nr_cpus");
45 
46 static int read_queue_count_set(const char *val, const struct kernel_param *kp)
47 {
48 	return param_set_uint_minmax(val, kp, UFS_MCQ_MIN_READ_QUEUES,
49 				     num_possible_cpus());
50 }
51 
52 static const struct kernel_param_ops read_queue_count_ops = {
53 	.set = read_queue_count_set,
54 	.get = param_get_uint,
55 };
56 
57 static unsigned int read_queues;
58 module_param_cb(read_queues, &read_queue_count_ops, &read_queues, 0644);
59 MODULE_PARM_DESC(read_queues,
60 		 "Number of interrupt driven read queues used for read. Default value is 0");
61 
62 static int poll_queue_count_set(const char *val, const struct kernel_param *kp)
63 {
64 	return param_set_uint_minmax(val, kp, UFS_MCQ_MIN_POLL_QUEUES,
65 				     num_possible_cpus());
66 }
67 
68 static const struct kernel_param_ops poll_queue_count_ops = {
69 	.set = poll_queue_count_set,
70 	.get = param_get_uint,
71 };
72 
73 static unsigned int poll_queues = 1;
74 module_param_cb(poll_queues, &poll_queue_count_ops, &poll_queues, 0644);
75 MODULE_PARM_DESC(poll_queues,
76 		 "Number of poll queues used for r/w. Default value is 1");
77 
78 /**
79  * ufshcd_mcq_config_mac - Set the #Max Activ Cmds.
80  * @hba: per adapter instance
81  * @max_active_cmds: maximum # of active commands to the device at any time.
82  *
83  * The controller won't send more than the max_active_cmds to the device at
84  * any time.
85  */
86 void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds)
87 {
88 	u32 val;
89 
90 	val = ufshcd_readl(hba, REG_UFS_MCQ_CFG);
91 	val &= ~MCQ_CFG_MAC_MASK;
92 	val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds);
93 	ufshcd_writel(hba, val, REG_UFS_MCQ_CFG);
94 }
95 
96 /**
97  * ufshcd_mcq_req_to_hwq - find the hardware queue on which the
98  * request would be issued.
99  * @hba: per adapter instance
100  * @req: pointer to the request to be issued
101  *
102  * Returns the hardware queue instance on which the request would
103  * be queued.
104  */
105 struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba,
106 					 struct request *req)
107 {
108 	u32 utag = blk_mq_unique_tag(req);
109 	u32 hwq = blk_mq_unique_tag_to_hwq(utag);
110 
111 	/* uhq[0] is used to serve device commands */
112 	return &hba->uhq[hwq + UFSHCD_MCQ_IO_QUEUE_OFFSET];
113 }
114 
115 /**
116  * ufshcd_mcq_decide_queue_depth - decide the queue depth
117  * @hba: per adapter instance
118  *
119  * Returns queue-depth on success, non-zero on error
120  *
121  * MAC - Max. Active Command of the Host Controller (HC)
122  * HC wouldn't send more than this commands to the device.
123  * It is mandatory to implement get_hba_mac() to enable MCQ mode.
124  * Calculates and adjusts the queue depth based on the depth
125  * supported by the HC and ufs device.
126  */
127 int ufshcd_mcq_decide_queue_depth(struct ufs_hba *hba)
128 {
129 	int mac;
130 
131 	/* Mandatory to implement get_hba_mac() */
132 	mac = ufshcd_mcq_vops_get_hba_mac(hba);
133 	if (mac < 0) {
134 		dev_err(hba->dev, "Failed to get mac, err=%d\n", mac);
135 		return mac;
136 	}
137 
138 	WARN_ON_ONCE(!hba->dev_info.bqueuedepth);
139 	/*
140 	 * max. value of bqueuedepth = 256, mac is host dependent.
141 	 * It is mandatory for UFS device to define bQueueDepth if
142 	 * shared queuing architecture is enabled.
143 	 */
144 	return min_t(int, mac, hba->dev_info.bqueuedepth);
145 }
146 
147 static int ufshcd_mcq_config_nr_queues(struct ufs_hba *hba)
148 {
149 	int i;
150 	u32 hba_maxq, rem, tot_queues;
151 	struct Scsi_Host *host = hba->host;
152 
153 	/* maxq is 0 based value */
154 	hba_maxq = FIELD_GET(MAX_QUEUE_SUP, hba->mcq_capabilities) + 1;
155 
156 	tot_queues = UFS_MCQ_NUM_DEV_CMD_QUEUES + read_queues + poll_queues +
157 			rw_queues;
158 
159 	if (hba_maxq < tot_queues) {
160 		dev_err(hba->dev, "Total queues (%d) exceeds HC capacity (%d)\n",
161 			tot_queues, hba_maxq);
162 		return -EOPNOTSUPP;
163 	}
164 
165 	rem = hba_maxq - UFS_MCQ_NUM_DEV_CMD_QUEUES;
166 
167 	if (rw_queues) {
168 		hba->nr_queues[HCTX_TYPE_DEFAULT] = rw_queues;
169 		rem -= hba->nr_queues[HCTX_TYPE_DEFAULT];
170 	} else {
171 		rw_queues = num_possible_cpus();
172 	}
173 
174 	if (poll_queues) {
175 		hba->nr_queues[HCTX_TYPE_POLL] = poll_queues;
176 		rem -= hba->nr_queues[HCTX_TYPE_POLL];
177 	}
178 
179 	if (read_queues) {
180 		hba->nr_queues[HCTX_TYPE_READ] = read_queues;
181 		rem -= hba->nr_queues[HCTX_TYPE_READ];
182 	}
183 
184 	if (!hba->nr_queues[HCTX_TYPE_DEFAULT])
185 		hba->nr_queues[HCTX_TYPE_DEFAULT] = min3(rem, rw_queues,
186 							 num_possible_cpus());
187 
188 	for (i = 0; i < HCTX_MAX_TYPES; i++)
189 		host->nr_hw_queues += hba->nr_queues[i];
190 
191 	hba->nr_hw_queues = host->nr_hw_queues + UFS_MCQ_NUM_DEV_CMD_QUEUES;
192 	return 0;
193 }
194 
195 int ufshcd_mcq_memory_alloc(struct ufs_hba *hba)
196 {
197 	struct ufs_hw_queue *hwq;
198 	size_t utrdl_size, cqe_size;
199 	int i;
200 
201 	for (i = 0; i < hba->nr_hw_queues; i++) {
202 		hwq = &hba->uhq[i];
203 
204 		utrdl_size = sizeof(struct utp_transfer_req_desc) *
205 			     hwq->max_entries;
206 		hwq->sqe_base_addr = dmam_alloc_coherent(hba->dev, utrdl_size,
207 							 &hwq->sqe_dma_addr,
208 							 GFP_KERNEL);
209 		if (!hwq->sqe_dma_addr) {
210 			dev_err(hba->dev, "SQE allocation failed\n");
211 			return -ENOMEM;
212 		}
213 
214 		cqe_size = sizeof(struct cq_entry) * hwq->max_entries;
215 		hwq->cqe_base_addr = dmam_alloc_coherent(hba->dev, cqe_size,
216 							 &hwq->cqe_dma_addr,
217 							 GFP_KERNEL);
218 		if (!hwq->cqe_dma_addr) {
219 			dev_err(hba->dev, "CQE allocation failed\n");
220 			return -ENOMEM;
221 		}
222 	}
223 
224 	return 0;
225 }
226 
227 
/* Operation and runtime registers configuration */

/* Offset of configuration register @r within the register set of queue @i */
#define MCQ_CFG_n(r, i)	((r) + (i) * MCQ_QCFG_SIZE)

/*
 * Offset of the operation/runtime register group @p for queue @i.
 * Note: relies on a variable named "hba" being in scope at the call site.
 */
#define MCQ_OPR_OFFSET_n(p, i) \
	(hba->mcq_opr[(p)].stride * (i) + hba->mcq_opr[(p)].offset)
232 
233 static void __iomem *mcq_opr_base(struct ufs_hba *hba,
234 					 enum ufshcd_mcq_opr n, int i)
235 {
236 	struct ufshcd_mcq_opr_info_t *opr = &hba->mcq_opr[n];
237 
238 	return opr->base + opr->stride * i;
239 }
240 
241 u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i)
242 {
243 	return readl(mcq_opr_base(hba, OPR_CQIS, i) + REG_CQIS);
244 }
245 
246 void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i)
247 {
248 	writel(val, mcq_opr_base(hba, OPR_CQIS, i) + REG_CQIS);
249 }
250 EXPORT_SYMBOL_GPL(ufshcd_mcq_write_cqis);
251 
252 /*
253  * Current MCQ specification doesn't provide a Task Tag or its equivalent in
254  * the Completion Queue Entry. Find the Task Tag using an indirect method.
255  */
256 static int ufshcd_mcq_get_tag(struct ufs_hba *hba,
257 				     struct ufs_hw_queue *hwq,
258 				     struct cq_entry *cqe)
259 {
260 	u64 addr;
261 
262 	/* sizeof(struct utp_transfer_cmd_desc) must be a multiple of 128 */
263 	BUILD_BUG_ON(sizeof(struct utp_transfer_cmd_desc) & GENMASK(6, 0));
264 
265 	/* Bits 63:7 UCD base address, 6:5 are reserved, 4:0 is SQ ID */
266 	addr = (le64_to_cpu(cqe->command_desc_base_addr) & CQE_UCD_BA) -
267 		hba->ucdl_dma_addr;
268 
269 	return div_u64(addr, ufshcd_get_ucd_size(hba));
270 }
271 
/* Complete the request described by the current completion queue entry of @hwq */
static void ufshcd_mcq_process_cqe(struct ufs_hba *hba,
					    struct ufs_hw_queue *hwq)
{
	struct cq_entry *cur = ufshcd_mcq_cur_cqe(hwq);

	ufshcd_compl_one_cqe(hba, ufshcd_mcq_get_tag(hba, hwq, cur), cur);
}
280 
/*
 * Process every pending completion queue entry of @hwq without taking
 * hwq->cq_lock.
 *
 * The tail slot is refreshed once up front, then entries are completed one
 * by one until the queue is seen empty. The head is only written back when
 * at least one entry has been consumed.
 *
 * Returns the number of entries processed.
 */
unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba,
					 struct ufs_hw_queue *hwq)
{
	unsigned long nr_done;

	ufshcd_mcq_update_cq_tail_slot(hwq);

	for (nr_done = 0; !ufshcd_mcq_is_cq_empty(hwq); nr_done++) {
		ufshcd_mcq_process_cqe(hba, hwq);
		ufshcd_mcq_inc_cq_head_slot(hwq);
	}

	if (nr_done)
		ufshcd_mcq_update_cq_head(hwq);

	return nr_done;
}
EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_nolock);
299 
300 unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
301 				       struct ufs_hw_queue *hwq)
302 {
303 	unsigned long completed_reqs, flags;
304 
305 	spin_lock_irqsave(&hwq->cq_lock, flags);
306 	completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq);
307 	spin_unlock_irqrestore(&hwq->cq_lock, flags);
308 
309 	return completed_reqs;
310 }
311 
312 void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba)
313 {
314 	struct ufs_hw_queue *hwq;
315 	u16 qsize;
316 	int i;
317 
318 	for (i = 0; i < hba->nr_hw_queues; i++) {
319 		hwq = &hba->uhq[i];
320 		hwq->id = i;
321 		qsize = hwq->max_entries * MCQ_ENTRY_SIZE_IN_DWORD - 1;
322 
323 		/* Submission Queue Lower Base Address */
324 		ufsmcq_writelx(hba, lower_32_bits(hwq->sqe_dma_addr),
325 			      MCQ_CFG_n(REG_SQLBA, i));
326 		/* Submission Queue Upper Base Address */
327 		ufsmcq_writelx(hba, upper_32_bits(hwq->sqe_dma_addr),
328 			      MCQ_CFG_n(REG_SQUBA, i));
329 		/* Submission Queue Doorbell Address Offset */
330 		ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_SQD, i),
331 			      MCQ_CFG_n(REG_SQDAO, i));
332 		/* Submission Queue Interrupt Status Address Offset */
333 		ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_SQIS, i),
334 			      MCQ_CFG_n(REG_SQISAO, i));
335 
336 		/* Completion Queue Lower Base Address */
337 		ufsmcq_writelx(hba, lower_32_bits(hwq->cqe_dma_addr),
338 			      MCQ_CFG_n(REG_CQLBA, i));
339 		/* Completion Queue Upper Base Address */
340 		ufsmcq_writelx(hba, upper_32_bits(hwq->cqe_dma_addr),
341 			      MCQ_CFG_n(REG_CQUBA, i));
342 		/* Completion Queue Doorbell Address Offset */
343 		ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_CQD, i),
344 			      MCQ_CFG_n(REG_CQDAO, i));
345 		/* Completion Queue Interrupt Status Address Offset */
346 		ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_CQIS, i),
347 			      MCQ_CFG_n(REG_CQISAO, i));
348 
349 		/* Save the base addresses for quicker access */
350 		hwq->mcq_sq_head = mcq_opr_base(hba, OPR_SQD, i) + REG_SQHP;
351 		hwq->mcq_sq_tail = mcq_opr_base(hba, OPR_SQD, i) + REG_SQTP;
352 		hwq->mcq_cq_head = mcq_opr_base(hba, OPR_CQD, i) + REG_CQHP;
353 		hwq->mcq_cq_tail = mcq_opr_base(hba, OPR_CQD, i) + REG_CQTP;
354 
355 		/* Reinitializing is needed upon HC reset */
356 		hwq->sq_tail_slot = hwq->cq_tail_slot = hwq->cq_head_slot = 0;
357 
358 		/* Enable Tail Entry Push Status interrupt only for non-poll queues */
359 		if (i < hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL])
360 			writel(1, mcq_opr_base(hba, OPR_CQIS, i) + REG_CQIE);
361 
362 		/* Completion Queue Enable|Size to Completion Queue Attribute */
363 		ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize,
364 			      MCQ_CFG_n(REG_CQATTR, i));
365 
366 		/*
367 		 * Submission Qeueue Enable|Size|Completion Queue ID to
368 		 * Submission Queue Attribute
369 		 */
370 		ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize |
371 			      (i << QUEUE_ID_OFFSET),
372 			      MCQ_CFG_n(REG_SQATTR, i));
373 	}
374 }
375 
376 void ufshcd_mcq_enable_esi(struct ufs_hba *hba)
377 {
378 	ufshcd_writel(hba, ufshcd_readl(hba, REG_UFS_MEM_CFG) | 0x2,
379 		      REG_UFS_MEM_CFG);
380 }
381 EXPORT_SYMBOL_GPL(ufshcd_mcq_enable_esi);
382 
383 void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg)
384 {
385 	ufshcd_writel(hba, msg->address_lo, REG_UFS_ESILBA);
386 	ufshcd_writel(hba, msg->address_hi, REG_UFS_ESIUBA);
387 }
388 EXPORT_SYMBOL_GPL(ufshcd_mcq_config_esi);
389 
390 int ufshcd_mcq_init(struct ufs_hba *hba)
391 {
392 	struct Scsi_Host *host = hba->host;
393 	struct ufs_hw_queue *hwq;
394 	int ret, i;
395 
396 	ret = ufshcd_mcq_config_nr_queues(hba);
397 	if (ret)
398 		return ret;
399 
400 	ret = ufshcd_vops_mcq_config_resource(hba);
401 	if (ret)
402 		return ret;
403 
404 	ret = ufshcd_mcq_vops_op_runtime_config(hba);
405 	if (ret) {
406 		dev_err(hba->dev, "Operation runtime config failed, ret=%d\n",
407 			ret);
408 		return ret;
409 	}
410 	hba->uhq = devm_kzalloc(hba->dev,
411 				hba->nr_hw_queues * sizeof(struct ufs_hw_queue),
412 				GFP_KERNEL);
413 	if (!hba->uhq) {
414 		dev_err(hba->dev, "ufs hw queue memory allocation failed\n");
415 		return -ENOMEM;
416 	}
417 
418 	for (i = 0; i < hba->nr_hw_queues; i++) {
419 		hwq = &hba->uhq[i];
420 		hwq->max_entries = hba->nutrs;
421 		spin_lock_init(&hwq->sq_lock);
422 		spin_lock_init(&hwq->cq_lock);
423 	}
424 
425 	/* The very first HW queue serves device commands */
426 	hba->dev_cmd_queue = &hba->uhq[0];
427 	/* Give dev_cmd_queue the minimal number of entries */
428 	hba->dev_cmd_queue->max_entries = MAX_DEV_CMD_ENTRIES;
429 
430 	host->host_tagset = 1;
431 	return 0;
432 }
433