1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2022 Qualcomm Innovation Center. All rights reserved. 4 * 5 * Authors: 6 * Asutosh Das <quic_asutoshd@quicinc.com> 7 * Can Guo <quic_cang@quicinc.com> 8 */ 9 10 #include <asm/unaligned.h> 11 #include <linux/dma-mapping.h> 12 #include <linux/module.h> 13 #include <linux/platform_device.h> 14 #include "ufshcd-priv.h" 15 16 #define MAX_QUEUE_SUP GENMASK(7, 0) 17 #define UFS_MCQ_MIN_RW_QUEUES 2 18 #define UFS_MCQ_MIN_READ_QUEUES 0 19 #define UFS_MCQ_NUM_DEV_CMD_QUEUES 1 20 #define UFS_MCQ_MIN_POLL_QUEUES 0 21 #define QUEUE_EN_OFFSET 31 22 #define QUEUE_ID_OFFSET 16 23 24 #define MAX_DEV_CMD_ENTRIES 2 25 #define MCQ_CFG_MAC_MASK GENMASK(16, 8) 26 #define MCQ_QCFG_SIZE 0x40 27 #define MCQ_ENTRY_SIZE_IN_DWORD 8 28 #define CQE_UCD_BA GENMASK_ULL(63, 7) 29 30 static int rw_queue_count_set(const char *val, const struct kernel_param *kp) 31 { 32 return param_set_uint_minmax(val, kp, UFS_MCQ_MIN_RW_QUEUES, 33 num_possible_cpus()); 34 } 35 36 static const struct kernel_param_ops rw_queue_count_ops = { 37 .set = rw_queue_count_set, 38 .get = param_get_uint, 39 }; 40 41 static unsigned int rw_queues; 42 module_param_cb(rw_queues, &rw_queue_count_ops, &rw_queues, 0644); 43 MODULE_PARM_DESC(rw_queues, 44 "Number of interrupt driven I/O queues used for rw. Default value is nr_cpus"); 45 46 static int read_queue_count_set(const char *val, const struct kernel_param *kp) 47 { 48 return param_set_uint_minmax(val, kp, UFS_MCQ_MIN_READ_QUEUES, 49 num_possible_cpus()); 50 } 51 52 static const struct kernel_param_ops read_queue_count_ops = { 53 .set = read_queue_count_set, 54 .get = param_get_uint, 55 }; 56 57 static unsigned int read_queues; 58 module_param_cb(read_queues, &read_queue_count_ops, &read_queues, 0644); 59 MODULE_PARM_DESC(read_queues, 60 "Number of interrupt driven read queues used for read. Default value is 0"); 61 62 static int poll_queue_count_set(const char *val, const struct kernel_param *kp) 63 { 64 return param_set_uint_minmax(val, kp, UFS_MCQ_MIN_POLL_QUEUES, 65 num_possible_cpus()); 66 } 67 68 static const struct kernel_param_ops poll_queue_count_ops = { 69 .set = poll_queue_count_set, 70 .get = param_get_uint, 71 }; 72 73 static unsigned int poll_queues = 1; 74 module_param_cb(poll_queues, &poll_queue_count_ops, &poll_queues, 0644); 75 MODULE_PARM_DESC(poll_queues, 76 "Number of poll queues used for r/w. Default value is 1"); 77 78 /** 79 * ufshcd_mcq_config_mac - Set the #Max Activ Cmds. 80 * @hba: per adapter instance 81 * @max_active_cmds: maximum # of active commands to the device at any time. 82 * 83 * The controller won't send more than the max_active_cmds to the device at 84 * any time. 85 */ 86 void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds) 87 { 88 u32 val; 89 90 val = ufshcd_readl(hba, REG_UFS_MCQ_CFG); 91 val &= ~MCQ_CFG_MAC_MASK; 92 val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds); 93 ufshcd_writel(hba, val, REG_UFS_MCQ_CFG); 94 } 95 96 /** 97 * ufshcd_mcq_req_to_hwq - find the hardware queue on which the 98 * request would be issued. 99 * @hba: per adapter instance 100 * @req: pointer to the request to be issued 101 * 102 * Returns the hardware queue instance on which the request would 103 * be queued. 104 */ 105 struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba, 106 struct request *req) 107 { 108 u32 utag = blk_mq_unique_tag(req); 109 u32 hwq = blk_mq_unique_tag_to_hwq(utag); 110 111 /* uhq[0] is used to serve device commands */ 112 return &hba->uhq[hwq + UFSHCD_MCQ_IO_QUEUE_OFFSET]; 113 } 114 115 /** 116 * ufshcd_mcq_decide_queue_depth - decide the queue depth 117 * @hba: per adapter instance 118 * 119 * Returns queue-depth on success, non-zero on error 120 * 121 * MAC - Max. Active Command of the Host Controller (HC) 122 * HC wouldn't send more than this commands to the device. 123 * It is mandatory to implement get_hba_mac() to enable MCQ mode. 124 * Calculates and adjusts the queue depth based on the depth 125 * supported by the HC and ufs device. 126 */ 127 int ufshcd_mcq_decide_queue_depth(struct ufs_hba *hba) 128 { 129 int mac; 130 131 /* Mandatory to implement get_hba_mac() */ 132 mac = ufshcd_mcq_vops_get_hba_mac(hba); 133 if (mac < 0) { 134 dev_err(hba->dev, "Failed to get mac, err=%d\n", mac); 135 return mac; 136 } 137 138 WARN_ON_ONCE(!hba->dev_info.bqueuedepth); 139 /* 140 * max. value of bqueuedepth = 256, mac is host dependent. 141 * It is mandatory for UFS device to define bQueueDepth if 142 * shared queuing architecture is enabled. 143 */ 144 return min_t(int, mac, hba->dev_info.bqueuedepth); 145 } 146 147 static int ufshcd_mcq_config_nr_queues(struct ufs_hba *hba) 148 { 149 int i; 150 u32 hba_maxq, rem, tot_queues; 151 struct Scsi_Host *host = hba->host; 152 153 /* maxq is 0 based value */ 154 hba_maxq = FIELD_GET(MAX_QUEUE_SUP, hba->mcq_capabilities) + 1; 155 156 tot_queues = UFS_MCQ_NUM_DEV_CMD_QUEUES + read_queues + poll_queues + 157 rw_queues; 158 159 if (hba_maxq < tot_queues) { 160 dev_err(hba->dev, "Total queues (%d) exceeds HC capacity (%d)\n", 161 tot_queues, hba_maxq); 162 return -EOPNOTSUPP; 163 } 164 165 rem = hba_maxq - UFS_MCQ_NUM_DEV_CMD_QUEUES; 166 167 if (rw_queues) { 168 hba->nr_queues[HCTX_TYPE_DEFAULT] = rw_queues; 169 rem -= hba->nr_queues[HCTX_TYPE_DEFAULT]; 170 } else { 171 rw_queues = num_possible_cpus(); 172 } 173 174 if (poll_queues) { 175 hba->nr_queues[HCTX_TYPE_POLL] = poll_queues; 176 rem -= hba->nr_queues[HCTX_TYPE_POLL]; 177 } 178 179 if (read_queues) { 180 hba->nr_queues[HCTX_TYPE_READ] = read_queues; 181 rem -= hba->nr_queues[HCTX_TYPE_READ]; 182 } 183 184 if (!hba->nr_queues[HCTX_TYPE_DEFAULT]) 185 hba->nr_queues[HCTX_TYPE_DEFAULT] = min3(rem, rw_queues, 186 num_possible_cpus()); 187 188 for (i = 0; i < HCTX_MAX_TYPES; i++) 189 host->nr_hw_queues += hba->nr_queues[i]; 190 191 hba->nr_hw_queues = host->nr_hw_queues + UFS_MCQ_NUM_DEV_CMD_QUEUES; 192 return 0; 193 } 194 195 int ufshcd_mcq_memory_alloc(struct ufs_hba *hba) 196 { 197 struct ufs_hw_queue *hwq; 198 size_t utrdl_size, cqe_size; 199 int i; 200 201 for (i = 0; i < hba->nr_hw_queues; i++) { 202 hwq = &hba->uhq[i]; 203 204 utrdl_size = sizeof(struct utp_transfer_req_desc) * 205 hwq->max_entries; 206 hwq->sqe_base_addr = dmam_alloc_coherent(hba->dev, utrdl_size, 207 &hwq->sqe_dma_addr, 208 GFP_KERNEL); 209 if (!hwq->sqe_dma_addr) { 210 dev_err(hba->dev, "SQE allocation failed\n"); 211 return -ENOMEM; 212 } 213 214 cqe_size = sizeof(struct cq_entry) * hwq->max_entries; 215 hwq->cqe_base_addr = dmam_alloc_coherent(hba->dev, cqe_size, 216 &hwq->cqe_dma_addr, 217 GFP_KERNEL); 218 if (!hwq->cqe_dma_addr) { 219 dev_err(hba->dev, "CQE allocation failed\n"); 220 return -ENOMEM; 221 } 222 } 223 224 return 0; 225 } 226 227 228 /* Operation and runtime registers configuration */ 229 #define MCQ_CFG_n(r, i) ((r) + MCQ_QCFG_SIZE * (i)) 230 #define MCQ_OPR_OFFSET_n(p, i) \ 231 (hba->mcq_opr[(p)].offset + hba->mcq_opr[(p)].stride * (i)) 232 233 static void __iomem *mcq_opr_base(struct ufs_hba *hba, 234 enum ufshcd_mcq_opr n, int i) 235 { 236 struct ufshcd_mcq_opr_info_t *opr = &hba->mcq_opr[n]; 237 238 return opr->base + opr->stride * i; 239 } 240 241 u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i) 242 { 243 return readl(mcq_opr_base(hba, OPR_CQIS, i) + REG_CQIS); 244 } 245 246 void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i) 247 { 248 writel(val, mcq_opr_base(hba, OPR_CQIS, i) + REG_CQIS); 249 } 250 EXPORT_SYMBOL_GPL(ufshcd_mcq_write_cqis); 251 252 /* 253 * Current MCQ specification doesn't provide a Task Tag or its equivalent in 254 * the Completion Queue Entry. Find the Task Tag using an indirect method. 255 */ 256 static int ufshcd_mcq_get_tag(struct ufs_hba *hba, 257 struct ufs_hw_queue *hwq, 258 struct cq_entry *cqe) 259 { 260 u64 addr; 261 262 /* sizeof(struct utp_transfer_cmd_desc) must be a multiple of 128 */ 263 BUILD_BUG_ON(sizeof(struct utp_transfer_cmd_desc) & GENMASK(6, 0)); 264 265 /* Bits 63:7 UCD base address, 6:5 are reserved, 4:0 is SQ ID */ 266 addr = (le64_to_cpu(cqe->command_desc_base_addr) & CQE_UCD_BA) - 267 hba->ucdl_dma_addr; 268 269 return div_u64(addr, ufshcd_get_ucd_size(hba)); 270 } 271 272 static void ufshcd_mcq_process_cqe(struct ufs_hba *hba, 273 struct ufs_hw_queue *hwq) 274 { 275 struct cq_entry *cqe = ufshcd_mcq_cur_cqe(hwq); 276 int tag = ufshcd_mcq_get_tag(hba, hwq, cqe); 277 278 ufshcd_compl_one_cqe(hba, tag, cqe); 279 } 280 281 unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, 282 struct ufs_hw_queue *hwq) 283 { 284 unsigned long completed_reqs = 0; 285 286 ufshcd_mcq_update_cq_tail_slot(hwq); 287 while (!ufshcd_mcq_is_cq_empty(hwq)) { 288 ufshcd_mcq_process_cqe(hba, hwq); 289 ufshcd_mcq_inc_cq_head_slot(hwq); 290 completed_reqs++; 291 } 292 293 if (completed_reqs) 294 ufshcd_mcq_update_cq_head(hwq); 295 296 return completed_reqs; 297 } 298 EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_nolock); 299 300 unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, 301 struct ufs_hw_queue *hwq) 302 { 303 unsigned long completed_reqs, flags; 304 305 spin_lock_irqsave(&hwq->cq_lock, flags); 306 completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq); 307 spin_unlock_irqrestore(&hwq->cq_lock, flags); 308 309 return completed_reqs; 310 } 311 312 void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba) 313 { 314 struct ufs_hw_queue *hwq; 315 u16 qsize; 316 int i; 317 318 for (i = 0; i < hba->nr_hw_queues; i++) { 319 hwq = &hba->uhq[i]; 320 hwq->id = i; 321 qsize = hwq->max_entries * MCQ_ENTRY_SIZE_IN_DWORD - 1; 322 323 /* Submission Queue Lower Base Address */ 324 ufsmcq_writelx(hba, lower_32_bits(hwq->sqe_dma_addr), 325 MCQ_CFG_n(REG_SQLBA, i)); 326 /* Submission Queue Upper Base Address */ 327 ufsmcq_writelx(hba, upper_32_bits(hwq->sqe_dma_addr), 328 MCQ_CFG_n(REG_SQUBA, i)); 329 /* Submission Queue Doorbell Address Offset */ 330 ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_SQD, i), 331 MCQ_CFG_n(REG_SQDAO, i)); 332 /* Submission Queue Interrupt Status Address Offset */ 333 ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_SQIS, i), 334 MCQ_CFG_n(REG_SQISAO, i)); 335 336 /* Completion Queue Lower Base Address */ 337 ufsmcq_writelx(hba, lower_32_bits(hwq->cqe_dma_addr), 338 MCQ_CFG_n(REG_CQLBA, i)); 339 /* Completion Queue Upper Base Address */ 340 ufsmcq_writelx(hba, upper_32_bits(hwq->cqe_dma_addr), 341 MCQ_CFG_n(REG_CQUBA, i)); 342 /* Completion Queue Doorbell Address Offset */ 343 ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_CQD, i), 344 MCQ_CFG_n(REG_CQDAO, i)); 345 /* Completion Queue Interrupt Status Address Offset */ 346 ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_CQIS, i), 347 MCQ_CFG_n(REG_CQISAO, i)); 348 349 /* Save the base addresses for quicker access */ 350 hwq->mcq_sq_head = mcq_opr_base(hba, OPR_SQD, i) + REG_SQHP; 351 hwq->mcq_sq_tail = mcq_opr_base(hba, OPR_SQD, i) + REG_SQTP; 352 hwq->mcq_cq_head = mcq_opr_base(hba, OPR_CQD, i) + REG_CQHP; 353 hwq->mcq_cq_tail = mcq_opr_base(hba, OPR_CQD, i) + REG_CQTP; 354 355 /* Reinitializing is needed upon HC reset */ 356 hwq->sq_tail_slot = hwq->cq_tail_slot = hwq->cq_head_slot = 0; 357 358 /* Enable Tail Entry Push Status interrupt only for non-poll queues */ 359 if (i < hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL]) 360 writel(1, mcq_opr_base(hba, OPR_CQIS, i) + REG_CQIE); 361 362 /* Completion Queue Enable|Size to Completion Queue Attribute */ 363 ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize, 364 MCQ_CFG_n(REG_CQATTR, i)); 365 366 /* 367 * Submission Qeueue Enable|Size|Completion Queue ID to 368 * Submission Queue Attribute 369 */ 370 ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize | 371 (i << QUEUE_ID_OFFSET), 372 MCQ_CFG_n(REG_SQATTR, i)); 373 } 374 } 375 376 void ufshcd_mcq_enable_esi(struct ufs_hba *hba) 377 { 378 ufshcd_writel(hba, ufshcd_readl(hba, REG_UFS_MEM_CFG) | 0x2, 379 REG_UFS_MEM_CFG); 380 } 381 EXPORT_SYMBOL_GPL(ufshcd_mcq_enable_esi); 382 383 void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg) 384 { 385 ufshcd_writel(hba, msg->address_lo, REG_UFS_ESILBA); 386 ufshcd_writel(hba, msg->address_hi, REG_UFS_ESIUBA); 387 } 388 EXPORT_SYMBOL_GPL(ufshcd_mcq_config_esi); 389 390 int ufshcd_mcq_init(struct ufs_hba *hba) 391 { 392 struct Scsi_Host *host = hba->host; 393 struct ufs_hw_queue *hwq; 394 int ret, i; 395 396 ret = ufshcd_mcq_config_nr_queues(hba); 397 if (ret) 398 return ret; 399 400 ret = ufshcd_vops_mcq_config_resource(hba); 401 if (ret) 402 return ret; 403 404 ret = ufshcd_mcq_vops_op_runtime_config(hba); 405 if (ret) { 406 dev_err(hba->dev, "Operation runtime config failed, ret=%d\n", 407 ret); 408 return ret; 409 } 410 hba->uhq = devm_kzalloc(hba->dev, 411 hba->nr_hw_queues * sizeof(struct ufs_hw_queue), 412 GFP_KERNEL); 413 if (!hba->uhq) { 414 dev_err(hba->dev, "ufs hw queue memory allocation failed\n"); 415 return -ENOMEM; 416 } 417 418 for (i = 0; i < hba->nr_hw_queues; i++) { 419 hwq = &hba->uhq[i]; 420 hwq->max_entries = hba->nutrs; 421 spin_lock_init(&hwq->sq_lock); 422 spin_lock_init(&hwq->cq_lock); 423 } 424 425 /* The very first HW queue serves device commands */ 426 hba->dev_cmd_queue = &hba->uhq[0]; 427 /* Give dev_cmd_queue the minimal number of entries */ 428 hba->dev_cmd_queue->max_entries = MAX_DEV_CMD_ENTRIES; 429 430 host->host_tagset = 1; 431 return 0; 432 } 433