1 /**********************************************************************
2  * Author: Cavium, Inc.
3  *
4  * Contact: support@cavium.com
5  *          Please include "LiquidIO" in the subject.
6  *
7  * Copyright (c) 2003-2016 Cavium, Inc.
8  *
9  * This file is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License, Version 2, as
11  * published by the Free Software Foundation.
12  *
13  * This file is distributed in the hope that it will be useful, but
14  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
15  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
16  * NONINFRINGEMENT.  See the GNU General Public License for more
17  * details.
18  **********************************************************************/
19 #include <linux/pci.h>
20 #include <linux/netdevice.h>
21 #include <linux/vmalloc.h>
22 #include "liquidio_common.h"
23 #include "octeon_droq.h"
24 #include "octeon_iq.h"
25 #include "response_manager.h"
26 #include "octeon_device.h"
27 #include "octeon_main.h"
28 #include "octeon_network.h"
29 #include "cn66xx_device.h"
30 #include "cn23xx_pf_device.h"
31 #include "cn23xx_vf_device.h"
32 
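/* Result of posting one command to an instruction queue: the post status
 * (IQ_SEND_OK, IQ_SEND_STOP or IQ_SEND_FAILED) and the descriptor index the
 * command was written to (-1 on failure).
 */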
33 struct iq_post_status {
34 	int status;
35 	int index;
36 };
37 
38 static void check_db_timeout(struct work_struct *work);
static void __check_db_timeout(struct octeon_device *oct, u64 iq_no);
40 
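/* Per-device table of buffer-free callbacks, indexed by request type. */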
41 static void (*reqtype_free_fn[MAX_OCTEON_DEVICES][REQTYPE_LAST + 1]) (void *);
42 
/* Define this to return the request status compatible with the old code */
44 /*#define OCTEON_USE_OLD_REQ_STATUS*/
45 
46 /* Return 0 on success, 1 on failure */
47 int octeon_init_instr_queue(struct octeon_device *oct,
48 			    union oct_txpciq txpciq,
49 			    u32 num_descs)
50 {
51 	struct octeon_instr_queue *iq;
52 	struct octeon_iq_config *conf = NULL;
53 	u32 iq_no = (u32)txpciq.s.q_no;
54 	u32 q_size;
55 	struct cavium_wq *db_wq;
56 	int numa_node = dev_to_node(&oct->pci_dev->dev);
57 
58 	if (OCTEON_CN6XXX(oct))
59 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx)));
60 	else if (OCTEON_CN23XX_PF(oct))
61 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_pf)));
62 	else if (OCTEON_CN23XX_VF(oct))
63 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_vf)));
64 
65 	if (!conf) {
66 		dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
67 			oct->chip_id);
68 		return 1;
69 	}
70 
71 	q_size = (u32)conf->instr_type * num_descs;
72 
73 	iq = oct->instr_queue[iq_no];
74 
75 	iq->oct_dev = oct;
76 
77 	iq->base_addr = lio_dma_alloc(oct, q_size, &iq->base_addr_dma);
78 	if (!iq->base_addr) {
79 		dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
80 			iq_no);
81 		return 1;
82 	}
83 
84 	iq->max_count = num_descs;
85 
	/* Initialize a list to hold requests that have been posted to Octeon
	 * but have yet to be fetched by Octeon.
	 */
89 	iq->request_list = vzalloc_node(array_size(num_descs, sizeof(*iq->request_list)),
90 					numa_node);
91 	if (!iq->request_list)
92 		iq->request_list = vzalloc(array_size(num_descs, sizeof(*iq->request_list)));
93 	if (!iq->request_list) {
94 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
95 		dev_err(&oct->pci_dev->dev, "Alloc failed for IQ[%d] nr free list\n",
96 			iq_no);
97 		return 1;
98 	}
99 
100 	dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %pad count: %d\n",
101 		iq_no, iq->base_addr, &iq->base_addr_dma, iq->max_count);
102 
103 	iq->txpciq.u64 = txpciq.u64;
104 	iq->fill_threshold = (u32)conf->db_min;
105 	iq->fill_cnt = 0;
106 	iq->host_write_index = 0;
107 	iq->octeon_read_index = 0;
108 	iq->flush_index = 0;
109 	iq->last_db_time = 0;
110 	iq->do_auto_flush = 1;
111 	iq->db_timeout = (u32)conf->db_timeout;
112 	atomic_set(&iq->instr_pending, 0);
113 	iq->pkts_processed = 0;
114 
115 	/* Initialize the spinlock for this instruction queue */
116 	spin_lock_init(&iq->lock);
117 	if (iq_no == 0) {
118 		iq->allow_soft_cmds = true;
119 		spin_lock_init(&iq->post_lock);
120 	} else {
121 		iq->allow_soft_cmds = false;
122 	}
123 
124 	spin_lock_init(&iq->iq_flush_running_lock);
125 
126 	oct->io_qmask.iq |= BIT_ULL(iq_no);
127 
128 	/* Set the 32B/64B mode for each input queue */
	oct->io_qmask.iq64B |= ((u64)(conf->instr_type == 64) << iq_no);
130 	iq->iqcmd_64B = (conf->instr_type == 64);
131 
132 	oct->fn_list.setup_iq_regs(oct, iq_no);
133 
134 	oct->check_db_wq[iq_no].wq = alloc_workqueue("check_iq_db",
135 						     WQ_MEM_RECLAIM,
136 						     0);
137 	if (!oct->check_db_wq[iq_no].wq) {
138 		vfree(iq->request_list);
139 		iq->request_list = NULL;
140 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
141 		dev_err(&oct->pci_dev->dev, "check db wq create failed for iq %d\n",
142 			iq_no);
143 		return 1;
144 	}
145 
146 	db_wq = &oct->check_db_wq[iq_no];
147 
148 	INIT_DELAYED_WORK(&db_wq->wk.work, check_db_timeout);
149 	db_wq->wk.ctxptr = oct;
150 	db_wq->wk.ctxul = iq_no;
151 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(1));
152 
153 	return 0;
154 }
155 
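/* Tear down an instruction queue: cancel the doorbell check work, free the
 * request list and descriptor ring, and release the queue structure.
 * Returns 0 on success, 1 if the descriptor ring was never allocated.
 */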
156 int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
157 {
158 	u64 desc_size = 0, q_size;
159 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
160 
161 	cancel_delayed_work_sync(&oct->check_db_wq[iq_no].wk.work);
162 	destroy_workqueue(oct->check_db_wq[iq_no].wq);
163 
164 	if (OCTEON_CN6XXX(oct))
165 		desc_size =
166 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn6xxx));
167 	else if (OCTEON_CN23XX_PF(oct))
168 		desc_size =
169 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_pf));
170 	else if (OCTEON_CN23XX_VF(oct))
171 		desc_size =
172 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_vf));
173 
174 	vfree(iq->request_list);
175 
176 	if (iq->base_addr) {
177 		q_size = iq->max_count * desc_size;
178 		lio_dma_free(oct, (u32)q_size, iq->base_addr,
179 			     iq->base_addr_dma);
180 		oct->io_qmask.iq &= ~(1ULL << iq_no);
181 		vfree(oct->instr_queue[iq_no]);
182 		oct->instr_queue[iq_no] = NULL;
183 		oct->num_iqs--;
184 		return 0;
185 	}
186 	return 1;
187 }
188 
189 /* Return 0 on success, 1 on failure */
190 int octeon_setup_iq(struct octeon_device *oct,
191 		    int ifidx,
192 		    int q_index,
193 		    union oct_txpciq txpciq,
194 		    u32 num_descs,
195 		    void *app_ctx)
196 {
197 	u32 iq_no = (u32)txpciq.s.q_no;
198 	int numa_node = dev_to_node(&oct->pci_dev->dev);
199 
200 	if (oct->instr_queue[iq_no]) {
201 		dev_dbg(&oct->pci_dev->dev, "IQ is in use. Cannot create the IQ: %d again\n",
202 			iq_no);
203 		oct->instr_queue[iq_no]->txpciq.u64 = txpciq.u64;
204 		oct->instr_queue[iq_no]->app_ctx = app_ctx;
205 		return 0;
206 	}
207 	oct->instr_queue[iq_no] =
208 	    vzalloc_node(sizeof(struct octeon_instr_queue), numa_node);
209 	if (!oct->instr_queue[iq_no])
210 		oct->instr_queue[iq_no] =
211 		    vzalloc(sizeof(struct octeon_instr_queue));
212 	if (!oct->instr_queue[iq_no])
213 		return 1;
214 
216 	oct->instr_queue[iq_no]->q_index = q_index;
217 	oct->instr_queue[iq_no]->app_ctx = app_ctx;
218 	oct->instr_queue[iq_no]->ifidx = ifidx;
219 
220 	if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
221 		vfree(oct->instr_queue[iq_no]);
222 		oct->instr_queue[iq_no] = NULL;
223 		return 1;
224 	}
225 
226 	oct->num_iqs++;
227 	if (oct->fn_list.enable_io_queues(oct)) {
228 		octeon_delete_instr_queue(oct, iq_no);
229 		return 1;
230 	}
231 
232 	return 0;
233 }
234 
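/* Wait for instructions pending on all active IQs to be fetched by Octeon,
 * flushing each queue via __check_db_timeout() and retrying up to ~1000
 * times with a one-tick sleep between passes. Returns the number of
 * instructions still pending when the wait ends.
 */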
235 int lio_wait_for_instr_fetch(struct octeon_device *oct)
236 {
237 	int i, retry = 1000, pending, instr_cnt = 0;
238 
239 	do {
240 		instr_cnt = 0;
241 
242 		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
243 			if (!(oct->io_qmask.iq & BIT_ULL(i)))
244 				continue;
245 			pending =
246 			    atomic_read(&oct->instr_queue[i]->instr_pending);
247 			if (pending)
248 				__check_db_timeout(oct, i);
249 			instr_cnt += pending;
250 		}
251 
252 		if (instr_cnt == 0)
253 			break;
254 
255 		schedule_timeout_uninterruptible(1);
256 
257 	} while (retry-- && instr_cnt);
258 
259 	return instr_cnt;
260 }
261 
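/* Write the accumulated fill count to the queue's doorbell register and
 * reset it. Only done while the device is in the RUNNING state.
 */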
262 static inline void
263 ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq)
264 {
265 	if (atomic_read(&oct->status) == OCT_DEV_RUNNING) {
266 		writel(iq->fill_cnt, iq->doorbell_reg);
267 		/* make sure doorbell write goes through */
268 		iq->fill_cnt = 0;
269 		iq->last_db_time = jiffies;
270 		return;
271 	}
272 }
273 
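/* Ring the doorbell of IQ 'iq_no' under its post_lock if commands have been
 * posted but not yet signalled to the device.
 */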
274 void
275 octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no)
276 {
277 	struct octeon_instr_queue *iq;
278 
279 	iq = oct->instr_queue[iq_no];
280 	spin_lock(&iq->post_lock);
281 	if (iq->fill_cnt)
282 		ring_doorbell(oct, iq);
283 	spin_unlock(&iq->post_lock);
284 }
285 
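/* Copy a 32B or 64B command into the ring slot at host_write_index. */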
286 static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
287 				      u8 *cmd)
288 {
289 	u8 *iqptr, cmdsize;
290 
291 	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
292 	iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
293 
294 	memcpy(iqptr, cmd, cmdsize);
295 }
296 
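/* Write a command into the ring at host_write_index and advance the index.
 * One descriptor is always kept unused so that a full queue can be told
 * apart from an empty one.
 */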
297 static inline struct iq_post_status
298 __post_command2(struct octeon_instr_queue *iq, u8 *cmd)
299 {
300 	struct iq_post_status st;
301 
302 	st.status = IQ_SEND_OK;
303 
	/* This ensures that the read index does not wrap around to the same
	 * position if the queue gets full before Octeon can fetch any
	 * instructions.
	 */
307 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 1)) {
308 		st.status = IQ_SEND_FAILED;
309 		st.index = -1;
310 		return st;
311 	}
312 
313 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 2))
314 		st.status = IQ_SEND_STOP;
315 
316 	__copy_cmd_into_iq(iq, cmd);
317 
	/* Record the slot index, then advance host_write_index. */
319 	st.index = iq->host_write_index;
320 	iq->host_write_index = incr_index(iq->host_write_index, 1,
321 					  iq->max_count);
322 	iq->fill_cnt++;
323 
324 	/* Flush the command into memory. We need to be sure the data is in
325 	 * memory before indicating that the instruction is pending.
326 	 */
327 	wmb();
328 
329 	atomic_inc(&iq->instr_pending);
330 
331 	return st;
332 }
333 
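/* Register the callback used to free buffers of the given request type once
 * their instructions have been fetched by Octeon.
 */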
334 int
335 octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype,
336 				void (*fn)(void *))
337 {
338 	if (reqtype > REQTYPE_LAST) {
339 		dev_err(&oct->pci_dev->dev, "%s: Invalid reqtype: %d\n",
340 			__func__, reqtype);
341 		return -EINVAL;
342 	}
343 
344 	reqtype_free_fn[oct->octeon_id][reqtype] = fn;
345 
346 	return 0;
347 }
348 
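/* Record the buffer and request type for the command posted at 'idx'. */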
349 static inline void
350 __add_to_request_list(struct octeon_instr_queue *iq,
351 		      int idx, void *buf, int reqtype)
352 {
353 	iq->request_list[idx].buf = buf;
354 	iq->request_list[idx].reqtype = reqtype;
355 }
356 
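/* Reclaim request_list entries between flush_index and octeon_read_index:
 * free or queue each buffer according to its request type and report the
 * completed bytes to BQL. Returns the number of entries processed.
 */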
357 /* Can only run in process context */
358 int
359 lio_process_iq_request_list(struct octeon_device *oct,
360 			    struct octeon_instr_queue *iq, u32 napi_budget)
361 {
362 	struct cavium_wq *cwq = &oct->dma_comp_wq;
363 	int reqtype;
364 	void *buf;
365 	u32 old = iq->flush_index;
366 	u32 inst_count = 0;
367 	unsigned int pkts_compl = 0, bytes_compl = 0;
368 	struct octeon_soft_command *sc;
369 	unsigned long flags;
370 
371 	while (old != iq->octeon_read_index) {
372 		reqtype = iq->request_list[old].reqtype;
373 		buf     = iq->request_list[old].buf;
374 
375 		if (reqtype == REQTYPE_NONE)
376 			goto skip_this;
377 
378 		octeon_update_tx_completion_counters(buf, reqtype, &pkts_compl,
379 						     &bytes_compl);
380 
381 		switch (reqtype) {
382 		case REQTYPE_NORESP_NET:
383 		case REQTYPE_NORESP_NET_SG:
384 		case REQTYPE_RESP_NET_SG:
385 			reqtype_free_fn[oct->octeon_id][reqtype](buf);
386 			break;
387 		case REQTYPE_RESP_NET:
388 		case REQTYPE_SOFT_COMMAND:
389 			sc = buf;
			/* We're expecting a response from Octeon. Add sc to
			 * the ordered soft command response list; it's up to
			 * lio_process_ordered_list() to process it.
			 */
396 			spin_lock_irqsave(&oct->response_list
397 					  [OCTEON_ORDERED_SC_LIST].lock, flags);
398 			atomic_inc(&oct->response_list
399 				   [OCTEON_ORDERED_SC_LIST].pending_req_count);
400 			list_add_tail(&sc->node, &oct->response_list
401 				[OCTEON_ORDERED_SC_LIST].head);
402 			spin_unlock_irqrestore(&oct->response_list
403 					       [OCTEON_ORDERED_SC_LIST].lock,
404 					       flags);
405 			break;
406 		default:
407 			dev_err(&oct->pci_dev->dev,
408 				"%s Unknown reqtype: %d buf: %p at idx %d\n",
409 				__func__, reqtype, buf, old);
410 		}
411 
412 		iq->request_list[old].buf = NULL;
413 		iq->request_list[old].reqtype = 0;
414 
415  skip_this:
416 		inst_count++;
417 		old = incr_index(old, 1, iq->max_count);
418 
419 		if ((napi_budget) && (inst_count >= napi_budget))
420 			break;
421 	}
422 	if (bytes_compl)
423 		octeon_report_tx_completion_to_bql(iq->app_ctx, pkts_compl,
424 						   bytes_compl);
425 	iq->flush_index = old;
426 
427 	if (atomic_read(&oct->response_list
428 			[OCTEON_ORDERED_SC_LIST].pending_req_count))
429 		queue_work(cwq->wq, &cwq->wk.work.work);
430 
431 	return inst_count;
432 }
433 
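/* Refresh octeon_read_index from the hardware and reclaim the completed
 * request entries. Returns 0 if the given napi_budget was exhausted,
 * 1 otherwise.
 */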
434 /* Can only be called from process context */
435 int
436 octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq,
437 		u32 napi_budget)
438 {
439 	u32 inst_processed = 0;
440 	u32 tot_inst_processed = 0;
441 	int tx_done = 1;
442 
443 	if (!spin_trylock(&iq->iq_flush_running_lock))
444 		return tx_done;
445 
446 	spin_lock_bh(&iq->lock);
447 
448 	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(iq);
449 
450 	do {
451 		/* Process any outstanding IQ packets. */
452 		if (iq->flush_index == iq->octeon_read_index)
453 			break;
454 
455 		if (napi_budget)
456 			inst_processed =
457 				lio_process_iq_request_list(oct, iq,
458 							    napi_budget -
459 							    tot_inst_processed);
460 		else
461 			inst_processed =
462 				lio_process_iq_request_list(oct, iq, 0);
463 
464 		if (inst_processed) {
465 			iq->pkts_processed += inst_processed;
466 			atomic_sub(inst_processed, &iq->instr_pending);
467 			iq->stats.instr_processed += inst_processed;
468 		}
469 
470 		tot_inst_processed += inst_processed;
471 	} while (tot_inst_processed < napi_budget);
472 
473 	if (napi_budget && (tot_inst_processed >= napi_budget))
474 		tx_done = 0;
475 
476 	iq->last_db_time = jiffies;
477 
478 	spin_unlock_bh(&iq->lock);
479 
480 	spin_unlock(&iq->iq_flush_running_lock);
481 
482 	return tx_done;
483 }
484 
485 /* Process instruction queue after timeout.
486  * This routine gets called from a workqueue or when removing the module.
487  */
488 static void __check_db_timeout(struct octeon_device *oct, u64 iq_no)
489 {
490 	struct octeon_instr_queue *iq;
491 	u64 next_time;
492 
493 	if (!oct)
494 		return;
495 
496 	iq = oct->instr_queue[iq_no];
497 	if (!iq)
498 		return;
499 
	/* Return immediately if there is no work pending */
501 	if (!atomic_read(&iq->instr_pending))
502 		return;
	/* Do nothing if (jiffies - last_db_time) < db_timeout */
504 	next_time = iq->last_db_time + iq->db_timeout;
505 	if (!time_after(jiffies, (unsigned long)next_time))
506 		return;
507 	iq->last_db_time = jiffies;
508 
509 	/* Flush the instruction queue */
510 	octeon_flush_iq(oct, iq, 0);
511 
512 	lio_enable_irq(NULL, iq);
513 }
514 
/* Work handler run at regular intervals from the per-IQ delayed workqueue to
 * flush instructions that have already been fetched by Octeon and to re-arm
 * the next check.
 */
518 static void check_db_timeout(struct work_struct *work)
519 {
520 	struct cavium_wk *wk = (struct cavium_wk *)work;
521 	struct octeon_device *oct = (struct octeon_device *)wk->ctxptr;
522 	u64 iq_no = wk->ctxul;
523 	struct cavium_wq *db_wq = &oct->check_db_wq[iq_no];
524 	u32 delay = 10;
525 
526 	__check_db_timeout(oct, iq_no);
527 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(delay));
528 }
529 
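/* Post a single command to IQ 'iq_no' and record 'buf' in its request list.
 * The doorbell is rung when force_db is set, when the fill count reaches
 * MAX_OCTEON_FILL_COUNT, when BQL reports the tx queue stopped or when the
 * post status is IQ_SEND_STOP. Returns IQ_SEND_OK, IQ_SEND_STOP or
 * IQ_SEND_FAILED.
 */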
530 int
531 octeon_send_command(struct octeon_device *oct, u32 iq_no,
532 		    u32 force_db, void *cmd, void *buf,
533 		    u32 datasize, u32 reqtype)
534 {
535 	int xmit_stopped;
536 	struct iq_post_status st;
537 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
538 
539 	/* Get the lock and prevent other tasks and tx interrupt handler from
540 	 * running.
541 	 */
542 	if (iq->allow_soft_cmds)
543 		spin_lock_bh(&iq->post_lock);
544 
545 	st = __post_command2(iq, cmd);
546 
547 	if (st.status != IQ_SEND_FAILED) {
548 		xmit_stopped = octeon_report_sent_bytes_to_bql(buf, reqtype);
549 		__add_to_request_list(iq, st.index, buf, reqtype);
550 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
551 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);
552 
553 		if (iq->fill_cnt >= MAX_OCTEON_FILL_COUNT || force_db ||
554 		    xmit_stopped || st.status == IQ_SEND_STOP)
555 			ring_doorbell(oct, iq);
556 	} else {
557 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
558 	}
559 
560 	if (iq->allow_soft_cmds)
561 		spin_unlock_bh(&iq->post_lock);
562 
	/* Flushing of fetched instructions is handled by the doorbell
	 * timeout work (check_db_timeout) for cases where there are no
	 * IQ completion interrupts.
	 */
566 
567 	return st.status;
568 }
569 
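/* Fill in the instruction header (IH2, or IH3 plus PKI_IH3 on CN23XX), the
 * IRH opcode/subcode/ossp fields and, when a response is expected, the RDP
 * of the soft command.
 */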
570 void
571 octeon_prepare_soft_command(struct octeon_device *oct,
572 			    struct octeon_soft_command *sc,
573 			    u8 opcode,
574 			    u8 subcode,
575 			    u32 irh_ossp,
576 			    u64 ossp0,
577 			    u64 ossp1)
578 {
579 	struct octeon_config *oct_cfg;
580 	struct octeon_instr_ih2 *ih2;
581 	struct octeon_instr_ih3 *ih3;
582 	struct octeon_instr_pki_ih3 *pki_ih3;
583 	struct octeon_instr_irh *irh;
584 	struct octeon_instr_rdp *rdp;
585 
586 	WARN_ON(opcode > 15);
587 	WARN_ON(subcode > 127);
588 
589 	oct_cfg = octeon_get_conf(oct);
590 
591 	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
592 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
593 
594 		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
595 
596 		pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
597 
598 		pki_ih3->w           = 1;
599 		pki_ih3->raw         = 1;
600 		pki_ih3->utag        = 1;
601 		pki_ih3->uqpg        =
602 			oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
603 		pki_ih3->utt         = 1;
604 		pki_ih3->tag     = LIO_CONTROL;
605 		pki_ih3->tagtype = ATOMIC_TAG;
606 		pki_ih3->qpg         =
607 			oct->instr_queue[sc->iq_no]->txpciq.s.ctrl_qpg;
608 
609 		pki_ih3->pm          = 0x7;
610 		pki_ih3->sl          = 8;
611 
612 		if (sc->datasize)
613 			ih3->dlengsz = sc->datasize;
614 
615 		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
616 		irh->opcode    = opcode;
617 		irh->subcode   = subcode;
618 
619 		/* opcode/subcode specific parameters (ossp) */
620 		irh->ossp       = irh_ossp;
621 		sc->cmd.cmd3.ossp[0] = ossp0;
622 		sc->cmd.cmd3.ossp[1] = ossp1;
623 
624 		if (sc->rdatasize) {
625 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
626 			rdp->pcie_port = oct->pcie_port;
627 			rdp->rlen      = sc->rdatasize;
628 
629 			irh->rflag =  1;
630 			/*PKI IH3*/
631 			/* pki_ih3 irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */
632 			ih3->fsz    = LIO_SOFTCMDRESP_IH3;
633 		} else {
634 			irh->rflag =  0;
635 			/*PKI IH3*/
636 			/* pki_h3 + irh + ossp[0] + ossp[1] = 32 bytes */
637 			ih3->fsz    = LIO_PCICMD_O3;
638 		}
639 
640 	} else {
641 		ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
642 		ih2->tagtype = ATOMIC_TAG;
643 		ih2->tag     = LIO_CONTROL;
644 		ih2->raw     = 1;
645 		ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);
646 
647 		if (sc->datasize) {
648 			ih2->dlengsz = sc->datasize;
649 			ih2->rs = 1;
650 		}
651 
652 		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
653 		irh->opcode    = opcode;
654 		irh->subcode   = subcode;
655 
656 		/* opcode/subcode specific parameters (ossp) */
657 		irh->ossp       = irh_ossp;
658 		sc->cmd.cmd2.ossp[0] = ossp0;
659 		sc->cmd.cmd2.ossp[1] = ossp1;
660 
661 		if (sc->rdatasize) {
662 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
663 			rdp->pcie_port = oct->pcie_port;
664 			rdp->rlen      = sc->rdatasize;
665 
666 			irh->rflag =  1;
667 			/* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
668 			ih2->fsz   = LIO_SOFTCMDRESP_IH2;
669 		} else {
670 			irh->rflag =  0;
671 			/* irh + ossp[0] + ossp[1] = 24 bytes */
672 			ih2->fsz   = LIO_PCICMD_O2;
673 		}
674 	}
675 }
676 
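/* Hook up the DMA data and response pointers of a prepared soft command,
 * initialize its status word when a response is expected, set its expiry
 * time and post it to IQ sc->iq_no with the doorbell forced.
 */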
677 int octeon_send_soft_command(struct octeon_device *oct,
678 			     struct octeon_soft_command *sc)
679 {
680 	struct octeon_instr_queue *iq;
681 	struct octeon_instr_ih2 *ih2;
682 	struct octeon_instr_ih3 *ih3;
683 	struct octeon_instr_irh *irh;
684 	u32 len;
685 
686 	iq = oct->instr_queue[sc->iq_no];
687 	if (!iq->allow_soft_cmds) {
688 		dev_err(&oct->pci_dev->dev, "Soft commands are not allowed on Queue %d\n",
689 			sc->iq_no);
690 		INCR_INSTRQUEUE_PKT_COUNT(oct, sc->iq_no, instr_dropped, 1);
691 		return IQ_SEND_FAILED;
692 	}
693 
694 	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
695 		ih3 =  (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
696 		if (ih3->dlengsz) {
697 			WARN_ON(!sc->dmadptr);
698 			sc->cmd.cmd3.dptr = sc->dmadptr;
699 		}
700 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
701 		if (irh->rflag) {
702 			WARN_ON(!sc->dmarptr);
703 			WARN_ON(!sc->status_word);
704 			*sc->status_word = COMPLETION_WORD_INIT;
705 			sc->cmd.cmd3.rptr = sc->dmarptr;
706 		}
707 		len = (u32)ih3->dlengsz;
708 	} else {
709 		ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
710 		if (ih2->dlengsz) {
711 			WARN_ON(!sc->dmadptr);
712 			sc->cmd.cmd2.dptr = sc->dmadptr;
713 		}
714 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
715 		if (irh->rflag) {
716 			WARN_ON(!sc->dmarptr);
717 			WARN_ON(!sc->status_word);
718 			*sc->status_word = COMPLETION_WORD_INIT;
719 			sc->cmd.cmd2.rptr = sc->dmarptr;
720 		}
721 		len = (u32)ih2->dlengsz;
722 	}
723 
724 	sc->expiry_time = jiffies + msecs_to_jiffies(LIO_SC_MAX_TMO_MS);
725 
726 	return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
727 				    len, REQTYPE_SOFT_COMMAND));
728 }
729 
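/* Pre-allocate MAX_SOFT_COMMAND_BUFFERS buffers via lio_dma_alloc() and
 * string them on the sc_buf_pool free list. Returns 0 on success, 1 on
 * failure.
 */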
730 int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
731 {
732 	int i;
733 	u64 dma_addr;
734 	struct octeon_soft_command *sc;
735 
736 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
737 	spin_lock_init(&oct->sc_buf_pool.lock);
738 	atomic_set(&oct->sc_buf_pool.alloc_buf_count, 0);
739 
740 	for (i = 0; i < MAX_SOFT_COMMAND_BUFFERS; i++) {
741 		sc = (struct octeon_soft_command *)
742 			lio_dma_alloc(oct,
743 				      SOFT_COMMAND_BUFFER_SIZE,
744 					  (dma_addr_t *)&dma_addr);
745 		if (!sc) {
746 			octeon_free_sc_buffer_pool(oct);
747 			return 1;
748 		}
749 
750 		sc->dma_addr = dma_addr;
751 		sc->size = SOFT_COMMAND_BUFFER_SIZE;
752 
753 		list_add_tail(&sc->node, &oct->sc_buf_pool.head);
754 	}
755 
756 	return 0;
757 }
758 
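/* Walk the DONE soft command list; commands whose callers are finished with
 * them are either returned to the pool or, if they timed out (status word
 * still COMPLETION_WORD_INIT), moved to the zombie list.
 */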
759 int octeon_free_sc_done_list(struct octeon_device *oct)
760 {
761 	struct octeon_response_list *done_sc_list, *zombie_sc_list;
762 	struct octeon_soft_command *sc;
763 	struct list_head *tmp, *tmp2;
764 	spinlock_t *sc_lists_lock; /* lock for response_list */
765 
766 	done_sc_list = &oct->response_list[OCTEON_DONE_SC_LIST];
767 	zombie_sc_list = &oct->response_list[OCTEON_ZOMBIE_SC_LIST];
768 
769 	if (!atomic_read(&done_sc_list->pending_req_count))
770 		return 0;
771 
772 	sc_lists_lock = &oct->response_list[OCTEON_ORDERED_SC_LIST].lock;
773 
774 	spin_lock_bh(sc_lists_lock);
775 
776 	list_for_each_safe(tmp, tmp2, &done_sc_list->head) {
777 		sc = list_entry(tmp, struct octeon_soft_command, node);
778 
779 		if (READ_ONCE(sc->caller_is_done)) {
780 			list_del(&sc->node);
781 			atomic_dec(&done_sc_list->pending_req_count);
782 
783 			if (*sc->status_word == COMPLETION_WORD_INIT) {
784 				/* timeout; move sc to zombie list */
785 				list_add_tail(&sc->node, &zombie_sc_list->head);
786 				atomic_inc(&zombie_sc_list->pending_req_count);
787 			} else {
788 				octeon_free_soft_command(oct, sc);
789 			}
790 		}
791 	}
792 
793 	spin_unlock_bh(sc_lists_lock);
794 
795 	return 0;
796 }
797 
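/* Return every soft command on the zombie list to the buffer pool. */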
798 int octeon_free_sc_zombie_list(struct octeon_device *oct)
799 {
800 	struct octeon_response_list *zombie_sc_list;
801 	struct octeon_soft_command *sc;
802 	struct list_head *tmp, *tmp2;
803 	spinlock_t *sc_lists_lock; /* lock for response_list */
804 
805 	zombie_sc_list = &oct->response_list[OCTEON_ZOMBIE_SC_LIST];
806 	sc_lists_lock = &oct->response_list[OCTEON_ORDERED_SC_LIST].lock;
807 
808 	spin_lock_bh(sc_lists_lock);
809 
810 	list_for_each_safe(tmp, tmp2, &zombie_sc_list->head) {
811 		list_del(tmp);
812 		atomic_dec(&zombie_sc_list->pending_req_count);
813 		sc = list_entry(tmp, struct octeon_soft_command, node);
814 		octeon_free_soft_command(oct, sc);
815 	}
816 
817 	spin_unlock_bh(sc_lists_lock);
818 
819 	return 0;
820 }
821 
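/* Release the soft command buffer pool: drain the zombie list first, then
 * free every buffer remaining in the pool.
 */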
822 int octeon_free_sc_buffer_pool(struct octeon_device *oct)
823 {
824 	struct list_head *tmp, *tmp2;
825 	struct octeon_soft_command *sc;
826 
827 	octeon_free_sc_zombie_list(oct);
828 
829 	spin_lock_bh(&oct->sc_buf_pool.lock);
830 
831 	list_for_each_safe(tmp, tmp2, &oct->sc_buf_pool.head) {
832 		list_del(tmp);
833 
834 		sc = (struct octeon_soft_command *)tmp;
835 
836 		lio_dma_free(oct, sc->size, sc, sc->dma_addr);
837 	}
838 
839 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
840 
841 	spin_unlock_bh(&oct->sc_buf_pool.lock);
842 
843 	return 0;
844 }
845 
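/* Take a buffer from the soft command pool and carve it into the command
 * structure, an optional context area and 128-byte aligned data and
 * return-data regions. Returns NULL if the pool is empty.
 */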
846 struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
847 						      u32 datasize,
848 						      u32 rdatasize,
849 						      u32 ctxsize)
850 {
851 	u64 dma_addr;
852 	u32 size;
853 	u32 offset = sizeof(struct octeon_soft_command);
854 	struct octeon_soft_command *sc = NULL;
855 	struct list_head *tmp;
856 
857 	if (!rdatasize)
858 		rdatasize = 16;
859 
860 	WARN_ON((offset + datasize + rdatasize + ctxsize) >
861 	       SOFT_COMMAND_BUFFER_SIZE);
862 
863 	spin_lock_bh(&oct->sc_buf_pool.lock);
864 
865 	if (list_empty(&oct->sc_buf_pool.head)) {
866 		spin_unlock_bh(&oct->sc_buf_pool.lock);
867 		return NULL;
868 	}
869 
870 	list_for_each(tmp, &oct->sc_buf_pool.head)
871 		break;
872 
873 	list_del(tmp);
874 
875 	atomic_inc(&oct->sc_buf_pool.alloc_buf_count);
876 
877 	spin_unlock_bh(&oct->sc_buf_pool.lock);
878 
879 	sc = (struct octeon_soft_command *)tmp;
880 
881 	dma_addr = sc->dma_addr;
882 	size = sc->size;
883 
884 	memset(sc, 0, sc->size);
885 
886 	sc->dma_addr = dma_addr;
887 	sc->size = size;
888 
889 	if (ctxsize) {
890 		sc->ctxptr = (u8 *)sc + offset;
891 		sc->ctxsize = ctxsize;
892 	}
893 
894 	/* Start data at 128 byte boundary */
895 	offset = (offset + ctxsize + 127) & 0xffffff80;
896 
897 	if (datasize) {
898 		sc->virtdptr = (u8 *)sc + offset;
899 		sc->dmadptr = dma_addr + offset;
900 		sc->datasize = datasize;
901 	}
902 
903 	/* Start rdata at 128 byte boundary */
904 	offset = (offset + datasize + 127) & 0xffffff80;
905 
906 	if (rdatasize) {
907 		WARN_ON(rdatasize < 16);
908 		sc->virtrptr = (u8 *)sc + offset;
909 		sc->dmarptr = dma_addr + offset;
910 		sc->rdatasize = rdatasize;
911 		sc->status_word = (u64 *)((u8 *)(sc->virtrptr) + rdatasize - 8);
912 	}
913 
914 	return sc;
915 }
916 
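/* Return a soft command buffer to the pool. */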
917 void octeon_free_soft_command(struct octeon_device *oct,
918 			      struct octeon_soft_command *sc)
919 {
920 	spin_lock_bh(&oct->sc_buf_pool.lock);
921 
922 	list_add_tail(&sc->node, &oct->sc_buf_pool.head);
923 
924 	atomic_dec(&oct->sc_buf_pool.alloc_buf_count);
925 
926 	spin_unlock_bh(&oct->sc_buf_pool.lock);
927 }
928