1 /**********************************************************************
2  * Author: Cavium, Inc.
3  *
4  * Contact: support@cavium.com
5  *          Please include "LiquidIO" in the subject.
6  *
7  * Copyright (c) 2003-2016 Cavium, Inc.
8  *
9  * This file is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License, Version 2, as
11  * published by the Free Software Foundation.
12  *
13  * This file is distributed in the hope that it will be useful, but
14  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
15  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
16  * NONINFRINGEMENT.  See the GNU General Public License for more
17  * details.
18  **********************************************************************/
19 #include <linux/pci.h>
20 #include <linux/netdevice.h>
21 #include <linux/vmalloc.h>
22 #include "liquidio_common.h"
23 #include "octeon_droq.h"
24 #include "octeon_iq.h"
25 #include "response_manager.h"
26 #include "octeon_device.h"
27 #include "octeon_main.h"
28 #include "octeon_network.h"
29 #include "cn66xx_device.h"
30 #include "cn23xx_pf_device.h"
31 #include "cn23xx_vf_device.h"
32 
/* Outcome of one attempt to post a command to an instruction queue. */
struct iq_post_status {
	/* IQ_SEND_* code describing how the attempt went */
	int status;
	/* ring slot the command was written to, -1 if nothing was posted */
	int index;
};
37 
38 static void check_db_timeout(struct work_struct *work);
39 static void  __check_db_timeout(struct octeon_device *oct, u64 iq_no);
40 
41 static void (*reqtype_free_fn[MAX_OCTEON_DEVICES][REQTYPE_LAST + 1]) (void *);
42 
43 static inline int IQ_INSTR_MODE_64B(struct octeon_device *oct, int iq_no)
44 {
45 	struct octeon_instr_queue *iq =
46 	    (struct octeon_instr_queue *)oct->instr_queue[iq_no];
47 	return iq->iqcmd_64B;
48 }
49 
50 #define IQ_INSTR_MODE_32B(oct, iq_no)  (!IQ_INSTR_MODE_64B(oct, iq_no))
51 
/* Define this to return the request status compatible with old code */
53 /*#define OCTEON_USE_OLD_REQ_STATUS*/
54 
55 /* Return 0 on success, 1 on failure */
56 int octeon_init_instr_queue(struct octeon_device *oct,
57 			    union oct_txpciq txpciq,
58 			    u32 num_descs)
59 {
60 	struct octeon_instr_queue *iq;
61 	struct octeon_iq_config *conf = NULL;
62 	u32 iq_no = (u32)txpciq.s.q_no;
63 	u32 q_size;
64 	struct cavium_wq *db_wq;
65 	int numa_node = dev_to_node(&oct->pci_dev->dev);
66 
67 	if (OCTEON_CN6XXX(oct))
68 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx)));
69 	else if (OCTEON_CN23XX_PF(oct))
70 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_pf)));
71 	else if (OCTEON_CN23XX_VF(oct))
72 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_vf)));
73 
74 	if (!conf) {
75 		dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
76 			oct->chip_id);
77 		return 1;
78 	}
79 
80 	q_size = (u32)conf->instr_type * num_descs;
81 
82 	iq = oct->instr_queue[iq_no];
83 
84 	iq->oct_dev = oct;
85 
86 	iq->base_addr = lio_dma_alloc(oct, q_size, &iq->base_addr_dma);
87 	if (!iq->base_addr) {
88 		dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
89 			iq_no);
90 		return 1;
91 	}
92 
93 	iq->max_count = num_descs;
94 
95 	/* Initialize a list to holds requests that have been posted to Octeon
96 	 * but has yet to be fetched by octeon
97 	 */
98 	iq->request_list = vzalloc_node(array_size(num_descs, sizeof(*iq->request_list)),
99 					numa_node);
100 	if (!iq->request_list)
101 		iq->request_list = vzalloc(array_size(num_descs, sizeof(*iq->request_list)));
102 	if (!iq->request_list) {
103 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
104 		dev_err(&oct->pci_dev->dev, "Alloc failed for IQ[%d] nr free list\n",
105 			iq_no);
106 		return 1;
107 	}
108 
109 	dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %pad count: %d\n",
110 		iq_no, iq->base_addr, &iq->base_addr_dma, iq->max_count);
111 
112 	iq->txpciq.u64 = txpciq.u64;
113 	iq->fill_threshold = (u32)conf->db_min;
114 	iq->fill_cnt = 0;
115 	iq->host_write_index = 0;
116 	iq->octeon_read_index = 0;
117 	iq->flush_index = 0;
118 	iq->last_db_time = 0;
119 	iq->do_auto_flush = 1;
120 	iq->db_timeout = (u32)conf->db_timeout;
121 	atomic_set(&iq->instr_pending, 0);
122 	iq->pkts_processed = 0;
123 
124 	/* Initialize the spinlock for this instruction queue */
125 	spin_lock_init(&iq->lock);
126 	if (iq_no == 0) {
127 		iq->allow_soft_cmds = true;
128 		spin_lock_init(&iq->post_lock);
129 	} else {
130 		iq->allow_soft_cmds = false;
131 	}
132 
133 	spin_lock_init(&iq->iq_flush_running_lock);
134 
135 	oct->io_qmask.iq |= BIT_ULL(iq_no);
136 
137 	/* Set the 32B/64B mode for each input queue */
138 	oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
139 	iq->iqcmd_64B = (conf->instr_type == 64);
140 
141 	oct->fn_list.setup_iq_regs(oct, iq_no);
142 
143 	oct->check_db_wq[iq_no].wq = alloc_workqueue("check_iq_db",
144 						     WQ_MEM_RECLAIM,
145 						     0);
146 	if (!oct->check_db_wq[iq_no].wq) {
147 		vfree(iq->request_list);
148 		iq->request_list = NULL;
149 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
150 		dev_err(&oct->pci_dev->dev, "check db wq create failed for iq %d\n",
151 			iq_no);
152 		return 1;
153 	}
154 
155 	db_wq = &oct->check_db_wq[iq_no];
156 
157 	INIT_DELAYED_WORK(&db_wq->wk.work, check_db_timeout);
158 	db_wq->wk.ctxptr = oct;
159 	db_wq->wk.ctxul = iq_no;
160 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(1));
161 
162 	return 0;
163 }
164 
165 int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
166 {
167 	u64 desc_size = 0, q_size;
168 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
169 
170 	cancel_delayed_work_sync(&oct->check_db_wq[iq_no].wk.work);
171 	destroy_workqueue(oct->check_db_wq[iq_no].wq);
172 
173 	if (OCTEON_CN6XXX(oct))
174 		desc_size =
175 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn6xxx));
176 	else if (OCTEON_CN23XX_PF(oct))
177 		desc_size =
178 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_pf));
179 	else if (OCTEON_CN23XX_VF(oct))
180 		desc_size =
181 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_vf));
182 
183 	vfree(iq->request_list);
184 
185 	if (iq->base_addr) {
186 		q_size = iq->max_count * desc_size;
187 		lio_dma_free(oct, (u32)q_size, iq->base_addr,
188 			     iq->base_addr_dma);
189 		oct->io_qmask.iq &= ~(1ULL << iq_no);
190 		vfree(oct->instr_queue[iq_no]);
191 		oct->instr_queue[iq_no] = NULL;
192 		oct->num_iqs--;
193 		return 0;
194 	}
195 	return 1;
196 }
197 
198 /* Return 0 on success, 1 on failure */
199 int octeon_setup_iq(struct octeon_device *oct,
200 		    int ifidx,
201 		    int q_index,
202 		    union oct_txpciq txpciq,
203 		    u32 num_descs,
204 		    void *app_ctx)
205 {
206 	u32 iq_no = (u32)txpciq.s.q_no;
207 	int numa_node = dev_to_node(&oct->pci_dev->dev);
208 
209 	if (oct->instr_queue[iq_no]) {
210 		dev_dbg(&oct->pci_dev->dev, "IQ is in use. Cannot create the IQ: %d again\n",
211 			iq_no);
212 		oct->instr_queue[iq_no]->txpciq.u64 = txpciq.u64;
213 		oct->instr_queue[iq_no]->app_ctx = app_ctx;
214 		return 0;
215 	}
216 	oct->instr_queue[iq_no] =
217 	    vzalloc_node(sizeof(struct octeon_instr_queue), numa_node);
218 	if (!oct->instr_queue[iq_no])
219 		oct->instr_queue[iq_no] =
220 		    vzalloc(sizeof(struct octeon_instr_queue));
221 	if (!oct->instr_queue[iq_no])
222 		return 1;
223 
224 
225 	oct->instr_queue[iq_no]->q_index = q_index;
226 	oct->instr_queue[iq_no]->app_ctx = app_ctx;
227 	oct->instr_queue[iq_no]->ifidx = ifidx;
228 
229 	if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
230 		vfree(oct->instr_queue[iq_no]);
231 		oct->instr_queue[iq_no] = NULL;
232 		return 1;
233 	}
234 
235 	oct->num_iqs++;
236 	if (oct->fn_list.enable_io_queues(oct)) {
237 		octeon_delete_instr_queue(oct, iq_no);
238 		return 1;
239 	}
240 
241 	return 0;
242 }
243 
244 int lio_wait_for_instr_fetch(struct octeon_device *oct)
245 {
246 	int i, retry = 1000, pending, instr_cnt = 0;
247 
248 	do {
249 		instr_cnt = 0;
250 
251 		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
252 			if (!(oct->io_qmask.iq & BIT_ULL(i)))
253 				continue;
254 			pending =
255 			    atomic_read(&oct->instr_queue[i]->instr_pending);
256 			if (pending)
257 				__check_db_timeout(oct, i);
258 			instr_cnt += pending;
259 		}
260 
261 		if (instr_cnt == 0)
262 			break;
263 
264 		schedule_timeout_uninterruptible(1);
265 
266 	} while (retry-- && instr_cnt);
267 
268 	return instr_cnt;
269 }
270 
271 static inline void
272 ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq)
273 {
274 	if (atomic_read(&oct->status) == OCT_DEV_RUNNING) {
275 		writel(iq->fill_cnt, iq->doorbell_reg);
276 		/* make sure doorbell write goes through */
277 		iq->fill_cnt = 0;
278 		iq->last_db_time = jiffies;
279 		return;
280 	}
281 }
282 
283 void
284 octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no)
285 {
286 	struct octeon_instr_queue *iq;
287 
288 	iq = oct->instr_queue[iq_no];
289 	spin_lock(&iq->post_lock);
290 	if (iq->fill_cnt)
291 		ring_doorbell(oct, iq);
292 	spin_unlock(&iq->post_lock);
293 }
294 
295 static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
296 				      u8 *cmd)
297 {
298 	u8 *iqptr, cmdsize;
299 
300 	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
301 	iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
302 
303 	memcpy(iqptr, cmd, cmdsize);
304 }
305 
306 static inline struct iq_post_status
307 __post_command2(struct octeon_instr_queue *iq, u8 *cmd)
308 {
309 	struct iq_post_status st;
310 
311 	st.status = IQ_SEND_OK;
312 
313 	/* This ensures that the read index does not wrap around to the same
314 	 * position if queue gets full before Octeon could fetch any instr.
315 	 */
316 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 1)) {
317 		st.status = IQ_SEND_FAILED;
318 		st.index = -1;
319 		return st;
320 	}
321 
322 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 2))
323 		st.status = IQ_SEND_STOP;
324 
325 	__copy_cmd_into_iq(iq, cmd);
326 
327 	/* "index" is returned, host_write_index is modified. */
328 	st.index = iq->host_write_index;
329 	iq->host_write_index = incr_index(iq->host_write_index, 1,
330 					  iq->max_count);
331 	iq->fill_cnt++;
332 
333 	/* Flush the command into memory. We need to be sure the data is in
334 	 * memory before indicating that the instruction is pending.
335 	 */
336 	wmb();
337 
338 	atomic_inc(&iq->instr_pending);
339 
340 	return st;
341 }
342 
343 int
344 octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype,
345 				void (*fn)(void *))
346 {
347 	if (reqtype > REQTYPE_LAST) {
348 		dev_err(&oct->pci_dev->dev, "%s: Invalid reqtype: %d\n",
349 			__func__, reqtype);
350 		return -EINVAL;
351 	}
352 
353 	reqtype_free_fn[oct->octeon_id][reqtype] = fn;
354 
355 	return 0;
356 }
357 
358 static inline void
359 __add_to_request_list(struct octeon_instr_queue *iq,
360 		      int idx, void *buf, int reqtype)
361 {
362 	iq->request_list[idx].buf = buf;
363 	iq->request_list[idx].reqtype = reqtype;
364 }
365 
366 /* Can only run in process context */
367 int
368 lio_process_iq_request_list(struct octeon_device *oct,
369 			    struct octeon_instr_queue *iq, u32 napi_budget)
370 {
371 	struct cavium_wq *cwq = &oct->dma_comp_wq;
372 	int reqtype;
373 	void *buf;
374 	u32 old = iq->flush_index;
375 	u32 inst_count = 0;
376 	unsigned int pkts_compl = 0, bytes_compl = 0;
377 	struct octeon_soft_command *sc;
378 	unsigned long flags;
379 
380 	while (old != iq->octeon_read_index) {
381 		reqtype = iq->request_list[old].reqtype;
382 		buf     = iq->request_list[old].buf;
383 
384 		if (reqtype == REQTYPE_NONE)
385 			goto skip_this;
386 
387 		octeon_update_tx_completion_counters(buf, reqtype, &pkts_compl,
388 						     &bytes_compl);
389 
390 		switch (reqtype) {
391 		case REQTYPE_NORESP_NET:
392 		case REQTYPE_NORESP_NET_SG:
393 		case REQTYPE_RESP_NET_SG:
394 			reqtype_free_fn[oct->octeon_id][reqtype](buf);
395 			break;
396 		case REQTYPE_RESP_NET:
397 		case REQTYPE_SOFT_COMMAND:
398 			sc = buf;
399 			/* We're expecting a response from Octeon.
400 			 * It's up to lio_process_ordered_list() to
401 			 * process  sc. Add sc to the ordered soft
402 			 * command response list because we expect
403 			 * a response from Octeon.
404 			 */
405 			spin_lock_irqsave(&oct->response_list
406 					  [OCTEON_ORDERED_SC_LIST].lock, flags);
407 			atomic_inc(&oct->response_list
408 				   [OCTEON_ORDERED_SC_LIST].pending_req_count);
409 			list_add_tail(&sc->node, &oct->response_list
410 				[OCTEON_ORDERED_SC_LIST].head);
411 			spin_unlock_irqrestore(&oct->response_list
412 					       [OCTEON_ORDERED_SC_LIST].lock,
413 					       flags);
414 			break;
415 		default:
416 			dev_err(&oct->pci_dev->dev,
417 				"%s Unknown reqtype: %d buf: %p at idx %d\n",
418 				__func__, reqtype, buf, old);
419 		}
420 
421 		iq->request_list[old].buf = NULL;
422 		iq->request_list[old].reqtype = 0;
423 
424  skip_this:
425 		inst_count++;
426 		old = incr_index(old, 1, iq->max_count);
427 
428 		if ((napi_budget) && (inst_count >= napi_budget))
429 			break;
430 	}
431 	if (bytes_compl)
432 		octeon_report_tx_completion_to_bql(iq->app_ctx, pkts_compl,
433 						   bytes_compl);
434 	iq->flush_index = old;
435 
436 	if (atomic_read(&oct->response_list
437 			[OCTEON_ORDERED_SC_LIST].pending_req_count))
438 		queue_work(cwq->wq, &cwq->wk.work.work);
439 
440 	return inst_count;
441 }
442 
443 /* Can only be called from process context */
444 int
445 octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq,
446 		u32 napi_budget)
447 {
448 	u32 inst_processed = 0;
449 	u32 tot_inst_processed = 0;
450 	int tx_done = 1;
451 
452 	if (!spin_trylock(&iq->iq_flush_running_lock))
453 		return tx_done;
454 
455 	spin_lock_bh(&iq->lock);
456 
457 	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(iq);
458 
459 	do {
460 		/* Process any outstanding IQ packets. */
461 		if (iq->flush_index == iq->octeon_read_index)
462 			break;
463 
464 		if (napi_budget)
465 			inst_processed =
466 				lio_process_iq_request_list(oct, iq,
467 							    napi_budget -
468 							    tot_inst_processed);
469 		else
470 			inst_processed =
471 				lio_process_iq_request_list(oct, iq, 0);
472 
473 		if (inst_processed) {
474 			iq->pkts_processed += inst_processed;
475 			atomic_sub(inst_processed, &iq->instr_pending);
476 			iq->stats.instr_processed += inst_processed;
477 		}
478 
479 		tot_inst_processed += inst_processed;
480 	} while (tot_inst_processed < napi_budget);
481 
482 	if (napi_budget && (tot_inst_processed >= napi_budget))
483 		tx_done = 0;
484 
485 	iq->last_db_time = jiffies;
486 
487 	spin_unlock_bh(&iq->lock);
488 
489 	spin_unlock(&iq->iq_flush_running_lock);
490 
491 	return tx_done;
492 }
493 
494 /* Process instruction queue after timeout.
495  * This routine gets called from a workqueue or when removing the module.
496  */
497 static void __check_db_timeout(struct octeon_device *oct, u64 iq_no)
498 {
499 	struct octeon_instr_queue *iq;
500 	u64 next_time;
501 
502 	if (!oct)
503 		return;
504 
505 	iq = oct->instr_queue[iq_no];
506 	if (!iq)
507 		return;
508 
509 	/* return immediately, if no work pending */
510 	if (!atomic_read(&iq->instr_pending))
511 		return;
512 	/* If jiffies - last_db_time < db_timeout do nothing  */
513 	next_time = iq->last_db_time + iq->db_timeout;
514 	if (!time_after(jiffies, (unsigned long)next_time))
515 		return;
516 	iq->last_db_time = jiffies;
517 
518 	/* Flush the instruction queue */
519 	octeon_flush_iq(oct, iq, 0);
520 
521 	lio_enable_irq(NULL, iq);
522 }
523 
524 /* Called by the Poll thread at regular intervals to check the instruction
525  * queue for commands to be posted and for commands that were fetched by Octeon.
526  */
527 static void check_db_timeout(struct work_struct *work)
528 {
529 	struct cavium_wk *wk = (struct cavium_wk *)work;
530 	struct octeon_device *oct = (struct octeon_device *)wk->ctxptr;
531 	u64 iq_no = wk->ctxul;
532 	struct cavium_wq *db_wq = &oct->check_db_wq[iq_no];
533 	u32 delay = 10;
534 
535 	__check_db_timeout(oct, iq_no);
536 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(delay));
537 }
538 
539 int
540 octeon_send_command(struct octeon_device *oct, u32 iq_no,
541 		    u32 force_db, void *cmd, void *buf,
542 		    u32 datasize, u32 reqtype)
543 {
544 	int xmit_stopped;
545 	struct iq_post_status st;
546 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
547 
548 	/* Get the lock and prevent other tasks and tx interrupt handler from
549 	 * running.
550 	 */
551 	if (iq->allow_soft_cmds)
552 		spin_lock_bh(&iq->post_lock);
553 
554 	st = __post_command2(iq, cmd);
555 
556 	if (st.status != IQ_SEND_FAILED) {
557 		xmit_stopped = octeon_report_sent_bytes_to_bql(buf, reqtype);
558 		__add_to_request_list(iq, st.index, buf, reqtype);
559 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
560 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);
561 
562 		if (iq->fill_cnt >= MAX_OCTEON_FILL_COUNT || force_db ||
563 		    xmit_stopped || st.status == IQ_SEND_STOP)
564 			ring_doorbell(oct, iq);
565 	} else {
566 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
567 	}
568 
569 	if (iq->allow_soft_cmds)
570 		spin_unlock_bh(&iq->post_lock);
571 
572 	/* This is only done here to expedite packets being flushed
573 	 * for cases where there are no IQ completion interrupts.
574 	 */
575 
576 	return st.status;
577 }
578 
579 void
580 octeon_prepare_soft_command(struct octeon_device *oct,
581 			    struct octeon_soft_command *sc,
582 			    u8 opcode,
583 			    u8 subcode,
584 			    u32 irh_ossp,
585 			    u64 ossp0,
586 			    u64 ossp1)
587 {
588 	struct octeon_config *oct_cfg;
589 	struct octeon_instr_ih2 *ih2;
590 	struct octeon_instr_ih3 *ih3;
591 	struct octeon_instr_pki_ih3 *pki_ih3;
592 	struct octeon_instr_irh *irh;
593 	struct octeon_instr_rdp *rdp;
594 
595 	WARN_ON(opcode > 15);
596 	WARN_ON(subcode > 127);
597 
598 	oct_cfg = octeon_get_conf(oct);
599 
600 	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
601 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
602 
603 		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
604 
605 		pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
606 
607 		pki_ih3->w           = 1;
608 		pki_ih3->raw         = 1;
609 		pki_ih3->utag        = 1;
610 		pki_ih3->uqpg        =
611 			oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
612 		pki_ih3->utt         = 1;
613 		pki_ih3->tag     = LIO_CONTROL;
614 		pki_ih3->tagtype = ATOMIC_TAG;
615 		pki_ih3->qpg         =
616 			oct->instr_queue[sc->iq_no]->txpciq.s.ctrl_qpg;
617 
618 		pki_ih3->pm          = 0x7;
619 		pki_ih3->sl          = 8;
620 
621 		if (sc->datasize)
622 			ih3->dlengsz = sc->datasize;
623 
624 		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
625 		irh->opcode    = opcode;
626 		irh->subcode   = subcode;
627 
628 		/* opcode/subcode specific parameters (ossp) */
629 		irh->ossp       = irh_ossp;
630 		sc->cmd.cmd3.ossp[0] = ossp0;
631 		sc->cmd.cmd3.ossp[1] = ossp1;
632 
633 		if (sc->rdatasize) {
634 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
635 			rdp->pcie_port = oct->pcie_port;
636 			rdp->rlen      = sc->rdatasize;
637 
638 			irh->rflag =  1;
639 			/*PKI IH3*/
640 			/* pki_ih3 irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */
641 			ih3->fsz    = LIO_SOFTCMDRESP_IH3;
642 		} else {
643 			irh->rflag =  0;
644 			/*PKI IH3*/
645 			/* pki_h3 + irh + ossp[0] + ossp[1] = 32 bytes */
646 			ih3->fsz    = LIO_PCICMD_O3;
647 		}
648 
649 	} else {
650 		ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
651 		ih2->tagtype = ATOMIC_TAG;
652 		ih2->tag     = LIO_CONTROL;
653 		ih2->raw     = 1;
654 		ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);
655 
656 		if (sc->datasize) {
657 			ih2->dlengsz = sc->datasize;
658 			ih2->rs = 1;
659 		}
660 
661 		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
662 		irh->opcode    = opcode;
663 		irh->subcode   = subcode;
664 
665 		/* opcode/subcode specific parameters (ossp) */
666 		irh->ossp       = irh_ossp;
667 		sc->cmd.cmd2.ossp[0] = ossp0;
668 		sc->cmd.cmd2.ossp[1] = ossp1;
669 
670 		if (sc->rdatasize) {
671 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
672 			rdp->pcie_port = oct->pcie_port;
673 			rdp->rlen      = sc->rdatasize;
674 
675 			irh->rflag =  1;
676 			/* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
677 			ih2->fsz   = LIO_SOFTCMDRESP_IH2;
678 		} else {
679 			irh->rflag =  0;
680 			/* irh + ossp[0] + ossp[1] = 24 bytes */
681 			ih2->fsz   = LIO_PCICMD_O2;
682 		}
683 	}
684 }
685 
686 int octeon_send_soft_command(struct octeon_device *oct,
687 			     struct octeon_soft_command *sc)
688 {
689 	struct octeon_instr_queue *iq;
690 	struct octeon_instr_ih2 *ih2;
691 	struct octeon_instr_ih3 *ih3;
692 	struct octeon_instr_irh *irh;
693 	u32 len;
694 
695 	iq = oct->instr_queue[sc->iq_no];
696 	if (!iq->allow_soft_cmds) {
697 		dev_err(&oct->pci_dev->dev, "Soft commands are not allowed on Queue %d\n",
698 			sc->iq_no);
699 		INCR_INSTRQUEUE_PKT_COUNT(oct, sc->iq_no, instr_dropped, 1);
700 		return IQ_SEND_FAILED;
701 	}
702 
703 	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
704 		ih3 =  (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
705 		if (ih3->dlengsz) {
706 			WARN_ON(!sc->dmadptr);
707 			sc->cmd.cmd3.dptr = sc->dmadptr;
708 		}
709 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
710 		if (irh->rflag) {
711 			WARN_ON(!sc->dmarptr);
712 			WARN_ON(!sc->status_word);
713 			*sc->status_word = COMPLETION_WORD_INIT;
714 			sc->cmd.cmd3.rptr = sc->dmarptr;
715 		}
716 		len = (u32)ih3->dlengsz;
717 	} else {
718 		ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
719 		if (ih2->dlengsz) {
720 			WARN_ON(!sc->dmadptr);
721 			sc->cmd.cmd2.dptr = sc->dmadptr;
722 		}
723 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
724 		if (irh->rflag) {
725 			WARN_ON(!sc->dmarptr);
726 			WARN_ON(!sc->status_word);
727 			*sc->status_word = COMPLETION_WORD_INIT;
728 			sc->cmd.cmd2.rptr = sc->dmarptr;
729 		}
730 		len = (u32)ih2->dlengsz;
731 	}
732 
733 	sc->expiry_time = jiffies + msecs_to_jiffies(LIO_SC_MAX_TMO_MS);
734 
735 	return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
736 				    len, REQTYPE_SOFT_COMMAND));
737 }
738 
739 int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
740 {
741 	int i;
742 	u64 dma_addr;
743 	struct octeon_soft_command *sc;
744 
745 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
746 	spin_lock_init(&oct->sc_buf_pool.lock);
747 	atomic_set(&oct->sc_buf_pool.alloc_buf_count, 0);
748 
749 	for (i = 0; i < MAX_SOFT_COMMAND_BUFFERS; i++) {
750 		sc = (struct octeon_soft_command *)
751 			lio_dma_alloc(oct,
752 				      SOFT_COMMAND_BUFFER_SIZE,
753 					  (dma_addr_t *)&dma_addr);
754 		if (!sc) {
755 			octeon_free_sc_buffer_pool(oct);
756 			return 1;
757 		}
758 
759 		sc->dma_addr = dma_addr;
760 		sc->size = SOFT_COMMAND_BUFFER_SIZE;
761 
762 		list_add_tail(&sc->node, &oct->sc_buf_pool.head);
763 	}
764 
765 	return 0;
766 }
767 
768 int octeon_free_sc_done_list(struct octeon_device *oct)
769 {
770 	struct octeon_response_list *done_sc_list, *zombie_sc_list;
771 	struct octeon_soft_command *sc;
772 	struct list_head *tmp, *tmp2;
773 	spinlock_t *sc_lists_lock; /* lock for response_list */
774 
775 	done_sc_list = &oct->response_list[OCTEON_DONE_SC_LIST];
776 	zombie_sc_list = &oct->response_list[OCTEON_ZOMBIE_SC_LIST];
777 
778 	if (!atomic_read(&done_sc_list->pending_req_count))
779 		return 0;
780 
781 	sc_lists_lock = &oct->response_list[OCTEON_ORDERED_SC_LIST].lock;
782 
783 	spin_lock_bh(sc_lists_lock);
784 
785 	list_for_each_safe(tmp, tmp2, &done_sc_list->head) {
786 		sc = list_entry(tmp, struct octeon_soft_command, node);
787 
788 		if (READ_ONCE(sc->caller_is_done)) {
789 			list_del(&sc->node);
790 			atomic_dec(&done_sc_list->pending_req_count);
791 
792 			if (*sc->status_word == COMPLETION_WORD_INIT) {
793 				/* timeout; move sc to zombie list */
794 				list_add_tail(&sc->node, &zombie_sc_list->head);
795 				atomic_inc(&zombie_sc_list->pending_req_count);
796 			} else {
797 				octeon_free_soft_command(oct, sc);
798 			}
799 		}
800 	}
801 
802 	spin_unlock_bh(sc_lists_lock);
803 
804 	return 0;
805 }
806 
807 int octeon_free_sc_zombie_list(struct octeon_device *oct)
808 {
809 	struct octeon_response_list *zombie_sc_list;
810 	struct octeon_soft_command *sc;
811 	struct list_head *tmp, *tmp2;
812 	spinlock_t *sc_lists_lock; /* lock for response_list */
813 
814 	zombie_sc_list = &oct->response_list[OCTEON_ZOMBIE_SC_LIST];
815 	sc_lists_lock = &oct->response_list[OCTEON_ORDERED_SC_LIST].lock;
816 
817 	spin_lock_bh(sc_lists_lock);
818 
819 	list_for_each_safe(tmp, tmp2, &zombie_sc_list->head) {
820 		list_del(tmp);
821 		atomic_dec(&zombie_sc_list->pending_req_count);
822 		sc = list_entry(tmp, struct octeon_soft_command, node);
823 		octeon_free_soft_command(oct, sc);
824 	}
825 
826 	spin_unlock_bh(sc_lists_lock);
827 
828 	return 0;
829 }
830 
831 int octeon_free_sc_buffer_pool(struct octeon_device *oct)
832 {
833 	struct list_head *tmp, *tmp2;
834 	struct octeon_soft_command *sc;
835 
836 	octeon_free_sc_zombie_list(oct);
837 
838 	spin_lock_bh(&oct->sc_buf_pool.lock);
839 
840 	list_for_each_safe(tmp, tmp2, &oct->sc_buf_pool.head) {
841 		list_del(tmp);
842 
843 		sc = (struct octeon_soft_command *)tmp;
844 
845 		lio_dma_free(oct, sc->size, sc, sc->dma_addr);
846 	}
847 
848 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
849 
850 	spin_unlock_bh(&oct->sc_buf_pool.lock);
851 
852 	return 0;
853 }
854 
855 struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
856 						      u32 datasize,
857 						      u32 rdatasize,
858 						      u32 ctxsize)
859 {
860 	u64 dma_addr;
861 	u32 size;
862 	u32 offset = sizeof(struct octeon_soft_command);
863 	struct octeon_soft_command *sc = NULL;
864 	struct list_head *tmp;
865 
866 	if (!rdatasize)
867 		rdatasize = 16;
868 
869 	WARN_ON((offset + datasize + rdatasize + ctxsize) >
870 	       SOFT_COMMAND_BUFFER_SIZE);
871 
872 	spin_lock_bh(&oct->sc_buf_pool.lock);
873 
874 	if (list_empty(&oct->sc_buf_pool.head)) {
875 		spin_unlock_bh(&oct->sc_buf_pool.lock);
876 		return NULL;
877 	}
878 
879 	list_for_each(tmp, &oct->sc_buf_pool.head)
880 		break;
881 
882 	list_del(tmp);
883 
884 	atomic_inc(&oct->sc_buf_pool.alloc_buf_count);
885 
886 	spin_unlock_bh(&oct->sc_buf_pool.lock);
887 
888 	sc = (struct octeon_soft_command *)tmp;
889 
890 	dma_addr = sc->dma_addr;
891 	size = sc->size;
892 
893 	memset(sc, 0, sc->size);
894 
895 	sc->dma_addr = dma_addr;
896 	sc->size = size;
897 
898 	if (ctxsize) {
899 		sc->ctxptr = (u8 *)sc + offset;
900 		sc->ctxsize = ctxsize;
901 	}
902 
903 	/* Start data at 128 byte boundary */
904 	offset = (offset + ctxsize + 127) & 0xffffff80;
905 
906 	if (datasize) {
907 		sc->virtdptr = (u8 *)sc + offset;
908 		sc->dmadptr = dma_addr + offset;
909 		sc->datasize = datasize;
910 	}
911 
912 	/* Start rdata at 128 byte boundary */
913 	offset = (offset + datasize + 127) & 0xffffff80;
914 
915 	if (rdatasize) {
916 		WARN_ON(rdatasize < 16);
917 		sc->virtrptr = (u8 *)sc + offset;
918 		sc->dmarptr = dma_addr + offset;
919 		sc->rdatasize = rdatasize;
920 		sc->status_word = (u64 *)((u8 *)(sc->virtrptr) + rdatasize - 8);
921 	}
922 
923 	return sc;
924 }
925 
926 void octeon_free_soft_command(struct octeon_device *oct,
927 			      struct octeon_soft_command *sc)
928 {
929 	spin_lock_bh(&oct->sc_buf_pool.lock);
930 
931 	list_add_tail(&sc->node, &oct->sc_buf_pool.head);
932 
933 	atomic_dec(&oct->sc_buf_pool.alloc_buf_count);
934 
935 	spin_unlock_bh(&oct->sc_buf_pool.lock);
936 }
937