1 /**********************************************************************
2  * Author: Cavium, Inc.
3  *
4  * Contact: support@cavium.com
5  *          Please include "LiquidIO" in the subject.
6  *
7  * Copyright (c) 2003-2016 Cavium, Inc.
8  *
9  * This file is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License, Version 2, as
11  * published by the Free Software Foundation.
12  *
13  * This file is distributed in the hope that it will be useful, but
14  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
15  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
16  * NONINFRINGEMENT.  See the GNU General Public License for more
17  * details.
18  **********************************************************************/
19 #include <linux/pci.h>
20 #include <linux/netdevice.h>
21 #include <linux/vmalloc.h>
22 #include "liquidio_common.h"
23 #include "octeon_droq.h"
24 #include "octeon_iq.h"
25 #include "response_manager.h"
26 #include "octeon_device.h"
27 #include "octeon_main.h"
28 #include "octeon_network.h"
29 #include "cn66xx_device.h"
30 #include "cn23xx_pf_device.h"
31 #include "cn23xx_vf_device.h"
32 
33 struct iq_post_status {
34 	int status;
35 	int index;
36 };
37 
38 static void check_db_timeout(struct work_struct *work);
static void __check_db_timeout(struct octeon_device *oct, u64 iq_no);
40 
41 static void (*reqtype_free_fn[MAX_OCTEON_DEVICES][REQTYPE_LAST + 1]) (void *);
42 
43 static inline int IQ_INSTR_MODE_64B(struct octeon_device *oct, int iq_no)
44 {
45 	struct octeon_instr_queue *iq =
46 	    (struct octeon_instr_queue *)oct->instr_queue[iq_no];
47 	return iq->iqcmd_64B;
48 }
49 
50 #define IQ_INSTR_MODE_32B(oct, iq_no)  (!IQ_INSTR_MODE_64B(oct, iq_no))
51 
/* Define this to return the request status compatible with old code */
53 /*#define OCTEON_USE_OLD_REQ_STATUS*/
54 
55 /* Return 0 on success, 1 on failure */
56 int octeon_init_instr_queue(struct octeon_device *oct,
57 			    union oct_txpciq txpciq,
58 			    u32 num_descs)
59 {
60 	struct octeon_instr_queue *iq;
61 	struct octeon_iq_config *conf = NULL;
62 	u32 iq_no = (u32)txpciq.s.q_no;
63 	u32 q_size;
64 	struct cavium_wq *db_wq;
65 	int orig_node = dev_to_node(&oct->pci_dev->dev);
66 	int numa_node = cpu_to_node(iq_no % num_online_cpus());
67 
68 	if (OCTEON_CN6XXX(oct))
69 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx)));
70 	else if (OCTEON_CN23XX_PF(oct))
71 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_pf)));
72 	else if (OCTEON_CN23XX_VF(oct))
73 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_vf)));
74 
75 	if (!conf) {
76 		dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
77 			oct->chip_id);
78 		return 1;
79 	}
80 
81 	if (num_descs & (num_descs - 1)) {
82 		dev_err(&oct->pci_dev->dev,
			"Number of descriptors for instr queue %d is not a power of 2.\n",
84 			iq_no);
85 		return 1;
86 	}
87 
88 	q_size = (u32)conf->instr_type * num_descs;
89 
90 	iq = oct->instr_queue[iq_no];
91 
92 	iq->oct_dev = oct;
93 
94 	set_dev_node(&oct->pci_dev->dev, numa_node);
95 	iq->base_addr = lio_dma_alloc(oct, q_size,
96 				      (dma_addr_t *)&iq->base_addr_dma);
97 	set_dev_node(&oct->pci_dev->dev, orig_node);
98 	if (!iq->base_addr)
99 		iq->base_addr = lio_dma_alloc(oct, q_size,
100 					      (dma_addr_t *)&iq->base_addr_dma);
101 	if (!iq->base_addr) {
102 		dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
103 			iq_no);
104 		return 1;
105 	}
106 
107 	iq->max_count = num_descs;
108 
	/* Initialize a list to hold requests that have been posted to Octeon
	 * but have not yet been fetched by it.
	 */
112 	iq->request_list = vmalloc_node((sizeof(*iq->request_list) * num_descs),
113 					       numa_node);
114 	if (!iq->request_list)
115 		iq->request_list = vmalloc(sizeof(*iq->request_list) *
116 						  num_descs);
117 	if (!iq->request_list) {
118 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
		dev_err(&oct->pci_dev->dev, "Alloc failed for IQ[%d] request list\n",
120 			iq_no);
121 		return 1;
122 	}
123 
124 	memset(iq->request_list, 0, sizeof(*iq->request_list) * num_descs);
125 
126 	dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %llx count: %d\n",
127 		iq_no, iq->base_addr, iq->base_addr_dma, iq->max_count);
128 
129 	iq->txpciq.u64 = txpciq.u64;
130 	iq->fill_threshold = (u32)conf->db_min;
131 	iq->fill_cnt = 0;
132 	iq->host_write_index = 0;
133 	iq->octeon_read_index = 0;
134 	iq->flush_index = 0;
135 	iq->last_db_time = 0;
136 	iq->do_auto_flush = 1;
137 	iq->db_timeout = (u32)conf->db_timeout;
138 	atomic_set(&iq->instr_pending, 0);
139 
140 	/* Initialize the spinlock for this instruction queue */
141 	spin_lock_init(&iq->lock);
142 	spin_lock_init(&iq->post_lock);
143 
144 	spin_lock_init(&iq->iq_flush_running_lock);
145 
146 	oct->io_qmask.iq |= BIT_ULL(iq_no);
147 
148 	/* Set the 32B/64B mode for each input queue */
149 	oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
150 	iq->iqcmd_64B = (conf->instr_type == 64);
151 
152 	oct->fn_list.setup_iq_regs(oct, iq_no);
153 
154 	oct->check_db_wq[iq_no].wq = alloc_workqueue("check_iq_db",
155 						     WQ_MEM_RECLAIM,
156 						     0);
157 	if (!oct->check_db_wq[iq_no].wq) {
158 		vfree(iq->request_list);
159 		iq->request_list = NULL;
160 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
161 		dev_err(&oct->pci_dev->dev, "check db wq create failed for iq %d\n",
162 			iq_no);
163 		return 1;
164 	}
165 
166 	db_wq = &oct->check_db_wq[iq_no];
167 
168 	INIT_DELAYED_WORK(&db_wq->wk.work, check_db_timeout);
169 	db_wq->wk.ctxptr = oct;
170 	db_wq->wk.ctxul = iq_no;
171 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(1));
172 
173 	return 0;
174 }
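
/* Sizing note (worked example): with the 64B instruction format and
 * num_descs = 512, the function above allocates
 *
 *	q_size = conf->instr_type * num_descs = 64 * 512 = 32768 bytes
 *
 * of DMA-coherent memory for the command ring, plus
 * 512 * sizeof(*iq->request_list) bytes of vmalloc'ed memory for the
 * per-descriptor request tracking.  num_descs must be a power of two,
 * which the (num_descs & (num_descs - 1)) check enforces.
 */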
175 
176 int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
177 {
178 	u64 desc_size = 0, q_size;
179 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
180 
181 	cancel_delayed_work_sync(&oct->check_db_wq[iq_no].wk.work);
182 	destroy_workqueue(oct->check_db_wq[iq_no].wq);
183 
184 	if (OCTEON_CN6XXX(oct))
185 		desc_size =
186 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn6xxx));
187 	else if (OCTEON_CN23XX_PF(oct))
188 		desc_size =
189 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_pf));
190 	else if (OCTEON_CN23XX_VF(oct))
191 		desc_size =
192 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_vf));
193 
194 	vfree(iq->request_list);
195 
196 	if (iq->base_addr) {
197 		q_size = iq->max_count * desc_size;
198 		lio_dma_free(oct, (u32)q_size, iq->base_addr,
199 			     iq->base_addr_dma);
200 		return 0;
201 	}
202 	return 1;
203 }
204 
205 /* Return 0 on success, 1 on failure */
206 int octeon_setup_iq(struct octeon_device *oct,
207 		    int ifidx,
208 		    int q_index,
209 		    union oct_txpciq txpciq,
210 		    u32 num_descs,
211 		    void *app_ctx)
212 {
213 	u32 iq_no = (u32)txpciq.s.q_no;
214 	int numa_node = cpu_to_node(iq_no % num_online_cpus());
215 
216 	if (oct->instr_queue[iq_no]) {
		dev_dbg(&oct->pci_dev->dev, "IQ %d is already in use; not creating it again\n",
			iq_no);
219 		oct->instr_queue[iq_no]->txpciq.u64 = txpciq.u64;
220 		oct->instr_queue[iq_no]->app_ctx = app_ctx;
221 		return 0;
222 	}
223 	oct->instr_queue[iq_no] =
224 	    vmalloc_node(sizeof(struct octeon_instr_queue), numa_node);
225 	if (!oct->instr_queue[iq_no])
226 		oct->instr_queue[iq_no] =
227 		    vmalloc(sizeof(struct octeon_instr_queue));
228 	if (!oct->instr_queue[iq_no])
229 		return 1;
230 
231 	memset(oct->instr_queue[iq_no], 0,
232 	       sizeof(struct octeon_instr_queue));
233 
234 	oct->instr_queue[iq_no]->q_index = q_index;
235 	oct->instr_queue[iq_no]->app_ctx = app_ctx;
236 	oct->instr_queue[iq_no]->ifidx = ifidx;
237 
238 	if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
239 		vfree(oct->instr_queue[iq_no]);
240 		oct->instr_queue[iq_no] = NULL;
241 		return 1;
242 	}
243 
244 	oct->num_iqs++;
245 	if (oct->fn_list.enable_io_queues(oct))
246 		return 1;
247 
248 	return 0;
249 }
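
/* Usage sketch (illustrative, not part of this file): an interface setup
 * path would typically fill in a per-queue oct_txpciq descriptor and call
 * octeon_setup_iq() once per tx queue.  q_no, pkind, num_tx_descs and
 * netdev below are placeholders, not symbols defined here.
 *
 *	union oct_txpciq txpciq;
 *
 *	txpciq.u64 = 0;
 *	txpciq.s.q_no = q_no;
 *	txpciq.s.pkind = pkind;               // chip-specific, 23xx only
 *	if (octeon_setup_iq(oct, ifidx, q_no, txpciq, num_tx_descs,
 *			    netdev_get_tx_queue(netdev, q_no)))
 *		return -ENOMEM;    // octeon_setup_iq() returns 1 on failure
 */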
250 
251 int lio_wait_for_instr_fetch(struct octeon_device *oct)
252 {
253 	int i, retry = 1000, pending, instr_cnt = 0;
254 
255 	do {
256 		instr_cnt = 0;
257 
258 		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
259 			if (!(oct->io_qmask.iq & BIT_ULL(i)))
260 				continue;
			pending =
			    atomic_read(&oct->instr_queue[i]->instr_pending);
264 			if (pending)
265 				__check_db_timeout(oct, i);
266 			instr_cnt += pending;
267 		}
268 
269 		if (instr_cnt == 0)
270 			break;
271 
272 		schedule_timeout_uninterruptible(1);
273 
274 	} while (retry-- && instr_cnt);
275 
276 	return instr_cnt;
277 }
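
/* Teardown sketch (illustrative; the surrounding stop path and the log text
 * are paraphrased, not copied from this driver): drain the instruction
 * queues before disabling them, and report anything Octeon never fetched.
 *
 *	if (lio_wait_for_instr_fetch(oct))
 *		dev_err(&oct->pci_dev->dev,
 *			"IQ had pending instructions at device shutdown\n");
 *	oct->fn_list.disable_io_queues(oct);
 *
 * The return value is the number of instructions still pending after the
 * retry loop above gives up.
 */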
278 
279 static inline void
280 ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq)
281 {
282 	if (atomic_read(&oct->status) == OCT_DEV_RUNNING) {
283 		writel(iq->fill_cnt, iq->doorbell_reg);
284 		/* make sure doorbell write goes through */
285 		mmiowb();
286 		iq->fill_cnt = 0;
287 		iq->last_db_time = jiffies;
288 		return;
289 	}
290 }
291 
292 static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
293 				      u8 *cmd)
294 {
295 	u8 *iqptr, cmdsize;
296 
297 	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
298 	iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
299 
300 	memcpy(iqptr, cmd, cmdsize);
301 }
302 
303 static inline struct iq_post_status
304 __post_command2(struct octeon_instr_queue *iq, u8 *cmd)
305 {
306 	struct iq_post_status st;
307 
308 	st.status = IQ_SEND_OK;
309 
	/* This ensures that the read index does not wrap around to the same
	 * position if the queue fills up before Octeon has fetched any
	 * instructions.
	 */
313 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 1)) {
314 		st.status = IQ_SEND_FAILED;
315 		st.index = -1;
316 		return st;
317 	}
318 
319 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 2))
320 		st.status = IQ_SEND_STOP;
321 
322 	__copy_cmd_into_iq(iq, cmd);
323 
324 	/* "index" is returned, host_write_index is modified. */
325 	st.index = iq->host_write_index;
326 	iq->host_write_index = incr_index(iq->host_write_index, 1,
327 					  iq->max_count);
328 	iq->fill_cnt++;
329 
330 	/* Flush the command into memory. We need to be sure the data is in
331 	 * memory before indicating that the instruction is pending.
332 	 */
333 	wmb();
334 
335 	atomic_inc(&iq->instr_pending);
336 
337 	return st;
338 }
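
/* Worked example of the thresholds above for a ring with max_count = 512:
 *
 *	instr_pending >= 511 (max_count - 1): IQ_SEND_FAILED, nothing posted
 *	instr_pending == 510 (max_count - 2): command is posted, but
 *					      IQ_SEND_STOP tells the caller to
 *					      stop feeding this queue
 *	instr_pending <= 509:                 IQ_SEND_OK
 *
 * One slot is always left unused so host_write_index cannot catch up with
 * octeon_read_index while commands are still outstanding.
 */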
339 
340 int
341 octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype,
342 				void (*fn)(void *))
343 {
344 	if (reqtype > REQTYPE_LAST) {
345 		dev_err(&oct->pci_dev->dev, "%s: Invalid reqtype: %d\n",
346 			__func__, reqtype);
347 		return -EINVAL;
348 	}
349 
350 	reqtype_free_fn[oct->octeon_id][reqtype] = fn;
351 
352 	return 0;
353 }
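
/* Registration sketch (illustrative; free_netbuf and free_netsgbuf stand in
 * for the caller's buffer-release helpers, which receive the opaque buf
 * pointer that was passed to octeon_send_command()):
 *
 *	octeon_register_reqtype_free_fn(oct, REQTYPE_NORESP_NET, free_netbuf);
 *	octeon_register_reqtype_free_fn(oct, REQTYPE_NORESP_NET_SG,
 *					free_netsgbuf);
 *	octeon_register_reqtype_free_fn(oct, REQTYPE_RESP_NET_SG,
 *					free_netsgbuf_with_resp);
 *
 * lio_process_iq_request_list() calls the registered function for these
 * request types once Octeon has fetched the corresponding command.
 */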
354 
355 static inline void
356 __add_to_request_list(struct octeon_instr_queue *iq,
357 		      int idx, void *buf, int reqtype)
358 {
359 	iq->request_list[idx].buf = buf;
360 	iq->request_list[idx].reqtype = reqtype;
361 }
362 
363 /* Can only run in process context */
364 int
365 lio_process_iq_request_list(struct octeon_device *oct,
366 			    struct octeon_instr_queue *iq, u32 napi_budget)
367 {
368 	int reqtype;
369 	void *buf;
370 	u32 old = iq->flush_index;
371 	u32 inst_count = 0;
372 	unsigned int pkts_compl = 0, bytes_compl = 0;
373 	struct octeon_soft_command *sc;
374 	struct octeon_instr_irh *irh;
375 	unsigned long flags;
376 
377 	while (old != iq->octeon_read_index) {
378 		reqtype = iq->request_list[old].reqtype;
379 		buf     = iq->request_list[old].buf;
380 
381 		if (reqtype == REQTYPE_NONE)
382 			goto skip_this;
383 
384 		octeon_update_tx_completion_counters(buf, reqtype, &pkts_compl,
385 						     &bytes_compl);
386 
387 		switch (reqtype) {
388 		case REQTYPE_NORESP_NET:
389 		case REQTYPE_NORESP_NET_SG:
390 		case REQTYPE_RESP_NET_SG:
391 			reqtype_free_fn[oct->octeon_id][reqtype](buf);
392 			break;
393 		case REQTYPE_RESP_NET:
394 		case REQTYPE_SOFT_COMMAND:
395 			sc = buf;
396 
397 			if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))
398 				irh = (struct octeon_instr_irh *)
399 					&sc->cmd.cmd3.irh;
400 			else
401 				irh = (struct octeon_instr_irh *)
402 					&sc->cmd.cmd2.irh;
403 			if (irh->rflag) {
				/* We're expecting a response from Octeon.
				 * Add sc to the ordered soft command
				 * response list; lio_process_ordered_list()
				 * will process it when the response arrives.
				 */
				spin_lock_irqsave(&oct->response_list
						  [OCTEON_ORDERED_SC_LIST].lock,
						  flags);
				atomic_inc(&oct->response_list
					   [OCTEON_ORDERED_SC_LIST].pending_req_count);
				list_add_tail(&sc->node,
					      &oct->response_list
					      [OCTEON_ORDERED_SC_LIST].head);
				spin_unlock_irqrestore(&oct->response_list
						       [OCTEON_ORDERED_SC_LIST].lock,
						       flags);
423 			} else {
424 				if (sc->callback) {
425 					/* This callback must not sleep */
426 					sc->callback(oct, OCTEON_REQUEST_DONE,
427 						     sc->callback_arg);
428 				}
429 			}
430 			break;
431 		default:
432 			dev_err(&oct->pci_dev->dev,
433 				"%s Unknown reqtype: %d buf: %p at idx %d\n",
434 				__func__, reqtype, buf, old);
435 		}
436 
437 		iq->request_list[old].buf = NULL;
438 		iq->request_list[old].reqtype = 0;
439 
440  skip_this:
441 		inst_count++;
442 		old = incr_index(old, 1, iq->max_count);
443 
444 		if ((napi_budget) && (inst_count >= napi_budget))
445 			break;
446 	}
447 	if (bytes_compl)
448 		octeon_report_tx_completion_to_bql(iq->app_ctx, pkts_compl,
449 						   bytes_compl);
450 	iq->flush_index = old;
451 
452 	return inst_count;
453 }
454 
455 /* Can only be called from process context */
456 int
457 octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq,
458 		u32 pending_thresh, u32 napi_budget)
459 {
460 	u32 inst_processed = 0;
461 	u32 tot_inst_processed = 0;
462 	int tx_done = 1;
463 
464 	if (!spin_trylock(&iq->iq_flush_running_lock))
465 		return tx_done;
466 
467 	spin_lock_bh(&iq->lock);
468 
469 	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(iq);
470 
471 	if (atomic_read(&iq->instr_pending) >= (s32)pending_thresh) {
472 		do {
473 			/* Process any outstanding IQ packets. */
474 			if (iq->flush_index == iq->octeon_read_index)
475 				break;
476 
477 			if (napi_budget)
478 				inst_processed = lio_process_iq_request_list
479 					(oct, iq,
480 					 napi_budget - tot_inst_processed);
481 			else
482 				inst_processed =
483 					lio_process_iq_request_list(oct, iq, 0);
484 
485 			if (inst_processed) {
486 				atomic_sub(inst_processed, &iq->instr_pending);
487 				iq->stats.instr_processed += inst_processed;
488 			}
489 
490 			tot_inst_processed += inst_processed;
491 			inst_processed = 0;
492 
493 		} while (tot_inst_processed < napi_budget);
494 
495 		if (napi_budget && (tot_inst_processed >= napi_budget))
496 			tx_done = 0;
497 	}
498 
499 	iq->last_db_time = jiffies;
500 
501 	spin_unlock_bh(&iq->lock);
502 
503 	spin_unlock(&iq->iq_flush_running_lock);
504 
505 	return tx_done;
506 }
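
/* Call-pattern sketch (illustrative; budget and iq come from the caller's
 * NAPI poll routine and are not defined in this file).  With a non-zero
 * napi_budget, a return of 0 means the budget was exhausted before the
 * queue drained, so the poller should keep NAPI scheduled:
 *
 *	tx_done = octeon_flush_iq(oct, iq, 1, budget);
 *	if (!tx_done)
 *		return budget;               // more tx completions remain
 */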
507 
508 /* Process instruction queue after timeout.
509  * This routine gets called from a workqueue or when removing the module.
510  */
511 static void __check_db_timeout(struct octeon_device *oct, u64 iq_no)
512 {
513 	struct octeon_instr_queue *iq;
514 	u64 next_time;
515 
516 	if (!oct)
517 		return;
518 
519 	iq = oct->instr_queue[iq_no];
520 	if (!iq)
521 		return;
522 
	/* Return immediately if no work is pending. */
524 	if (!atomic_read(&iq->instr_pending))
525 		return;
	/* If jiffies - last_db_time < db_timeout, do nothing. */
527 	next_time = iq->last_db_time + iq->db_timeout;
528 	if (!time_after(jiffies, (unsigned long)next_time))
529 		return;
530 	iq->last_db_time = jiffies;
531 
532 	/* Flush the instruction queue */
533 	octeon_flush_iq(oct, iq, 1, 0);
534 
535 	lio_enable_irq(NULL, iq);
536 }
537 
/* Runs from a delayed workqueue at regular intervals to flush the instruction
 * queue when commands posted to Octeon have been pending longer than the
 * doorbell timeout.
 */
541 static void check_db_timeout(struct work_struct *work)
542 {
543 	struct cavium_wk *wk = (struct cavium_wk *)work;
544 	struct octeon_device *oct = (struct octeon_device *)wk->ctxptr;
545 	u64 iq_no = wk->ctxul;
546 	struct cavium_wq *db_wq = &oct->check_db_wq[iq_no];
547 	u32 delay = 10;
548 
549 	__check_db_timeout(oct, iq_no);
550 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(delay));
551 }
552 
553 int
554 octeon_send_command(struct octeon_device *oct, u32 iq_no,
555 		    u32 force_db, void *cmd, void *buf,
556 		    u32 datasize, u32 reqtype)
557 {
558 	struct iq_post_status st;
559 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
560 
	/* Take the post lock to keep other tasks and the tx interrupt handler
	 * from posting to this queue concurrently.
	 */
564 	spin_lock_bh(&iq->post_lock);
565 
566 	st = __post_command2(iq, cmd);
567 
568 	if (st.status != IQ_SEND_FAILED) {
569 		octeon_report_sent_bytes_to_bql(buf, reqtype);
570 		__add_to_request_list(iq, st.index, buf, reqtype);
571 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
572 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);
573 
574 		if (force_db)
575 			ring_doorbell(oct, iq);
576 	} else {
577 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
578 	}
579 
580 	spin_unlock_bh(&iq->post_lock);
581 
582 	/* This is only done here to expedite packets being flushed
583 	 * for cases where there are no IQ completion interrupts.
584 	 */
585 
586 	return st.status;
587 }
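
/* Caller sketch (illustrative; cmd, pktbuf, skb, netdev, q_idx, datasize and
 * reqtype are placeholders supplied by a transmit path, not symbols defined
 * here).  The return value mirrors __post_command2() above:
 *
 *	status = octeon_send_command(oct, q_idx, 1, &cmd, pktbuf,
 *				     datasize, reqtype);
 *	if (status == IQ_SEND_FAILED)
 *		dev_kfree_skb_any(skb);              // queue full, drop
 *	else if (status == IQ_SEND_STOP)
 *		netif_stop_subqueue(netdev, q_idx);  // apply back-pressure
 *
 * The opaque pktbuf pointer is stored in iq->request_list and handed to the
 * registered reqtype free function (or treated as a soft command) when the
 * instruction is reclaimed.
 */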
588 
589 void
590 octeon_prepare_soft_command(struct octeon_device *oct,
591 			    struct octeon_soft_command *sc,
592 			    u8 opcode,
593 			    u8 subcode,
594 			    u32 irh_ossp,
595 			    u64 ossp0,
596 			    u64 ossp1)
597 {
598 	struct octeon_config *oct_cfg;
599 	struct octeon_instr_ih2 *ih2;
600 	struct octeon_instr_ih3 *ih3;
601 	struct octeon_instr_pki_ih3 *pki_ih3;
602 	struct octeon_instr_irh *irh;
603 	struct octeon_instr_rdp *rdp;
604 
605 	WARN_ON(opcode > 15);
606 	WARN_ON(subcode > 127);
607 
608 	oct_cfg = octeon_get_conf(oct);
609 
610 	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
611 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
612 
613 		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
614 
615 		pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
616 
617 		pki_ih3->w           = 1;
618 		pki_ih3->raw         = 1;
619 		pki_ih3->utag        = 1;
620 		pki_ih3->uqpg        =
621 			oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
622 		pki_ih3->utt         = 1;
623 		pki_ih3->tag     = LIO_CONTROL;
624 		pki_ih3->tagtype = ATOMIC_TAG;
625 		pki_ih3->qpg         =
626 			oct->instr_queue[sc->iq_no]->txpciq.s.qpg;
627 		pki_ih3->pm          = 0x7;
628 		pki_ih3->sl          = 8;
629 
630 		if (sc->datasize)
631 			ih3->dlengsz = sc->datasize;
632 
633 		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
634 		irh->opcode    = opcode;
635 		irh->subcode   = subcode;
636 
637 		/* opcode/subcode specific parameters (ossp) */
638 		irh->ossp       = irh_ossp;
639 		sc->cmd.cmd3.ossp[0] = ossp0;
640 		sc->cmd.cmd3.ossp[1] = ossp1;
641 
642 		if (sc->rdatasize) {
643 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
644 			rdp->pcie_port = oct->pcie_port;
645 			rdp->rlen      = sc->rdatasize;
646 
647 			irh->rflag =  1;
			/* PKI IH3 */
			/* pki_ih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 48 bytes */
650 			ih3->fsz    = LIO_SOFTCMDRESP_IH3;
651 		} else {
652 			irh->rflag =  0;
			/* PKI IH3 */
			/* pki_ih3 + irh + ossp[0] + ossp[1] = 32 bytes */
655 			ih3->fsz    = LIO_PCICMD_O3;
656 		}
657 
658 	} else {
659 		ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
660 		ih2->tagtype = ATOMIC_TAG;
661 		ih2->tag     = LIO_CONTROL;
662 		ih2->raw     = 1;
663 		ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);
664 
665 		if (sc->datasize) {
666 			ih2->dlengsz = sc->datasize;
667 			ih2->rs = 1;
668 		}
669 
670 		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
671 		irh->opcode    = opcode;
672 		irh->subcode   = subcode;
673 
674 		/* opcode/subcode specific parameters (ossp) */
675 		irh->ossp       = irh_ossp;
676 		sc->cmd.cmd2.ossp[0] = ossp0;
677 		sc->cmd.cmd2.ossp[1] = ossp1;
678 
679 		if (sc->rdatasize) {
680 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
681 			rdp->pcie_port = oct->pcie_port;
682 			rdp->rlen      = sc->rdatasize;
683 
684 			irh->rflag =  1;
685 			/* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
686 			ih2->fsz   = LIO_SOFTCMDRESP_IH2;
687 		} else {
688 			irh->rflag =  0;
689 			/* irh + ossp[0] + ossp[1] = 24 bytes */
690 			ih2->fsz   = LIO_PCICMD_O2;
691 		}
692 	}
693 }
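
/* Format summary for the two layouts produced above (derived from the code
 * paths in this function; the byte counts refer to the front data carried
 * with the command):
 *
 *	CN23xx (cmd3): ih3 + pki_ih3; fsz = LIO_SOFTCMDRESP_IH3 when a
 *		       response is expected, LIO_PCICMD_O3 otherwise
 *	CN6xxx (cmd2): ih2 only;      fsz = LIO_SOFTCMDRESP_IH2 when a
 *		       response is expected, LIO_PCICMD_O2 otherwise
 *
 * In both layouts irh->rflag is set only when sc->rdatasize is non-zero,
 * which is what later makes lio_process_iq_request_list() queue the command
 * on the ordered-response list instead of invoking sc->callback directly.
 */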
694 
695 int octeon_send_soft_command(struct octeon_device *oct,
696 			     struct octeon_soft_command *sc)
697 {
698 	struct octeon_instr_ih2 *ih2;
699 	struct octeon_instr_ih3 *ih3;
700 	struct octeon_instr_irh *irh;
701 	u32 len;
702 
703 	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
704 		ih3 =  (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
705 		if (ih3->dlengsz) {
706 			WARN_ON(!sc->dmadptr);
707 			sc->cmd.cmd3.dptr = sc->dmadptr;
708 		}
709 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
710 		if (irh->rflag) {
711 			WARN_ON(!sc->dmarptr);
712 			WARN_ON(!sc->status_word);
713 			*sc->status_word = COMPLETION_WORD_INIT;
714 			sc->cmd.cmd3.rptr = sc->dmarptr;
715 		}
716 		len = (u32)ih3->dlengsz;
717 	} else {
718 		ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
719 		if (ih2->dlengsz) {
720 			WARN_ON(!sc->dmadptr);
721 			sc->cmd.cmd2.dptr = sc->dmadptr;
722 		}
723 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
724 		if (irh->rflag) {
725 			WARN_ON(!sc->dmarptr);
726 			WARN_ON(!sc->status_word);
727 			*sc->status_word = COMPLETION_WORD_INIT;
728 			sc->cmd.cmd2.rptr = sc->dmarptr;
729 		}
730 		len = (u32)ih2->dlengsz;
731 	}
732 
733 	if (sc->wait_time)
734 		sc->timeout = jiffies + sc->wait_time;
735 
736 	return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
737 				    len, REQTYPE_SOFT_COMMAND));
738 }
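
/* End-to-end sketch of a fire-and-forget NIC soft command (illustrative;
 * union octnet_cmd, OPCODE_NIC/OPCODE_NIC_CMD and the payload fields are
 * assumptions drawn from the wider driver, not requirements of this API):
 *
 *	sc = octeon_alloc_soft_command(oct, sizeof(union octnet_cmd), 0, 0);
 *	if (!sc)
 *		return -ENOMEM;
 *	ncmd = (union octnet_cmd *)sc->virtdptr;
 *	ncmd->u64 = 0;
 *	ncmd->s.cmd = nic_cmd;                  // caller-chosen command code
 *	ncmd->s.param1 = param;
 *	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
 *				    OPCODE_NIC_CMD, 0, 0, 0);
 *	if (octeon_send_soft_command(oct, sc) == IQ_SEND_FAILED)
 *		octeon_free_soft_command(oct, sc);
 *
 * When rdatasize is non-zero the caller must also set sc->wait_time and
 * sc->callback, and must not free sc until the response manager has seen
 * the response or the request has timed out.
 */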
739 
740 int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
741 {
742 	int i;
743 	u64 dma_addr;
744 	struct octeon_soft_command *sc;
745 
746 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
747 	spin_lock_init(&oct->sc_buf_pool.lock);
748 	atomic_set(&oct->sc_buf_pool.alloc_buf_count, 0);
749 
750 	for (i = 0; i < MAX_SOFT_COMMAND_BUFFERS; i++) {
		sc = (struct octeon_soft_command *)
			lio_dma_alloc(oct, SOFT_COMMAND_BUFFER_SIZE,
				      (dma_addr_t *)&dma_addr);
755 		if (!sc) {
756 			octeon_free_sc_buffer_pool(oct);
757 			return 1;
758 		}
759 
760 		sc->dma_addr = dma_addr;
761 		sc->size = SOFT_COMMAND_BUFFER_SIZE;
762 
763 		list_add_tail(&sc->node, &oct->sc_buf_pool.head);
764 	}
765 
766 	return 0;
767 }
768 
769 int octeon_free_sc_buffer_pool(struct octeon_device *oct)
770 {
771 	struct list_head *tmp, *tmp2;
772 	struct octeon_soft_command *sc;
773 
774 	spin_lock_bh(&oct->sc_buf_pool.lock);
775 
776 	list_for_each_safe(tmp, tmp2, &oct->sc_buf_pool.head) {
777 		list_del(tmp);
778 
779 		sc = (struct octeon_soft_command *)tmp;
780 
781 		lio_dma_free(oct, sc->size, sc, sc->dma_addr);
782 	}
783 
784 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
785 
786 	spin_unlock_bh(&oct->sc_buf_pool.lock);
787 
788 	return 0;
789 }
790 
791 struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
792 						      u32 datasize,
793 						      u32 rdatasize,
794 						      u32 ctxsize)
795 {
796 	u64 dma_addr;
797 	u32 size;
798 	u32 offset = sizeof(struct octeon_soft_command);
799 	struct octeon_soft_command *sc = NULL;
800 	struct list_head *tmp;
801 
802 	WARN_ON((offset + datasize + rdatasize + ctxsize) >
803 	       SOFT_COMMAND_BUFFER_SIZE);
804 
805 	spin_lock_bh(&oct->sc_buf_pool.lock);
806 
807 	if (list_empty(&oct->sc_buf_pool.head)) {
808 		spin_unlock_bh(&oct->sc_buf_pool.lock);
809 		return NULL;
810 	}
811 
	/* list_for_each() followed by an immediate break just grabs the
	 * first free buffer on the pool list.
	 */
	list_for_each(tmp, &oct->sc_buf_pool.head)
		break;
814 
815 	list_del(tmp);
816 
817 	atomic_inc(&oct->sc_buf_pool.alloc_buf_count);
818 
819 	spin_unlock_bh(&oct->sc_buf_pool.lock);
820 
821 	sc = (struct octeon_soft_command *)tmp;
822 
823 	dma_addr = sc->dma_addr;
824 	size = sc->size;
825 
826 	memset(sc, 0, sc->size);
827 
828 	sc->dma_addr = dma_addr;
829 	sc->size = size;
830 
831 	if (ctxsize) {
832 		sc->ctxptr = (u8 *)sc + offset;
833 		sc->ctxsize = ctxsize;
834 	}
835 
836 	/* Start data at 128 byte boundary */
837 	offset = (offset + ctxsize + 127) & 0xffffff80;
838 
839 	if (datasize) {
840 		sc->virtdptr = (u8 *)sc + offset;
841 		sc->dmadptr = dma_addr + offset;
842 		sc->datasize = datasize;
843 	}
844 
845 	/* Start rdata at 128 byte boundary */
846 	offset = (offset + datasize + 127) & 0xffffff80;
847 
848 	if (rdatasize) {
849 		WARN_ON(rdatasize < 16);
850 		sc->virtrptr = (u8 *)sc + offset;
851 		sc->dmarptr = dma_addr + offset;
852 		sc->rdatasize = rdatasize;
853 		sc->status_word = (u64 *)((u8 *)(sc->virtrptr) + rdatasize - 8);
854 	}
855 
856 	return sc;
857 }
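
/* Layout sketch for octeon_alloc_soft_command(oct, 64, 16, 32); the exact
 * offsets depend on sizeof(struct octeon_soft_command), so the numbers
 * below are illustrative rather than exact:
 *
 *	offset 0                         the octeon_soft_command header
 *	offset sizeof(sc)                ctxptr, 32 bytes of context
 *	next 128-byte boundary           virtdptr/dmadptr, 64 bytes of data
 *	next 128-byte boundary after it  virtrptr/dmarptr, 16 bytes of
 *					 response data, of which the last
 *					 8 bytes are sc->status_word
 *
 * Everything sits inside a single SOFT_COMMAND_BUFFER_SIZE DMA buffer from
 * the pool above; the WARN_ON() at the top guards against overflow.
 */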
858 
859 void octeon_free_soft_command(struct octeon_device *oct,
860 			      struct octeon_soft_command *sc)
861 {
862 	spin_lock_bh(&oct->sc_buf_pool.lock);
863 
864 	list_add_tail(&sc->node, &oct->sc_buf_pool.head);
865 
866 	atomic_dec(&oct->sc_buf_pool.alloc_buf_count);
867 
868 	spin_unlock_bh(&oct->sc_buf_pool.lock);
869 }
870