/**********************************************************************
 * Author: Cavium, Inc.
 *
 * Contact: support@cavium.com
 *          Please include "LiquidIO" in the subject.
 *
 * Copyright (c) 2003-2016 Cavium, Inc.
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, Version 2, as
 * published by the Free Software Foundation.
 *
 * This file is distributed in the hope that it will be useful, but
 * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
 * NONINFRINGEMENT.  See the GNU General Public License for more
 * details.
 **********************************************************************/
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include "liquidio_common.h"
#include "octeon_droq.h"
#include "octeon_iq.h"
#include "response_manager.h"
#include "octeon_device.h"
#include "octeon_main.h"
#include "octeon_network.h"
#include "cn66xx_device.h"
#include "cn23xx_pf_device.h"
#include "cn23xx_vf_device.h"

struct iq_post_status {
	int status;
	int index;
};

static void check_db_timeout(struct work_struct *work);
static void __check_db_timeout(struct octeon_device *oct, u64 iq_no);

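/* Per-device table of buffer-free callbacks, indexed by request type.
 * Entries are registered via octeon_register_reqtype_free_fn() and are
 * invoked from lio_process_iq_request_list() to free the buffers of
 * completed requests.
 */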
static void (*reqtype_free_fn[MAX_OCTEON_DEVICES][REQTYPE_LAST + 1]) (void *);

static inline int IQ_INSTR_MODE_64B(struct octeon_device *oct, int iq_no)
{
	struct octeon_instr_queue *iq =
	    (struct octeon_instr_queue *)oct->instr_queue[iq_no];
	return iq->iqcmd_64B;
}

#define IQ_INSTR_MODE_32B(oct, iq_no)  (!IQ_INSTR_MODE_64B(oct, iq_no))

/* Define this to return request status compatible with old code */
/*#define OCTEON_USE_OLD_REQ_STATUS*/

/* Return 0 on success, 1 on failure */
int octeon_init_instr_queue(struct octeon_device *oct,
			    union oct_txpciq txpciq,
			    u32 num_descs)
{
	struct octeon_instr_queue *iq;
	struct octeon_iq_config *conf = NULL;
	u32 iq_no = (u32)txpciq.s.q_no;
	u32 q_size;
	struct cavium_wq *db_wq;
	int orig_node = dev_to_node(&oct->pci_dev->dev);
	int numa_node = cpu_to_node(iq_no % num_online_cpus());

	if (OCTEON_CN6XXX(oct))
		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx)));
	else if (OCTEON_CN23XX_PF(oct))
		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_pf)));
	else if (OCTEON_CN23XX_VF(oct))
		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_vf)));

	if (!conf) {
		dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
			oct->chip_id);
		return 1;
	}

	if (num_descs & (num_descs - 1)) {
		dev_err(&oct->pci_dev->dev,
			"Number of descriptors for instr queue %d is not a power of 2.\n",
			iq_no);
		return 1;
	}

	q_size = (u32)conf->instr_type * num_descs;

	iq = oct->instr_queue[iq_no];

	iq->oct_dev = oct;

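	/* Prefer allocating the descriptor ring on the NUMA node of the
	 * CPU that will service this queue; if that fails, fall back to
	 * the device's default node.
	 */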
	set_dev_node(&oct->pci_dev->dev, numa_node);
	iq->base_addr = lio_dma_alloc(oct, q_size,
				      (dma_addr_t *)&iq->base_addr_dma);
	set_dev_node(&oct->pci_dev->dev, orig_node);
	if (!iq->base_addr)
		iq->base_addr = lio_dma_alloc(oct, q_size,
					      (dma_addr_t *)&iq->base_addr_dma);
	if (!iq->base_addr) {
		dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
			iq_no);
		return 1;
	}

	iq->max_count = num_descs;

	/* Initialize a list to hold requests that have been posted to
	 * Octeon but have not yet been fetched by Octeon.
	 */
	iq->request_list = vmalloc_node((sizeof(*iq->request_list) * num_descs),
					numa_node);
	if (!iq->request_list)
		iq->request_list = vmalloc(sizeof(*iq->request_list) *
					   num_descs);
	if (!iq->request_list) {
		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
		dev_err(&oct->pci_dev->dev, "Alloc failed for IQ[%d] nr free list\n",
			iq_no);
		return 1;
	}

	memset(iq->request_list, 0, sizeof(*iq->request_list) * num_descs);

	dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %llx count: %d\n",
		iq_no, iq->base_addr, iq->base_addr_dma, iq->max_count);

	iq->txpciq.u64 = txpciq.u64;
	iq->fill_threshold = (u32)conf->db_min;
	iq->fill_cnt = 0;
	iq->host_write_index = 0;
	iq->octeon_read_index = 0;
	iq->flush_index = 0;
	iq->last_db_time = 0;
	iq->do_auto_flush = 1;
	iq->db_timeout = (u32)conf->db_timeout;
	atomic_set(&iq->instr_pending, 0);

	/* Initialize the spinlock for this instruction queue */
	spin_lock_init(&iq->lock);
	spin_lock_init(&iq->post_lock);

	spin_lock_init(&iq->iq_flush_running_lock);

	oct->io_qmask.iq |= BIT_ULL(iq_no);

	/* Set the 32B/64B mode for each input queue */
	oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
	iq->iqcmd_64B = (conf->instr_type == 64);

	oct->fn_list.setup_iq_regs(oct, iq_no);

	oct->check_db_wq[iq_no].wq = alloc_workqueue("check_iq_db",
						     WQ_MEM_RECLAIM,
						     0);
	if (!oct->check_db_wq[iq_no].wq) {
		vfree(iq->request_list);
		iq->request_list = NULL;
		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
		dev_err(&oct->pci_dev->dev, "check db wq create failed for iq %d\n",
			iq_no);
		return 1;
	}

	db_wq = &oct->check_db_wq[iq_no];

	INIT_DELAYED_WORK(&db_wq->wk.work, check_db_timeout);
	db_wq->wk.ctxptr = oct;
	db_wq->wk.ctxul = iq_no;
	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(1));

	return 0;
}

int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
{
	u64 desc_size = 0, q_size;
	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];

	cancel_delayed_work_sync(&oct->check_db_wq[iq_no].wk.work);
	destroy_workqueue(oct->check_db_wq[iq_no].wq);

	if (OCTEON_CN6XXX(oct))
		desc_size =
		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn6xxx));
	else if (OCTEON_CN23XX_PF(oct))
		desc_size =
		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_pf));
	else if (OCTEON_CN23XX_VF(oct))
		desc_size =
		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_vf));

	vfree(iq->request_list);

	if (iq->base_addr) {
		q_size = iq->max_count * desc_size;
		lio_dma_free(oct, (u32)q_size, iq->base_addr,
			     iq->base_addr_dma);
		return 0;
	}
	return 1;
}

/* Return 0 on success, 1 on failure */
int octeon_setup_iq(struct octeon_device *oct,
		    int ifidx,
		    int q_index,
		    union oct_txpciq txpciq,
		    u32 num_descs,
		    void *app_ctx)
{
	u32 iq_no = (u32)txpciq.s.q_no;
	int numa_node = cpu_to_node(iq_no % num_online_cpus());

	if (oct->instr_queue[iq_no]) {
		dev_dbg(&oct->pci_dev->dev, "IQ %d is in use; cannot create it again\n",
			iq_no);
		oct->instr_queue[iq_no]->txpciq.u64 = txpciq.u64;
		oct->instr_queue[iq_no]->app_ctx = app_ctx;
		return 0;
	}
	oct->instr_queue[iq_no] =
	    vmalloc_node(sizeof(struct octeon_instr_queue), numa_node);
	if (!oct->instr_queue[iq_no])
		oct->instr_queue[iq_no] =
		    vmalloc(sizeof(struct octeon_instr_queue));
	if (!oct->instr_queue[iq_no])
		return 1;

	memset(oct->instr_queue[iq_no], 0,
	       sizeof(struct octeon_instr_queue));

	oct->instr_queue[iq_no]->q_index = q_index;
	oct->instr_queue[iq_no]->app_ctx = app_ctx;
	oct->instr_queue[iq_no]->ifidx = ifidx;

	if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
		vfree(oct->instr_queue[iq_no]);
		oct->instr_queue[iq_no] = NULL;
		return 1;
	}

	oct->num_iqs++;
	if (oct->fn_list.enable_io_queues(oct))
		return 1;

	return 0;
}

int lio_wait_for_instr_fetch(struct octeon_device *oct)
{
	int i, retry = 1000, pending, instr_cnt = 0;

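	/* Poll until no instructions remain pending on any active IQ, or
	 * until the retry count runs out; return the number still pending.
	 */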
	do {
		instr_cnt = 0;

		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
			if (!(oct->io_qmask.iq & BIT_ULL(i)))
				continue;
			pending =
			    atomic_read(&oct->instr_queue[i]->instr_pending);
			if (pending)
				__check_db_timeout(oct, i);
			instr_cnt += pending;
		}

		if (instr_cnt == 0)
			break;

		schedule_timeout_uninterruptible(1);

	} while (retry-- && instr_cnt);

	return instr_cnt;
}

static inline void
ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq)
{
	if (atomic_read(&oct->status) == OCT_DEV_RUNNING) {
		writel(iq->fill_cnt, iq->doorbell_reg);
		/* make sure doorbell write goes through */
		mmiowb();
		iq->fill_cnt = 0;
		iq->last_db_time = jiffies;
		return;
	}
}

static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
				      u8 *cmd)
{
	u8 *iqptr, cmdsize;

	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
	iqptr = iq->base_addr + (cmdsize * iq->host_write_index);

	memcpy(iqptr, cmd, cmdsize);
}

static inline struct iq_post_status
__post_command2(struct octeon_instr_queue *iq, u8 *cmd)
{
	struct iq_post_status st;

	st.status = IQ_SEND_OK;

	/* This ensures that the read index does not wrap around to the same
	 * position if queue gets full before Octeon could fetch any instr.
	 */
	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 1)) {
		st.status = IQ_SEND_FAILED;
		st.index = -1;
		return st;
	}

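	/* The command will still be posted, but tell the caller to stop
	 * queueing more: the IQ is almost full.
	 */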
	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 2))
		st.status = IQ_SEND_STOP;

	__copy_cmd_into_iq(iq, cmd);

	/* "index" is returned, host_write_index is modified. */
	st.index = iq->host_write_index;
	iq->host_write_index = incr_index(iq->host_write_index, 1,
					  iq->max_count);
	iq->fill_cnt++;

	/* Flush the command into memory. We need to be sure the data is in
	 * memory before indicating that the instruction is pending.
	 */
	wmb();

	atomic_inc(&iq->instr_pending);

	return st;
}

int
octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype,
				void (*fn)(void *))
{
	if (reqtype > REQTYPE_LAST) {
		dev_err(&oct->pci_dev->dev, "%s: Invalid reqtype: %d\n",
			__func__, reqtype);
		return -EINVAL;
	}

	reqtype_free_fn[oct->octeon_id][reqtype] = fn;

	return 0;
}

static inline void
__add_to_request_list(struct octeon_instr_queue *iq,
		      int idx, void *buf, int reqtype)
{
	iq->request_list[idx].buf = buf;
	iq->request_list[idx].reqtype = reqtype;
}

/* Can only run in process context */
int
lio_process_iq_request_list(struct octeon_device *oct,
			    struct octeon_instr_queue *iq, u32 napi_budget)
{
	int reqtype;
	void *buf;
	u32 old = iq->flush_index;
	u32 inst_count = 0;
	unsigned int pkts_compl = 0, bytes_compl = 0;
	struct octeon_soft_command *sc;
	struct octeon_instr_irh *irh;
	unsigned long flags;

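	/* Walk the request list from the last flush point up to the index
	 * Octeon has read from, completing each fetched request.
	 */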
	while (old != iq->octeon_read_index) {
		reqtype = iq->request_list[old].reqtype;
		buf     = iq->request_list[old].buf;

		if (reqtype == REQTYPE_NONE)
			goto skip_this;

		octeon_update_tx_completion_counters(buf, reqtype, &pkts_compl,
						     &bytes_compl);

		switch (reqtype) {
		case REQTYPE_NORESP_NET:
		case REQTYPE_NORESP_NET_SG:
		case REQTYPE_RESP_NET_SG:
			reqtype_free_fn[oct->octeon_id][reqtype](buf);
			break;
		case REQTYPE_RESP_NET:
		case REQTYPE_SOFT_COMMAND:
			sc = buf;

			if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))
				irh = (struct octeon_instr_irh *)
					&sc->cmd.cmd3.irh;
			else
				irh = (struct octeon_instr_irh *)
					&sc->cmd.cmd2.irh;
			if (irh->rflag) {
				/* We're expecting a response from Octeon.
				 * Add sc to the ordered soft-command
				 * response list; lio_process_ordered_list()
				 * will process it when the response arrives.
				 */
				spin_lock_irqsave
					(&oct->response_list
					 [OCTEON_ORDERED_SC_LIST].lock,
					 flags);
				atomic_inc(&oct->response_list
					[OCTEON_ORDERED_SC_LIST].
					pending_req_count);
				list_add_tail(&sc->node, &oct->response_list
					[OCTEON_ORDERED_SC_LIST].head);
				spin_unlock_irqrestore
					(&oct->response_list
					 [OCTEON_ORDERED_SC_LIST].lock,
					 flags);
			} else {
				if (sc->callback) {
					/* This callback must not sleep */
					sc->callback(oct, OCTEON_REQUEST_DONE,
						     sc->callback_arg);
				}
			}
			break;
		default:
			dev_err(&oct->pci_dev->dev,
				"%s Unknown reqtype: %d buf: %p at idx %d\n",
				__func__, reqtype, buf, old);
		}

		iq->request_list[old].buf = NULL;
		iq->request_list[old].reqtype = 0;

 skip_this:
		inst_count++;
		old = incr_index(old, 1, iq->max_count);

		if ((napi_budget) && (inst_count >= napi_budget))
			break;
	}
	if (bytes_compl)
		octeon_report_tx_completion_to_bql(iq->app_ctx, pkts_compl,
						   bytes_compl);
	iq->flush_index = old;

	return inst_count;
}

/* Can only be called from process context */
int
octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq,
		u32 napi_budget)
{
	u32 inst_processed = 0;
	u32 tot_inst_processed = 0;
	int tx_done = 1;

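	/* Only one flush may run on an IQ at a time; if one is already in
	 * progress, let it do the work.
	 */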
	if (!spin_trylock(&iq->iq_flush_running_lock))
		return tx_done;

	spin_lock_bh(&iq->lock);

	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(iq);

	do {
		/* Process any outstanding IQ packets. */
		if (iq->flush_index == iq->octeon_read_index)
			break;

		if (napi_budget)
			inst_processed =
				lio_process_iq_request_list(oct, iq,
							    napi_budget -
							    tot_inst_processed);
		else
			inst_processed =
				lio_process_iq_request_list(oct, iq, 0);

		if (inst_processed) {
			atomic_sub(inst_processed, &iq->instr_pending);
			iq->stats.instr_processed += inst_processed;
		}

		tot_inst_processed += inst_processed;
		inst_processed = 0;

	} while (tot_inst_processed < napi_budget);

	if (napi_budget && (tot_inst_processed >= napi_budget))
		tx_done = 0;

	iq->last_db_time = jiffies;

	spin_unlock_bh(&iq->lock);

	spin_unlock(&iq->iq_flush_running_lock);

	return tx_done;
}

/* Process instruction queue after timeout.
 * This routine gets called from a workqueue or when removing the module.
 */
static void __check_db_timeout(struct octeon_device *oct, u64 iq_no)
{
	struct octeon_instr_queue *iq;
	u64 next_time;

	if (!oct)
		return;

	iq = oct->instr_queue[iq_no];
	if (!iq)
		return;

	/* Return immediately if no work is pending */
	if (!atomic_read(&iq->instr_pending))
		return;
	/* Do nothing if jiffies - last_db_time < db_timeout */
	next_time = iq->last_db_time + iq->db_timeout;
	if (!time_after(jiffies, (unsigned long)next_time))
		return;
	iq->last_db_time = jiffies;

	/* Flush the instruction queue */
	octeon_flush_iq(oct, iq, 0);

	lio_enable_irq(NULL, iq);
}

/* Delayed-work handler that runs at regular intervals to check the
 * instruction queue for pending commands and to flush those that
 * Octeon has already fetched.
 */
static void check_db_timeout(struct work_struct *work)
{
	struct cavium_wk *wk = (struct cavium_wk *)work;
	struct octeon_device *oct = (struct octeon_device *)wk->ctxptr;
	u64 iq_no = wk->ctxul;
	struct cavium_wq *db_wq = &oct->check_db_wq[iq_no];
	u32 delay = 10;

	__check_db_timeout(oct, iq_no);
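	/* Re-arm the work so the queue is checked again in 'delay' ms. */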
	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(delay));
}

int
octeon_send_command(struct octeon_device *oct, u32 iq_no,
		    u32 force_db, void *cmd, void *buf,
		    u32 datasize, u32 reqtype)
{
	struct iq_post_status st;
	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];

	/* Get the lock and prevent other tasks and tx interrupt handler from
	 * running.
	 */
	spin_lock_bh(&iq->post_lock);

	st = __post_command2(iq, cmd);

	if (st.status != IQ_SEND_FAILED) {
		octeon_report_sent_bytes_to_bql(buf, reqtype);
		__add_to_request_list(iq, st.index, buf, reqtype);
		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);

		if (force_db)
			ring_doorbell(oct, iq);
	} else {
		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
	}

	spin_unlock_bh(&iq->post_lock);

	/* This is only done here to expedite packets being flushed
	 * for cases where there are no IQ completion interrupts.
	 */

	return st.status;
}

void
octeon_prepare_soft_command(struct octeon_device *oct,
			    struct octeon_soft_command *sc,
			    u8 opcode,
			    u8 subcode,
			    u32 irh_ossp,
			    u64 ossp0,
			    u64 ossp1)
{
	struct octeon_config *oct_cfg;
	struct octeon_instr_ih2 *ih2;
	struct octeon_instr_ih3 *ih3;
	struct octeon_instr_pki_ih3 *pki_ih3;
	struct octeon_instr_irh *irh;
	struct octeon_instr_rdp *rdp;

	WARN_ON(opcode > 15);
	WARN_ON(subcode > 127);

	oct_cfg = octeon_get_conf(oct);

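	/* CN23xx PF/VF devices use the cmd3 command format with a PKI IH3
	 * header; CN6xxx devices use the older cmd2 format.
	 */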
	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;

		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;

		pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;

		pki_ih3->w           = 1;
		pki_ih3->raw         = 1;
		pki_ih3->utag        = 1;
		pki_ih3->uqpg        =
			oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
		pki_ih3->utt         = 1;
		pki_ih3->tag     = LIO_CONTROL;
		pki_ih3->tagtype = ATOMIC_TAG;
		pki_ih3->qpg         =
			oct->instr_queue[sc->iq_no]->txpciq.s.qpg;
		pki_ih3->pm          = 0x7;
		pki_ih3->sl          = 8;

		if (sc->datasize)
			ih3->dlengsz = sc->datasize;

		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
		irh->opcode    = opcode;
		irh->subcode   = subcode;

		/* opcode/subcode specific parameters (ossp) */
		irh->ossp       = irh_ossp;
		sc->cmd.cmd3.ossp[0] = ossp0;
		sc->cmd.cmd3.ossp[1] = ossp1;

		if (sc->rdatasize) {
			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
			rdp->pcie_port = oct->pcie_port;
			rdp->rlen      = sc->rdatasize;

			irh->rflag =  1;
			/* PKI IH3 */
			/* pki_ih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 48 bytes */
			ih3->fsz    = LIO_SOFTCMDRESP_IH3;
		} else {
			irh->rflag =  0;
			/* PKI IH3 */
			/* pki_ih3 + irh + ossp[0] + ossp[1] = 32 bytes */
			ih3->fsz    = LIO_PCICMD_O3;
		}

	} else {
		ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
		ih2->tagtype = ATOMIC_TAG;
		ih2->tag     = LIO_CONTROL;
		ih2->raw     = 1;
		ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);

		if (sc->datasize) {
			ih2->dlengsz = sc->datasize;
			ih2->rs = 1;
		}

		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
		irh->opcode    = opcode;
		irh->subcode   = subcode;

		/* opcode/subcode specific parameters (ossp) */
		irh->ossp       = irh_ossp;
		sc->cmd.cmd2.ossp[0] = ossp0;
		sc->cmd.cmd2.ossp[1] = ossp1;

		if (sc->rdatasize) {
			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
			rdp->pcie_port = oct->pcie_port;
			rdp->rlen      = sc->rdatasize;

			irh->rflag =  1;
			/* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
			ih2->fsz   = LIO_SOFTCMDRESP_IH2;
		} else {
			irh->rflag =  0;
			/* irh + ossp[0] + ossp[1] = 24 bytes */
			ih2->fsz   = LIO_PCICMD_O2;
		}
	}
}

int octeon_send_soft_command(struct octeon_device *oct,
			     struct octeon_soft_command *sc)
{
	struct octeon_instr_ih2 *ih2;
	struct octeon_instr_ih3 *ih3;
	struct octeon_instr_irh *irh;
	u32 len;

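	/* Patch the DMA addresses of the data and response buffers into the
	 * command, and initialize the completion word when a response is
	 * expected, before posting the command to the IQ.
	 */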
	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
		if (ih3->dlengsz) {
			WARN_ON(!sc->dmadptr);
			sc->cmd.cmd3.dptr = sc->dmadptr;
		}
		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
		if (irh->rflag) {
			WARN_ON(!sc->dmarptr);
			WARN_ON(!sc->status_word);
			*sc->status_word = COMPLETION_WORD_INIT;
			sc->cmd.cmd3.rptr = sc->dmarptr;
		}
		len = (u32)ih3->dlengsz;
	} else {
		ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
		if (ih2->dlengsz) {
			WARN_ON(!sc->dmadptr);
			sc->cmd.cmd2.dptr = sc->dmadptr;
		}
		irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
		if (irh->rflag) {
			WARN_ON(!sc->dmarptr);
			WARN_ON(!sc->status_word);
			*sc->status_word = COMPLETION_WORD_INIT;
			sc->cmd.cmd2.rptr = sc->dmarptr;
		}
		len = (u32)ih2->dlengsz;
	}

	if (sc->wait_time)
		sc->timeout = jiffies + sc->wait_time;

	return octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
				   len, REQTYPE_SOFT_COMMAND);
}

int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
{
	int i;
	u64 dma_addr;
	struct octeon_soft_command *sc;

	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
	spin_lock_init(&oct->sc_buf_pool.lock);
	atomic_set(&oct->sc_buf_pool.alloc_buf_count, 0);

	for (i = 0; i < MAX_SOFT_COMMAND_BUFFERS; i++) {
		sc = (struct octeon_soft_command *)
			lio_dma_alloc(oct,
				      SOFT_COMMAND_BUFFER_SIZE,
				      (dma_addr_t *)&dma_addr);
		if (!sc) {
			octeon_free_sc_buffer_pool(oct);
			return 1;
		}

		sc->dma_addr = dma_addr;
		sc->size = SOFT_COMMAND_BUFFER_SIZE;

		list_add_tail(&sc->node, &oct->sc_buf_pool.head);
	}

	return 0;
}

int octeon_free_sc_buffer_pool(struct octeon_device *oct)
{
	struct list_head *tmp, *tmp2;
	struct octeon_soft_command *sc;

	spin_lock_bh(&oct->sc_buf_pool.lock);

	list_for_each_safe(tmp, tmp2, &oct->sc_buf_pool.head) {
		list_del(tmp);

		sc = (struct octeon_soft_command *)tmp;

		lio_dma_free(oct, sc->size, sc, sc->dma_addr);
	}

	INIT_LIST_HEAD(&oct->sc_buf_pool.head);

	spin_unlock_bh(&oct->sc_buf_pool.lock);

	return 0;
}

struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
						      u32 datasize,
						      u32 rdatasize,
						      u32 ctxsize)
{
	u64 dma_addr;
	u32 size;
	u32 offset = sizeof(struct octeon_soft_command);
	struct octeon_soft_command *sc = NULL;
	struct list_head *tmp;

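	/* The soft-command header, context, data and response areas are all
	 * carved out of a single pre-allocated pool buffer, so together they
	 * must fit within SOFT_COMMAND_BUFFER_SIZE.
	 */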
	WARN_ON((offset + datasize + rdatasize + ctxsize) >
	       SOFT_COMMAND_BUFFER_SIZE);

	spin_lock_bh(&oct->sc_buf_pool.lock);

	if (list_empty(&oct->sc_buf_pool.head)) {
		spin_unlock_bh(&oct->sc_buf_pool.lock);
		return NULL;
	}

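	/* Take the first free buffer from the pool. */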
	list_for_each(tmp, &oct->sc_buf_pool.head)
		break;

	list_del(tmp);

	atomic_inc(&oct->sc_buf_pool.alloc_buf_count);

	spin_unlock_bh(&oct->sc_buf_pool.lock);

	sc = (struct octeon_soft_command *)tmp;

	dma_addr = sc->dma_addr;
	size = sc->size;

	memset(sc, 0, sc->size);

	sc->dma_addr = dma_addr;
	sc->size = size;

	if (ctxsize) {
		sc->ctxptr = (u8 *)sc + offset;
		sc->ctxsize = ctxsize;
	}

	/* Start data at 128 byte boundary */
	offset = (offset + ctxsize + 127) & 0xffffff80;

	if (datasize) {
		sc->virtdptr = (u8 *)sc + offset;
		sc->dmadptr = dma_addr + offset;
		sc->datasize = datasize;
	}

	/* Start rdata at 128 byte boundary */
	offset = (offset + datasize + 127) & 0xffffff80;

	if (rdatasize) {
		WARN_ON(rdatasize < 16);
		sc->virtrptr = (u8 *)sc + offset;
		sc->dmarptr = dma_addr + offset;
		sc->rdatasize = rdatasize;
		sc->status_word = (u64 *)((u8 *)(sc->virtrptr) + rdatasize - 8);
	}

	return sc;
}

void octeon_free_soft_command(struct octeon_device *oct,
			      struct octeon_soft_command *sc)
{
	spin_lock_bh(&oct->sc_buf_pool.lock);

	list_add_tail(&sc->node, &oct->sc_buf_pool.head);

	atomic_dec(&oct->sc_buf_pool.alloc_buf_count);

	spin_unlock_bh(&oct->sc_buf_pool.lock);
}