1 /**********************************************************************
2  * Author: Cavium, Inc.
3  *
4  * Contact: support@cavium.com
5  *          Please include "LiquidIO" in the subject.
6  *
7  * Copyright (c) 2003-2015 Cavium, Inc.
8  *
9  * This file is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License, Version 2, as
11  * published by the Free Software Foundation.
12  *
13  * This file is distributed in the hope that it will be useful, but
14  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
15  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
16  * NONINFRINGEMENT.  See the GNU General Public License for more
17  * details.
18  *
19  * This file may also be available under a different license from Cavium.
20  * Contact Cavium, Inc. for more information
21  **********************************************************************/
22 #include <linux/pci.h>
23 #include <linux/netdevice.h>
24 #include <linux/vmalloc.h>
25 #include "liquidio_common.h"
26 #include "octeon_droq.h"
27 #include "octeon_iq.h"
28 #include "response_manager.h"
29 #include "octeon_device.h"
30 #include "octeon_main.h"
31 #include "octeon_network.h"
32 #include "cn66xx_device.h"
33 
/* Add count to the statistics counter named field of instruction queue
 * iq_no on the device pointed to by octeon_dev_ptr.
 *
 * Every expression argument is parenthesized so that callers may pass
 * things like &dev or a compound index/count without precedence surprises.
 * field is a member name and therefore cannot be parenthesized.
 */
#define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
	((octeon_dev_ptr)->instr_queue[(iq_no)]->stats.field += (count))
36 
/* Result of posting one command to an instruction queue, as returned by
 * __post_command2().
 */
struct iq_post_status {
	/* IQ_SEND_OK, IQ_SEND_STOP or IQ_SEND_FAILED */
	int status;
	/* queue slot the command was copied into; -1 when nothing was posted */
	int index;
};
41 
/* Delayed-work handler: checks one instruction queue and re-arms itself. */
static void check_db_timeout(struct work_struct *work);
/* Flushes queue iq_no of oct when it has pending work and its timeout passed. */
static void  __check_db_timeout(struct octeon_device *oct, u64 iq_no);

/* Buffer release callbacks, indexed by device id (oct->octeon_id) and request
 * type.  Entries are installed by octeon_register_reqtype_free_fn().
 */
static void (*reqtype_free_fn[MAX_OCTEON_DEVICES][REQTYPE_LAST + 1]) (void *);
46 
47 static inline int IQ_INSTR_MODE_64B(struct octeon_device *oct, int iq_no)
48 {
49 	struct octeon_instr_queue *iq =
50 	    (struct octeon_instr_queue *)oct->instr_queue[iq_no];
51 	return iq->iqcmd_64B;
52 }
53 
54 #define IQ_INSTR_MODE_32B(oct, iq_no)  (!IQ_INSTR_MODE_64B(oct, iq_no))
55 
/* Define this to return the request status compatible with old code */
57 /*#define OCTEON_USE_OLD_REQ_STATUS*/
58 
59 /* Return 0 on success, 1 on failure */
60 int octeon_init_instr_queue(struct octeon_device *oct,
61 			    union oct_txpciq txpciq,
62 			    u32 num_descs)
63 {
64 	struct octeon_instr_queue *iq;
65 	struct octeon_iq_config *conf = NULL;
66 	u32 iq_no = (u32)txpciq.s.q_no;
67 	u32 q_size;
68 	struct cavium_wq *db_wq;
69 	int orig_node = dev_to_node(&oct->pci_dev->dev);
70 	int numa_node = cpu_to_node(iq_no % num_online_cpus());
71 
72 	if (OCTEON_CN6XXX(oct))
73 		conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf)));
74 
75 	if (!conf) {
76 		dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
77 			oct->chip_id);
78 		return 1;
79 	}
80 
81 	if (num_descs & (num_descs - 1)) {
82 		dev_err(&oct->pci_dev->dev,
83 			"Number of descriptors for instr queue %d not in power of 2.\n",
84 			iq_no);
85 		return 1;
86 	}
87 
88 	q_size = (u32)conf->instr_type * num_descs;
89 
90 	iq = oct->instr_queue[iq_no];
91 	iq->oct_dev = oct;
92 
93 	set_dev_node(&oct->pci_dev->dev, numa_node);
94 	iq->base_addr = lio_dma_alloc(oct, q_size,
95 				      (dma_addr_t *)&iq->base_addr_dma);
96 	set_dev_node(&oct->pci_dev->dev, orig_node);
97 	if (!iq->base_addr)
98 		iq->base_addr = lio_dma_alloc(oct, q_size,
99 					      (dma_addr_t *)&iq->base_addr_dma);
100 	if (!iq->base_addr) {
101 		dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
102 			iq_no);
103 		return 1;
104 	}
105 
106 	iq->max_count = num_descs;
107 
108 	/* Initialize a list to holds requests that have been posted to Octeon
109 	 * but has yet to be fetched by octeon
110 	 */
111 	iq->request_list = vmalloc_node((sizeof(*iq->request_list) * num_descs),
112 					       numa_node);
113 	if (!iq->request_list)
114 		iq->request_list = vmalloc(sizeof(*iq->request_list) *
115 						  num_descs);
116 	if (!iq->request_list) {
117 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
118 		dev_err(&oct->pci_dev->dev, "Alloc failed for IQ[%d] nr free list\n",
119 			iq_no);
120 		return 1;
121 	}
122 
123 	memset(iq->request_list, 0, sizeof(*iq->request_list) * num_descs);
124 
125 	dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %llx count: %d\n",
126 		iq_no, iq->base_addr, iq->base_addr_dma, iq->max_count);
127 
128 	iq->txpciq.u64 = txpciq.u64;
129 	iq->fill_threshold = (u32)conf->db_min;
130 	iq->fill_cnt = 0;
131 	iq->host_write_index = 0;
132 	iq->octeon_read_index = 0;
133 	iq->flush_index = 0;
134 	iq->last_db_time = 0;
135 	iq->do_auto_flush = 1;
136 	iq->db_timeout = (u32)conf->db_timeout;
137 	atomic_set(&iq->instr_pending, 0);
138 
139 	/* Initialize the spinlock for this instruction queue */
140 	spin_lock_init(&iq->lock);
141 	spin_lock_init(&iq->post_lock);
142 
143 	spin_lock_init(&iq->iq_flush_running_lock);
144 
145 	oct->io_qmask.iq |= (1ULL << iq_no);
146 
147 	/* Set the 32B/64B mode for each input queue */
148 	oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
149 	iq->iqcmd_64B = (conf->instr_type == 64);
150 
151 	oct->fn_list.setup_iq_regs(oct, iq_no);
152 
153 	oct->check_db_wq[iq_no].wq = alloc_workqueue("check_iq_db",
154 						     WQ_MEM_RECLAIM,
155 						     0);
156 	if (!oct->check_db_wq[iq_no].wq) {
157 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
158 		dev_err(&oct->pci_dev->dev, "check db wq create failed for iq %d\n",
159 			iq_no);
160 		return 1;
161 	}
162 
163 	db_wq = &oct->check_db_wq[iq_no];
164 
165 	INIT_DELAYED_WORK(&db_wq->wk.work, check_db_timeout);
166 	db_wq->wk.ctxptr = oct;
167 	db_wq->wk.ctxul = iq_no;
168 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(1));
169 
170 	return 0;
171 }
172 
173 int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
174 {
175 	u64 desc_size = 0, q_size;
176 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
177 
178 	cancel_delayed_work_sync(&oct->check_db_wq[iq_no].wk.work);
179 	destroy_workqueue(oct->check_db_wq[iq_no].wq);
180 
181 	if (OCTEON_CN6XXX(oct))
182 		desc_size =
183 		    CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf));
184 
185 	vfree(iq->request_list);
186 
187 	if (iq->base_addr) {
188 		q_size = iq->max_count * desc_size;
189 		lio_dma_free(oct, (u32)q_size, iq->base_addr,
190 			     iq->base_addr_dma);
191 		return 0;
192 	}
193 	return 1;
194 }
195 
196 /* Return 0 on success, 1 on failure */
197 int octeon_setup_iq(struct octeon_device *oct,
198 		    int ifidx,
199 		    int q_index,
200 		    union oct_txpciq txpciq,
201 		    u32 num_descs,
202 		    void *app_ctx)
203 {
204 	u32 iq_no = (u32)txpciq.s.q_no;
205 	int numa_node = cpu_to_node(iq_no % num_online_cpus());
206 
207 	if (oct->instr_queue[iq_no]) {
208 		dev_dbg(&oct->pci_dev->dev, "IQ is in use. Cannot create the IQ: %d again\n",
209 			iq_no);
210 		oct->instr_queue[iq_no]->txpciq.u64 = txpciq.u64;
211 		oct->instr_queue[iq_no]->app_ctx = app_ctx;
212 		return 0;
213 	}
214 	oct->instr_queue[iq_no] =
215 	    vmalloc_node(sizeof(struct octeon_instr_queue), numa_node);
216 	if (!oct->instr_queue[iq_no])
217 		oct->instr_queue[iq_no] =
218 		    vmalloc(sizeof(struct octeon_instr_queue));
219 	if (!oct->instr_queue[iq_no])
220 		return 1;
221 
222 	memset(oct->instr_queue[iq_no], 0,
223 	       sizeof(struct octeon_instr_queue));
224 
225 	oct->instr_queue[iq_no]->q_index = q_index;
226 	oct->instr_queue[iq_no]->app_ctx = app_ctx;
227 	oct->instr_queue[iq_no]->ifidx = ifidx;
228 
229 	if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
230 		vfree(oct->instr_queue[iq_no]);
231 		oct->instr_queue[iq_no] = NULL;
232 		return 1;
233 	}
234 
235 	oct->num_iqs++;
236 	oct->fn_list.enable_io_queues(oct);
237 	return 0;
238 }
239 
240 int lio_wait_for_instr_fetch(struct octeon_device *oct)
241 {
242 	int i, retry = 1000, pending, instr_cnt = 0;
243 
244 	do {
245 		instr_cnt = 0;
246 
247 		/*for (i = 0; i < oct->num_iqs; i++) {*/
248 		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
249 			if (!(oct->io_qmask.iq & (1ULL << i)))
250 				continue;
251 			pending =
252 			    atomic_read(&oct->
253 					       instr_queue[i]->instr_pending);
254 			if (pending)
255 				__check_db_timeout(oct, i);
256 			instr_cnt += pending;
257 		}
258 
259 		if (instr_cnt == 0)
260 			break;
261 
262 		schedule_timeout_uninterruptible(1);
263 
264 	} while (retry-- && instr_cnt);
265 
266 	return instr_cnt;
267 }
268 
269 static inline void
270 ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq)
271 {
272 	if (atomic_read(&oct->status) == OCT_DEV_RUNNING) {
273 		writel(iq->fill_cnt, iq->doorbell_reg);
274 		/* make sure doorbell write goes through */
275 		mmiowb();
276 		iq->fill_cnt = 0;
277 		iq->last_db_time = jiffies;
278 		return;
279 	}
280 }
281 
282 static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
283 				      u8 *cmd)
284 {
285 	u8 *iqptr, cmdsize;
286 
287 	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
288 	iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
289 
290 	memcpy(iqptr, cmd, cmdsize);
291 }
292 
293 static inline struct iq_post_status
294 __post_command2(struct octeon_instr_queue *iq, u8 *cmd)
295 {
296 	struct iq_post_status st;
297 
298 	st.status = IQ_SEND_OK;
299 
300 	/* This ensures that the read index does not wrap around to the same
301 	 * position if queue gets full before Octeon could fetch any instr.
302 	 */
303 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 1)) {
304 		st.status = IQ_SEND_FAILED;
305 		st.index = -1;
306 		return st;
307 	}
308 
309 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 2))
310 		st.status = IQ_SEND_STOP;
311 
312 	__copy_cmd_into_iq(iq, cmd);
313 
314 	/* "index" is returned, host_write_index is modified. */
315 	st.index = iq->host_write_index;
316 	INCR_INDEX_BY1(iq->host_write_index, iq->max_count);
317 	iq->fill_cnt++;
318 
319 	/* Flush the command into memory. We need to be sure the data is in
320 	 * memory before indicating that the instruction is pending.
321 	 */
322 	wmb();
323 
324 	atomic_inc(&iq->instr_pending);
325 
326 	return st;
327 }
328 
329 int
330 octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype,
331 				void (*fn)(void *))
332 {
333 	if (reqtype > REQTYPE_LAST) {
334 		dev_err(&oct->pci_dev->dev, "%s: Invalid reqtype: %d\n",
335 			__func__, reqtype);
336 		return -EINVAL;
337 	}
338 
339 	reqtype_free_fn[oct->octeon_id][reqtype] = fn;
340 
341 	return 0;
342 }
343 
344 static inline void
345 __add_to_request_list(struct octeon_instr_queue *iq,
346 		      int idx, void *buf, int reqtype)
347 {
348 	iq->request_list[idx].buf = buf;
349 	iq->request_list[idx].reqtype = reqtype;
350 }
351 
352 /* Can only run in process context */
353 int
354 lio_process_iq_request_list(struct octeon_device *oct,
355 			    struct octeon_instr_queue *iq, u32 napi_budget)
356 {
357 	int reqtype;
358 	void *buf;
359 	u32 old = iq->flush_index;
360 	u32 inst_count = 0;
361 	unsigned int pkts_compl = 0, bytes_compl = 0;
362 	struct octeon_soft_command *sc;
363 	struct octeon_instr_irh *irh;
364 	unsigned long flags;
365 
366 	while (old != iq->octeon_read_index) {
367 		reqtype = iq->request_list[old].reqtype;
368 		buf     = iq->request_list[old].buf;
369 
370 		if (reqtype == REQTYPE_NONE)
371 			goto skip_this;
372 
373 		octeon_update_tx_completion_counters(buf, reqtype, &pkts_compl,
374 						     &bytes_compl);
375 
376 		switch (reqtype) {
377 		case REQTYPE_NORESP_NET:
378 		case REQTYPE_NORESP_NET_SG:
379 		case REQTYPE_RESP_NET_SG:
380 			reqtype_free_fn[oct->octeon_id][reqtype](buf);
381 			break;
382 		case REQTYPE_RESP_NET:
383 		case REQTYPE_SOFT_COMMAND:
384 			sc = buf;
385 
386 			irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
387 			if (irh->rflag) {
388 				/* We're expecting a response from Octeon.
389 				 * It's up to lio_process_ordered_list() to
390 				 * process  sc. Add sc to the ordered soft
391 				 * command response list because we expect
392 				 * a response from Octeon.
393 				 */
394 				spin_lock_irqsave
395 					(&oct->response_list
396 					 [OCTEON_ORDERED_SC_LIST].lock,
397 					 flags);
398 				atomic_inc(&oct->response_list
399 					[OCTEON_ORDERED_SC_LIST].
400 					pending_req_count);
401 				list_add_tail(&sc->node, &oct->response_list
402 					[OCTEON_ORDERED_SC_LIST].head);
403 				spin_unlock_irqrestore
404 					(&oct->response_list
405 					 [OCTEON_ORDERED_SC_LIST].lock,
406 					 flags);
407 			} else {
408 				if (sc->callback) {
409 					/* This callback must not sleep */
410 					sc->callback(oct, OCTEON_REQUEST_DONE,
411 						     sc->callback_arg);
412 				}
413 			}
414 			break;
415 		default:
416 			dev_err(&oct->pci_dev->dev,
417 				"%s Unknown reqtype: %d buf: %p at idx %d\n",
418 				__func__, reqtype, buf, old);
419 		}
420 
421 		iq->request_list[old].buf = NULL;
422 		iq->request_list[old].reqtype = 0;
423 
424  skip_this:
425 		inst_count++;
426 		INCR_INDEX_BY1(old, iq->max_count);
427 
428 		if ((napi_budget) && (inst_count >= napi_budget))
429 			break;
430 	}
431 	if (bytes_compl)
432 		octeon_report_tx_completion_to_bql(iq->app_ctx, pkts_compl,
433 						   bytes_compl);
434 	iq->flush_index = old;
435 
436 	return inst_count;
437 }
438 
439 /* Can only be called from process context */
440 int
441 octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq,
442 		u32 pending_thresh, u32 napi_budget)
443 {
444 	u32 inst_processed = 0;
445 	u32 tot_inst_processed = 0;
446 	int tx_done = 1;
447 
448 	if (!spin_trylock(&iq->iq_flush_running_lock))
449 		return tx_done;
450 
451 	spin_lock_bh(&iq->lock);
452 
453 	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(iq);
454 
455 	if (atomic_read(&iq->instr_pending) >= (s32)pending_thresh) {
456 		do {
457 			/* Process any outstanding IQ packets. */
458 			if (iq->flush_index == iq->octeon_read_index)
459 				break;
460 
461 			if (napi_budget)
462 				inst_processed = lio_process_iq_request_list
463 					(oct, iq,
464 					 napi_budget - tot_inst_processed);
465 			else
466 				inst_processed =
467 					lio_process_iq_request_list(oct, iq, 0);
468 
469 			if (inst_processed) {
470 				atomic_sub(inst_processed, &iq->instr_pending);
471 				iq->stats.instr_processed += inst_processed;
472 			}
473 
474 			tot_inst_processed += inst_processed;
475 			inst_processed = 0;
476 
477 		} while (tot_inst_processed < napi_budget);
478 
479 		if (napi_budget && (tot_inst_processed >= napi_budget))
480 			tx_done = 0;
481 	}
482 
483 	iq->last_db_time = jiffies;
484 
485 	spin_unlock_bh(&iq->lock);
486 
487 	spin_unlock(&iq->iq_flush_running_lock);
488 
489 	return tx_done;
490 }
491 
492 /* Process instruction queue after timeout.
493  * This routine gets called from a workqueue or when removing the module.
494  */
495 static void __check_db_timeout(struct octeon_device *oct, u64 iq_no)
496 {
497 	struct octeon_instr_queue *iq;
498 	u64 next_time;
499 
500 	if (!oct)
501 		return;
502 	iq = oct->instr_queue[iq_no];
503 	if (!iq)
504 		return;
505 
506 	/* return immediately, if no work pending */
507 	if (!atomic_read(&iq->instr_pending))
508 		return;
509 	/* If jiffies - last_db_time < db_timeout do nothing  */
510 	next_time = iq->last_db_time + iq->db_timeout;
511 	if (!time_after(jiffies, (unsigned long)next_time))
512 		return;
513 	iq->last_db_time = jiffies;
514 
515 	/* Flush the instruction queue */
516 	octeon_flush_iq(oct, iq, 1, 0);
517 }
518 
519 /* Called by the Poll thread at regular intervals to check the instruction
520  * queue for commands to be posted and for commands that were fetched by Octeon.
521  */
522 static void check_db_timeout(struct work_struct *work)
523 {
524 	struct cavium_wk *wk = (struct cavium_wk *)work;
525 	struct octeon_device *oct = (struct octeon_device *)wk->ctxptr;
526 	u64 iq_no = wk->ctxul;
527 	struct cavium_wq *db_wq = &oct->check_db_wq[iq_no];
528 	u32 delay = 10;
529 
530 	__check_db_timeout(oct, iq_no);
531 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(delay));
532 }
533 
534 int
535 octeon_send_command(struct octeon_device *oct, u32 iq_no,
536 		    u32 force_db, void *cmd, void *buf,
537 		    u32 datasize, u32 reqtype)
538 {
539 	struct iq_post_status st;
540 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
541 
542 	/* Get the lock and prevent other tasks and tx interrupt handler from
543 	 * running.
544 	 */
545 	spin_lock_bh(&iq->post_lock);
546 
547 	st = __post_command2(iq, cmd);
548 
549 	if (st.status != IQ_SEND_FAILED) {
550 		octeon_report_sent_bytes_to_bql(buf, reqtype);
551 		__add_to_request_list(iq, st.index, buf, reqtype);
552 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
553 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);
554 
555 		if (force_db)
556 			ring_doorbell(oct, iq);
557 	} else {
558 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
559 	}
560 
561 	spin_unlock_bh(&iq->post_lock);
562 
563 	/* This is only done here to expedite packets being flushed
564 	 * for cases where there are no IQ completion interrupts.
565 	 */
566 	/*if (iq->do_auto_flush)*/
567 	/*	octeon_flush_iq(oct, iq, 2, 0);*/
568 
569 	return st.status;
570 }
571 
572 void
573 octeon_prepare_soft_command(struct octeon_device *oct,
574 			    struct octeon_soft_command *sc,
575 			    u8 opcode,
576 			    u8 subcode,
577 			    u32 irh_ossp,
578 			    u64 ossp0,
579 			    u64 ossp1)
580 {
581 	struct octeon_config *oct_cfg;
582 	struct octeon_instr_ih2 *ih2;
583 	struct octeon_instr_irh *irh;
584 	struct octeon_instr_rdp *rdp;
585 
586 	WARN_ON(opcode > 15);
587 	WARN_ON(subcode > 127);
588 
589 	oct_cfg = octeon_get_conf(oct);
590 
591 	ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
592 	ih2->tagtype = ATOMIC_TAG;
593 	ih2->tag     = LIO_CONTROL;
594 	ih2->raw     = 1;
595 	ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);
596 
597 	if (sc->datasize) {
598 		ih2->dlengsz = sc->datasize;
599 		ih2->rs = 1;
600 	}
601 
602 	irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
603 	irh->opcode    = opcode;
604 	irh->subcode   = subcode;
605 
606 	/* opcode/subcode specific parameters (ossp) */
607 	irh->ossp       = irh_ossp;
608 	sc->cmd.cmd2.ossp[0] = ossp0;
609 	sc->cmd.cmd2.ossp[1] = ossp1;
610 
611 	if (sc->rdatasize) {
612 		rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
613 		rdp->pcie_port = oct->pcie_port;
614 		rdp->rlen      = sc->rdatasize;
615 
616 		irh->rflag =  1;
617 		ih2->fsz   = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
618 	} else {
619 		irh->rflag =  0;
620 		ih2->fsz   = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
621 	}
622 }
623 
624 int octeon_send_soft_command(struct octeon_device *oct,
625 			     struct octeon_soft_command *sc)
626 {
627 	struct octeon_instr_ih2 *ih2;
628 	struct octeon_instr_irh *irh;
629 	u32 len;
630 
631 	ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
632 	if (ih2->dlengsz) {
633 		WARN_ON(!sc->dmadptr);
634 		sc->cmd.cmd2.dptr = sc->dmadptr;
635 	}
636 	irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
637 	if (irh->rflag) {
638 		WARN_ON(!sc->dmarptr);
639 		WARN_ON(!sc->status_word);
640 		*sc->status_word = COMPLETION_WORD_INIT;
641 
642 		sc->cmd.cmd2.rptr = sc->dmarptr;
643 	}
644 	len = (u32)ih2->dlengsz;
645 
646 	if (sc->wait_time)
647 		sc->timeout = jiffies + sc->wait_time;
648 
649 	return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
650 				    len, REQTYPE_SOFT_COMMAND));
651 }
652 
653 int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
654 {
655 	int i;
656 	u64 dma_addr;
657 	struct octeon_soft_command *sc;
658 
659 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
660 	spin_lock_init(&oct->sc_buf_pool.lock);
661 	atomic_set(&oct->sc_buf_pool.alloc_buf_count, 0);
662 
663 	for (i = 0; i < MAX_SOFT_COMMAND_BUFFERS; i++) {
664 		sc = (struct octeon_soft_command *)
665 			lio_dma_alloc(oct,
666 				      SOFT_COMMAND_BUFFER_SIZE,
667 					  (dma_addr_t *)&dma_addr);
668 		if (!sc)
669 			return 1;
670 
671 		sc->dma_addr = dma_addr;
672 		sc->size = SOFT_COMMAND_BUFFER_SIZE;
673 
674 		list_add_tail(&sc->node, &oct->sc_buf_pool.head);
675 	}
676 
677 	return 0;
678 }
679 
680 int octeon_free_sc_buffer_pool(struct octeon_device *oct)
681 {
682 	struct list_head *tmp, *tmp2;
683 	struct octeon_soft_command *sc;
684 
685 	spin_lock_bh(&oct->sc_buf_pool.lock);
686 
687 	list_for_each_safe(tmp, tmp2, &oct->sc_buf_pool.head) {
688 		list_del(tmp);
689 
690 		sc = (struct octeon_soft_command *)tmp;
691 
692 		lio_dma_free(oct, sc->size, sc, sc->dma_addr);
693 	}
694 
695 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
696 
697 	spin_unlock_bh(&oct->sc_buf_pool.lock);
698 
699 	return 0;
700 }
701 
702 struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
703 						      u32 datasize,
704 						      u32 rdatasize,
705 						      u32 ctxsize)
706 {
707 	u64 dma_addr;
708 	u32 size;
709 	u32 offset = sizeof(struct octeon_soft_command);
710 	struct octeon_soft_command *sc = NULL;
711 	struct list_head *tmp;
712 
713 	WARN_ON((offset + datasize + rdatasize + ctxsize) >
714 	       SOFT_COMMAND_BUFFER_SIZE);
715 
716 	spin_lock_bh(&oct->sc_buf_pool.lock);
717 
718 	if (list_empty(&oct->sc_buf_pool.head)) {
719 		spin_unlock_bh(&oct->sc_buf_pool.lock);
720 		return NULL;
721 	}
722 
723 	list_for_each(tmp, &oct->sc_buf_pool.head)
724 		break;
725 
726 	list_del(tmp);
727 
728 	atomic_inc(&oct->sc_buf_pool.alloc_buf_count);
729 
730 	spin_unlock_bh(&oct->sc_buf_pool.lock);
731 
732 	sc = (struct octeon_soft_command *)tmp;
733 
734 	dma_addr = sc->dma_addr;
735 	size = sc->size;
736 
737 	memset(sc, 0, sc->size);
738 
739 	sc->dma_addr = dma_addr;
740 	sc->size = size;
741 
742 	if (ctxsize) {
743 		sc->ctxptr = (u8 *)sc + offset;
744 		sc->ctxsize = ctxsize;
745 	}
746 
747 	/* Start data at 128 byte boundary */
748 	offset = (offset + ctxsize + 127) & 0xffffff80;
749 
750 	if (datasize) {
751 		sc->virtdptr = (u8 *)sc + offset;
752 		sc->dmadptr = dma_addr + offset;
753 		sc->datasize = datasize;
754 	}
755 
756 	/* Start rdata at 128 byte boundary */
757 	offset = (offset + datasize + 127) & 0xffffff80;
758 
759 	if (rdatasize) {
760 		WARN_ON(rdatasize < 16);
761 		sc->virtrptr = (u8 *)sc + offset;
762 		sc->dmarptr = dma_addr + offset;
763 		sc->rdatasize = rdatasize;
764 		sc->status_word = (u64 *)((u8 *)(sc->virtrptr) + rdatasize - 8);
765 	}
766 
767 	return sc;
768 }
769 
/* Return soft command sc to the buffer pool of oct: the buffer is appended
 * to the pool list and the count of allocated buffers is decremented, both
 * under the pool lock.  The buffer memory itself is not freed.
 */
void octeon_free_soft_command(struct octeon_device *oct,
			      struct octeon_soft_command *sc)
{
	spin_lock_bh(&oct->sc_buf_pool.lock);

	list_add_tail(&sc->node, &oct->sc_buf_pool.head);

	atomic_dec(&oct->sc_buf_pool.alloc_buf_count);

	spin_unlock_bh(&oct->sc_buf_pool.lock);
}
781