1 /*
2  * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager
3  *
4  * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  * Written by: Karen Xie (kxie@chelsio.com)
35  */
36 
37 #define DRV_NAME "libcxgb"
38 #define DRV_VERSION "1.0.0-ko"
39 #define pr_fmt(fmt) DRV_NAME ": " fmt
40 
41 #include <linux/kernel.h>
42 #include <linux/module.h>
43 #include <linux/errno.h>
44 #include <linux/types.h>
45 #include <linux/debugfs.h>
46 #include <linux/export.h>
47 #include <linux/list.h>
48 #include <linux/skbuff.h>
49 #include <linux/pci.h>
50 #include <linux/scatterlist.h>
51 
52 #include "libcxgb_ppm.h"
53 
54 /* Direct Data Placement -
55  * Directly place the iSCSI Data-In or Data-Out PDU's payload into
56  * pre-posted final destination host-memory buffers based on the
57  * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
58  * in Data-Out PDUs. The host memory address is programmed into
59  * h/w in the format of pagepod entries. The location of the
60  * pagepod entry is encoded into ddp tag which is used as the base
61  * for ITT/TTT.
62  */
63 
64 /* Direct-Data Placement page size adjustment
65  */
66 int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz)
67 {
68 	struct cxgbi_tag_format *tformat = &ppm->tformat;
69 	int i;
70 
71 	for (i = 0; i < DDP_PGIDX_MAX; i++) {
72 		if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT +
73 					 tformat->pgsz_order[i])) {
74 			pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n",
75 				 __func__, ppm->ndev->name, pgsz, i);
76 			return i;
77 		}
78 	}
79 	pr_info("ippm: ddp page size %lu not supported.\n", pgsz);
80 	return DDP_PGIDX_MAX;
81 }
82 
83 /* DDP setup & teardown
84  */
85 static int ppm_find_unused_entries(unsigned long *bmap,
86 				   unsigned int max_ppods,
87 				   unsigned int start,
88 				   unsigned int nr,
89 				   unsigned int align_mask)
90 {
91 	unsigned long i;
92 
93 	i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask);
94 
95 	if (unlikely(i >= max_ppods) && (start > nr))
96 		i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1,
97 					       align_mask);
98 	if (unlikely(i >= max_ppods))
99 		return -ENOSPC;
100 
101 	bitmap_set(bmap, i, nr);
102 	return (int)i;
103 }
104 
105 static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count,
106 			     unsigned long caller_data)
107 {
108 	struct cxgbi_ppod_data *pdata = ppm->ppod_data + i;
109 
110 	pdata->caller_data = caller_data;
111 	pdata->npods = count;
112 
113 	if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1))
114 		pdata->color = 0;
115 	else
116 		pdata->color++;
117 }
118 
119 static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count,
120 			       unsigned long caller_data)
121 {
122 	struct cxgbi_ppm_pool *pool;
123 	unsigned int cpu;
124 	int i;
125 
126 	if (!ppm->pool)
127 		return -EINVAL;
128 
129 	cpu = get_cpu();
130 	pool = per_cpu_ptr(ppm->pool, cpu);
131 	spin_lock_bh(&pool->lock);
132 	put_cpu();
133 
134 	i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max,
135 				    pool->next, count, 0);
136 	if (i < 0) {
137 		pool->next = 0;
138 		spin_unlock_bh(&pool->lock);
139 		return -ENOSPC;
140 	}
141 
142 	pool->next = i + count;
143 	if (pool->next >= ppm->pool_index_max)
144 		pool->next = 0;
145 
146 	spin_unlock_bh(&pool->lock);
147 
148 	pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n",
149 		 __func__, cpu, i, count, i + cpu * ppm->pool_index_max,
150 		pool->next);
151 
152 	i += cpu * ppm->pool_index_max;
153 	ppm_mark_entries(ppm, i, count, caller_data);
154 
155 	return i;
156 }
157 
158 static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count,
159 			   unsigned long caller_data)
160 {
161 	int i;
162 
163 	spin_lock_bh(&ppm->map_lock);
164 	i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max,
165 				    ppm->next, count, 0);
166 	if (i < 0) {
167 		ppm->next = 0;
168 		spin_unlock_bh(&ppm->map_lock);
169 		pr_debug("ippm: NO suitable entries %u available.\n",
170 			 count);
171 		return -ENOSPC;
172 	}
173 
174 	ppm->next = i + count;
175 	if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram))
176 		ppm->next = 0;
177 	else if (ppm->next >= ppm->bmap_index_max)
178 		ppm->next = 0;
179 
180 	spin_unlock_bh(&ppm->map_lock);
181 
182 	pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n",
183 		 __func__, i, count, i + ppm->pool_rsvd, ppm->next,
184 		 caller_data);
185 
186 	i += ppm->pool_rsvd;
187 	ppm_mark_entries(ppm, i, count, caller_data);
188 
189 	return i;
190 }
191 
192 static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count)
193 {
194 	pr_debug("%s: idx %d + %d.\n", __func__, i, count);
195 
196 	if (i < ppm->pool_rsvd) {
197 		unsigned int cpu;
198 		struct cxgbi_ppm_pool *pool;
199 
200 		cpu = i / ppm->pool_index_max;
201 		i %= ppm->pool_index_max;
202 
203 		pool = per_cpu_ptr(ppm->pool, cpu);
204 		spin_lock_bh(&pool->lock);
205 		bitmap_clear(pool->bmap, i, count);
206 
207 		if (i < pool->next)
208 			pool->next = i;
209 		spin_unlock_bh(&pool->lock);
210 
211 		pr_debug("%s: cpu %u, idx %d, next %u.\n",
212 			 __func__, cpu, i, pool->next);
213 	} else {
214 		spin_lock_bh(&ppm->map_lock);
215 
216 		i -= ppm->pool_rsvd;
217 		bitmap_clear(ppm->ppod_bmap, i, count);
218 
219 		if (i < ppm->next)
220 			ppm->next = i;
221 		spin_unlock_bh(&ppm->map_lock);
222 
223 		pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next);
224 	}
225 }
226 
227 void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx)
228 {
229 	struct cxgbi_ppod_data *pdata;
230 
231 	if (idx >= ppm->ppmax) {
232 		pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax);
233 		return;
234 	}
235 
236 	pdata = ppm->ppod_data + idx;
237 	if (!pdata->npods) {
238 		pr_warn("ippm: idx %u, npods 0.\n", idx);
239 		return;
240 	}
241 
242 	pr_debug("release idx %u, npods %u.\n", idx, pdata->npods);
243 	ppm_unmark_entries(ppm, idx, pdata->npods);
244 }
245 EXPORT_SYMBOL(cxgbi_ppm_ppod_release);
246 
247 int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages,
248 			    u32 per_tag_pg_idx, u32 *ppod_idx,
249 			    u32 *ddp_tag, unsigned long caller_data)
250 {
251 	struct cxgbi_ppod_data *pdata;
252 	unsigned int npods;
253 	int idx = -1;
254 	unsigned int hwidx;
255 	u32 tag;
256 
257 	npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
258 	if (!npods) {
259 		pr_warn("%s: pages %u -> npods %u, full.\n",
260 			__func__, nr_pages, npods);
261 		return -EINVAL;
262 	}
263 
264 	/* grab from cpu pool first */
265 	idx = ppm_get_cpu_entries(ppm, npods, caller_data);
266 	/* try the general pool */
267 	if (idx < 0)
268 		idx = ppm_get_entries(ppm, npods, caller_data);
269 	if (idx < 0) {
270 		pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n",
271 			 nr_pages, npods, ppm->next, caller_data);
272 		return idx;
273 	}
274 
275 	pdata = ppm->ppod_data + idx;
276 	hwidx = ppm->base_idx + idx;
277 
278 	tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color);
279 
280 	if (per_tag_pg_idx)
281 		tag |= (per_tag_pg_idx << 30) & 0xC0000000;
282 
283 	*ppod_idx = idx;
284 	*ddp_tag = tag;
285 
286 	pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n",
287 		 nr_pages, tag, idx, npods, caller_data);
288 
289 	return npods;
290 }
291 EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve);
292 
293 void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag,
294 			     unsigned int tid, unsigned int offset,
295 			     unsigned int length,
296 			     struct cxgbi_pagepod_hdr *hdr)
297 {
298 	/* The ddp tag in pagepod should be with bit 31:30 set to 0.
299 	 * The ddp Tag on the wire should be with non-zero 31:30 to the peer
300 	 */
301 	tag &= 0x3FFFFFFF;
302 
303 	hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));
304 
305 	hdr->rsvd = 0;
306 	hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask);
307 	hdr->max_offset = htonl(length);
308 	hdr->page_offset = htonl(offset);
309 
310 	pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n",
311 		 tag, tid, length, offset);
312 }
313 EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr);
314 
/* Free the memory of a ppm with vfree().  The per-cpu pools referenced by
 * ppm->pool are not touched here.
 */
static void ppm_free(struct cxgbi_ppm *ppm)
{
	vfree(ppm);
}
319 
320 static void ppm_destroy(struct kref *kref)
321 {
322 	struct cxgbi_ppm *ppm = container_of(kref,
323 					     struct cxgbi_ppm,
324 					     refcnt);
325 	pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n",
326 		ppm->ndev->name, ppm);
327 
328 	*ppm->ppm_pp = NULL;
329 
330 	free_percpu(ppm->pool);
331 	ppm_free(ppm);
332 }
333 
334 int cxgbi_ppm_release(struct cxgbi_ppm *ppm)
335 {
336 	if (ppm) {
337 		int rv;
338 
339 		rv = kref_put(&ppm->refcnt, ppm_destroy);
340 		return rv;
341 	}
342 	return 1;
343 }
344 EXPORT_SYMBOL(cxgbi_ppm_release);
345 
346 static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total,
347 						 unsigned int *pcpu_ppmax)
348 {
349 	struct cxgbi_ppm_pool *pools;
350 	unsigned int ppmax = (*total) / num_possible_cpus();
351 	unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3;
352 	unsigned int bmap;
353 	unsigned int alloc_sz;
354 	unsigned int count = 0;
355 	unsigned int cpu;
356 
357 	/* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */
358 	if (ppmax > max)
359 		ppmax = max;
360 
361 	/* pool size must be multiple of unsigned long */
362 	bmap = ppmax / BITS_PER_TYPE(unsigned long);
363 	if (!bmap)
364 		return NULL;
365 
366 	ppmax = (bmap * sizeof(unsigned long)) << 3;
367 
368 	alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap;
369 	pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool));
370 
371 	if (!pools)
372 		return NULL;
373 
374 	for_each_possible_cpu(cpu) {
375 		struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu);
376 
377 		memset(ppool, 0, alloc_sz);
378 		spin_lock_init(&ppool->lock);
379 		count += ppmax;
380 	}
381 
382 	*total = count;
383 	*pcpu_ppmax = ppmax;
384 
385 	return pools;
386 }
387 
388 int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
389 		   struct pci_dev *pdev, void *lldev,
390 		   struct cxgbi_tag_format *tformat, unsigned int iscsi_size,
391 		   unsigned int llimit, unsigned int start,
392 		   unsigned int reserve_factor, unsigned int iscsi_edram_start,
393 		   unsigned int iscsi_edram_size)
394 {
395 	struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp);
396 	struct cxgbi_ppm_pool *pool = NULL;
397 	unsigned int pool_index_max = 0;
398 	unsigned int ppmax_pool = 0;
399 	unsigned int ppod_bmap_size;
400 	unsigned int alloc_sz;
401 	unsigned int ppmax;
402 
403 	if (!iscsi_edram_start)
404 		iscsi_edram_size = 0;
405 
406 	if (iscsi_edram_size &&
407 	    ((iscsi_edram_start + iscsi_edram_size) != start)) {
408 		pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x "
409 			"size 0x%x DDR start 0x%x\n",
410 			iscsi_edram_start, iscsi_edram_size, start);
411 		return -EINVAL;
412 	}
413 
414 	if (iscsi_edram_size) {
415 		reserve_factor = 0;
416 		start = iscsi_edram_start;
417 	}
418 
419 	ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT;
420 
421 	if (ppm) {
422 		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
423 			ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax);
424 		kref_get(&ppm->refcnt);
425 		return 1;
426 	}
427 
428 	if (reserve_factor) {
429 		ppmax_pool = ppmax / reserve_factor;
430 		pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max);
431 		if (!pool) {
432 			ppmax_pool = 0;
433 			reserve_factor = 0;
434 		}
435 
436 		pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
437 			 ndev->name, ppmax, ppmax_pool, pool_index_max);
438 	}
439 
440 	ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool);
441 	alloc_sz = sizeof(struct cxgbi_ppm) +
442 			ppmax * (sizeof(struct cxgbi_ppod_data)) +
443 			ppod_bmap_size * sizeof(unsigned long);
444 
445 	ppm = vzalloc(alloc_sz);
446 	if (!ppm)
447 		goto release_ppm_pool;
448 
449 	ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]);
450 
451 	if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) {
452 		unsigned int start = ppmax - ppmax_pool;
453 		unsigned int end = ppod_bmap_size >> 3;
454 
455 		bitmap_set(ppm->ppod_bmap, ppmax, end - start);
456 		pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n",
457 			__func__, ppmax, ppmax_pool, ppod_bmap_size, start,
458 			end);
459 	}
460 	if (iscsi_edram_size) {
461 		unsigned int first_ddr_idx =
462 				iscsi_edram_size >> PPOD_SIZE_SHIFT;
463 
464 		ppm->max_index_in_edram = first_ddr_idx - 1;
465 		bitmap_set(ppm->ppod_bmap, first_ddr_idx, 1);
466 		pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx);
467 	}
468 
469 	spin_lock_init(&ppm->map_lock);
470 	kref_init(&ppm->refcnt);
471 
472 	memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format));
473 
474 	ppm->ppm_pp = ppm_pp;
475 	ppm->ndev = ndev;
476 	ppm->pdev = pdev;
477 	ppm->lldev = lldev;
478 	ppm->ppmax = ppmax;
479 	ppm->next = 0;
480 	ppm->llimit = llimit;
481 	ppm->base_idx = start > llimit ?
482 			(start - llimit + 1) >> PPOD_SIZE_SHIFT : 0;
483 	ppm->bmap_index_max = ppmax - ppmax_pool;
484 
485 	ppm->pool = pool;
486 	ppm->pool_rsvd = ppmax_pool;
487 	ppm->pool_index_max = pool_index_max;
488 
489 	/* check one more time */
490 	if (*ppm_pp) {
491 		ppm_free(ppm);
492 		ppm = (struct cxgbi_ppm *)(*ppm_pp);
493 
494 		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
495 			ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax);
496 
497 		kref_get(&ppm->refcnt);
498 		return 1;
499 	}
500 	*ppm_pp = ppm;
501 
502 	ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE);
503 
504 	pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n",
505 		ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE,
506 		ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd,
507 		ppm->pool_index_max);
508 
509 	return 0;
510 
511 release_ppm_pool:
512 	free_percpu(pool);
513 	return -ENOMEM;
514 }
515 EXPORT_SYMBOL(cxgbi_ppm_init);
516 
517 unsigned int cxgbi_tagmask_set(unsigned int ppmax)
518 {
519 	unsigned int bits = fls(ppmax);
520 
521 	if (bits > PPOD_IDX_MAX_SIZE)
522 		bits = PPOD_IDX_MAX_SIZE;
523 
524 	pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n",
525 		ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT));
526 
527 	return 1 << (bits + PPOD_IDX_SHIFT);
528 }
529 EXPORT_SYMBOL(cxgbi_tagmask_set);
530 
531 MODULE_AUTHOR("Chelsio Communications");
532 MODULE_DESCRIPTION("Chelsio common library");
533 MODULE_VERSION(DRV_VERSION);
534 MODULE_LICENSE("Dual BSD/GPL");
535