1 /* 2 * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager 3 * 4 * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 * 34 * Written by: Karen Xie (kxie@chelsio.com) 35 */ 36 37 #define DRV_NAME "libcxgb" 38 #define DRV_VERSION "1.0.0-ko" 39 #define pr_fmt(fmt) DRV_NAME ": " fmt 40 41 #include <linux/kernel.h> 42 #include <linux/module.h> 43 #include <linux/errno.h> 44 #include <linux/types.h> 45 #include <linux/debugfs.h> 46 #include <linux/export.h> 47 #include <linux/list.h> 48 #include <linux/skbuff.h> 49 #include <linux/pci.h> 50 #include <linux/scatterlist.h> 51 52 #include "libcxgb_ppm.h" 53 54 /* Direct Data Placement - 55 * Directly place the iSCSI Data-In or Data-Out PDU's payload into 56 * pre-posted final destination host-memory buffers based on the 57 * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) 58 * in Data-Out PDUs. The host memory address is programmed into 59 * h/w in the format of pagepod entries. The location of the 60 * pagepod entry is encoded into ddp tag which is used as the base 61 * for ITT/TTT. 62 */ 63 64 /* Direct-Data Placement page size adjustment 65 */ 66 int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz) 67 { 68 struct cxgbi_tag_format *tformat = &ppm->tformat; 69 int i; 70 71 for (i = 0; i < DDP_PGIDX_MAX; i++) { 72 if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT + 73 tformat->pgsz_order[i])) { 74 pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n", 75 __func__, ppm->ndev->name, pgsz, i); 76 return i; 77 } 78 } 79 pr_info("ippm: ddp page size %lu not supported.\n", pgsz); 80 return DDP_PGIDX_MAX; 81 } 82 83 /* DDP setup & teardown 84 */ 85 static int ppm_find_unused_entries(unsigned long *bmap, 86 unsigned int max_ppods, 87 unsigned int start, 88 unsigned int nr, 89 unsigned int align_mask) 90 { 91 unsigned long i; 92 93 i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask); 94 95 if (unlikely(i >= max_ppods) && (start > nr)) 96 i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1, 97 align_mask); 98 if (unlikely(i >= max_ppods)) 99 return -ENOSPC; 100 101 bitmap_set(bmap, i, nr); 102 return (int)i; 103 } 104 105 static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count, 106 unsigned long caller_data) 107 { 108 struct cxgbi_ppod_data *pdata = ppm->ppod_data + i; 109 110 pdata->caller_data = caller_data; 111 pdata->npods = count; 112 113 if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1)) 114 pdata->color = 0; 115 else 116 pdata->color++; 117 } 118 119 static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count, 120 unsigned long caller_data) 121 { 122 struct cxgbi_ppm_pool *pool; 123 unsigned int cpu; 124 int i; 125 126 if (!ppm->pool) 127 return -EINVAL; 128 129 cpu = get_cpu(); 130 pool = per_cpu_ptr(ppm->pool, cpu); 131 spin_lock_bh(&pool->lock); 132 put_cpu(); 133 134 i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max, 135 pool->next, count, 0); 136 if (i < 0) { 137 pool->next = 0; 138 spin_unlock_bh(&pool->lock); 139 return -ENOSPC; 140 } 141 142 pool->next = i + count; 143 if (pool->next >= ppm->pool_index_max) 144 pool->next = 0; 145 146 spin_unlock_bh(&pool->lock); 147 148 pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n", 149 __func__, cpu, i, count, i + cpu * ppm->pool_index_max, 150 pool->next); 151 152 i += cpu * ppm->pool_index_max; 153 ppm_mark_entries(ppm, i, count, caller_data); 154 155 return i; 156 } 157 158 static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count, 159 unsigned long caller_data) 160 { 161 int i; 162 163 spin_lock_bh(&ppm->map_lock); 164 i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max, 165 ppm->next, count, 0); 166 if (i < 0) { 167 ppm->next = 0; 168 spin_unlock_bh(&ppm->map_lock); 169 pr_debug("ippm: NO suitable entries %u available.\n", 170 count); 171 return -ENOSPC; 172 } 173 174 ppm->next = i + count; 175 if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram)) 176 ppm->next = 0; 177 else if (ppm->next >= ppm->bmap_index_max) 178 ppm->next = 0; 179 180 spin_unlock_bh(&ppm->map_lock); 181 182 pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n", 183 __func__, i, count, i + ppm->pool_rsvd, ppm->next, 184 caller_data); 185 186 i += ppm->pool_rsvd; 187 ppm_mark_entries(ppm, i, count, caller_data); 188 189 return i; 190 } 191 192 static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count) 193 { 194 pr_debug("%s: idx %d + %d.\n", __func__, i, count); 195 196 if (i < ppm->pool_rsvd) { 197 unsigned int cpu; 198 struct cxgbi_ppm_pool *pool; 199 200 cpu = i / ppm->pool_index_max; 201 i %= ppm->pool_index_max; 202 203 pool = per_cpu_ptr(ppm->pool, cpu); 204 spin_lock_bh(&pool->lock); 205 bitmap_clear(pool->bmap, i, count); 206 207 if (i < pool->next) 208 pool->next = i; 209 spin_unlock_bh(&pool->lock); 210 211 pr_debug("%s: cpu %u, idx %d, next %u.\n", 212 __func__, cpu, i, pool->next); 213 } else { 214 spin_lock_bh(&ppm->map_lock); 215 216 i -= ppm->pool_rsvd; 217 bitmap_clear(ppm->ppod_bmap, i, count); 218 219 if (i < ppm->next) 220 ppm->next = i; 221 spin_unlock_bh(&ppm->map_lock); 222 223 pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next); 224 } 225 } 226 227 void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx) 228 { 229 struct cxgbi_ppod_data *pdata; 230 231 if (idx >= ppm->ppmax) { 232 pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax); 233 return; 234 } 235 236 pdata = ppm->ppod_data + idx; 237 if (!pdata->npods) { 238 pr_warn("ippm: idx %u, npods 0.\n", idx); 239 return; 240 } 241 242 pr_debug("release idx %u, npods %u.\n", idx, pdata->npods); 243 ppm_unmark_entries(ppm, idx, pdata->npods); 244 } 245 EXPORT_SYMBOL(cxgbi_ppm_ppod_release); 246 247 int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages, 248 u32 per_tag_pg_idx, u32 *ppod_idx, 249 u32 *ddp_tag, unsigned long caller_data) 250 { 251 struct cxgbi_ppod_data *pdata; 252 unsigned int npods; 253 int idx = -1; 254 unsigned int hwidx; 255 u32 tag; 256 257 npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT; 258 if (!npods) { 259 pr_warn("%s: pages %u -> npods %u, full.\n", 260 __func__, nr_pages, npods); 261 return -EINVAL; 262 } 263 264 /* grab from cpu pool first */ 265 idx = ppm_get_cpu_entries(ppm, npods, caller_data); 266 /* try the general pool */ 267 if (idx < 0) 268 idx = ppm_get_entries(ppm, npods, caller_data); 269 if (idx < 0) { 270 pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n", 271 nr_pages, npods, ppm->next, caller_data); 272 return idx; 273 } 274 275 pdata = ppm->ppod_data + idx; 276 hwidx = ppm->base_idx + idx; 277 278 tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color); 279 280 if (per_tag_pg_idx) 281 tag |= (per_tag_pg_idx << 30) & 0xC0000000; 282 283 *ppod_idx = idx; 284 *ddp_tag = tag; 285 286 pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n", 287 nr_pages, tag, idx, npods, caller_data); 288 289 return npods; 290 } 291 EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve); 292 293 void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag, 294 unsigned int tid, unsigned int offset, 295 unsigned int length, 296 struct cxgbi_pagepod_hdr *hdr) 297 { 298 /* The ddp tag in pagepod should be with bit 31:30 set to 0. 299 * The ddp Tag on the wire should be with non-zero 31:30 to the peer 300 */ 301 tag &= 0x3FFFFFFF; 302 303 hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid)); 304 305 hdr->rsvd = 0; 306 hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask); 307 hdr->max_offset = htonl(length); 308 hdr->page_offset = htonl(offset); 309 310 pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n", 311 tag, tid, length, offset); 312 } 313 EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr); 314 315 static void ppm_free(struct cxgbi_ppm *ppm) 316 { 317 vfree(ppm); 318 } 319 320 static void ppm_destroy(struct kref *kref) 321 { 322 struct cxgbi_ppm *ppm = container_of(kref, 323 struct cxgbi_ppm, 324 refcnt); 325 pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n", 326 ppm->ndev->name, ppm); 327 328 *ppm->ppm_pp = NULL; 329 330 free_percpu(ppm->pool); 331 ppm_free(ppm); 332 } 333 334 int cxgbi_ppm_release(struct cxgbi_ppm *ppm) 335 { 336 if (ppm) { 337 int rv; 338 339 rv = kref_put(&ppm->refcnt, ppm_destroy); 340 return rv; 341 } 342 return 1; 343 } 344 EXPORT_SYMBOL(cxgbi_ppm_release); 345 346 static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, 347 unsigned int *pcpu_ppmax) 348 { 349 struct cxgbi_ppm_pool *pools; 350 unsigned int ppmax = (*total) / num_possible_cpus(); 351 unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3; 352 unsigned int bmap; 353 unsigned int alloc_sz; 354 unsigned int count = 0; 355 unsigned int cpu; 356 357 /* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */ 358 if (ppmax > max) 359 ppmax = max; 360 361 /* pool size must be multiple of unsigned long */ 362 bmap = ppmax / BITS_PER_TYPE(unsigned long); 363 if (!bmap) 364 return NULL; 365 366 ppmax = (bmap * sizeof(unsigned long)) << 3; 367 368 alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap; 369 pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool)); 370 371 if (!pools) 372 return NULL; 373 374 for_each_possible_cpu(cpu) { 375 struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu); 376 377 memset(ppool, 0, alloc_sz); 378 spin_lock_init(&ppool->lock); 379 count += ppmax; 380 } 381 382 *total = count; 383 *pcpu_ppmax = ppmax; 384 385 return pools; 386 } 387 388 int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, 389 struct pci_dev *pdev, void *lldev, 390 struct cxgbi_tag_format *tformat, unsigned int iscsi_size, 391 unsigned int llimit, unsigned int start, 392 unsigned int reserve_factor, unsigned int iscsi_edram_start, 393 unsigned int iscsi_edram_size) 394 { 395 struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp); 396 struct cxgbi_ppm_pool *pool = NULL; 397 unsigned int pool_index_max = 0; 398 unsigned int ppmax_pool = 0; 399 unsigned int ppod_bmap_size; 400 unsigned int alloc_sz; 401 unsigned int ppmax; 402 403 if (!iscsi_edram_start) 404 iscsi_edram_size = 0; 405 406 if (iscsi_edram_size && 407 ((iscsi_edram_start + iscsi_edram_size) != start)) { 408 pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x " 409 "size 0x%x DDR start 0x%x\n", 410 iscsi_edram_start, iscsi_edram_size, start); 411 return -EINVAL; 412 } 413 414 if (iscsi_edram_size) { 415 reserve_factor = 0; 416 start = iscsi_edram_start; 417 } 418 419 ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT; 420 421 if (ppm) { 422 pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", 423 ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax); 424 kref_get(&ppm->refcnt); 425 return 1; 426 } 427 428 if (reserve_factor) { 429 ppmax_pool = ppmax / reserve_factor; 430 pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max); 431 if (!pool) { 432 ppmax_pool = 0; 433 reserve_factor = 0; 434 } 435 436 pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n", 437 ndev->name, ppmax, ppmax_pool, pool_index_max); 438 } 439 440 ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool); 441 alloc_sz = sizeof(struct cxgbi_ppm) + 442 ppmax * (sizeof(struct cxgbi_ppod_data)) + 443 ppod_bmap_size * sizeof(unsigned long); 444 445 ppm = vzalloc(alloc_sz); 446 if (!ppm) 447 goto release_ppm_pool; 448 449 ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]); 450 451 if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) { 452 unsigned int start = ppmax - ppmax_pool; 453 unsigned int end = ppod_bmap_size >> 3; 454 455 bitmap_set(ppm->ppod_bmap, ppmax, end - start); 456 pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n", 457 __func__, ppmax, ppmax_pool, ppod_bmap_size, start, 458 end); 459 } 460 if (iscsi_edram_size) { 461 unsigned int first_ddr_idx = 462 iscsi_edram_size >> PPOD_SIZE_SHIFT; 463 464 ppm->max_index_in_edram = first_ddr_idx - 1; 465 bitmap_set(ppm->ppod_bmap, first_ddr_idx, 1); 466 pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx); 467 } 468 469 spin_lock_init(&ppm->map_lock); 470 kref_init(&ppm->refcnt); 471 472 memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format)); 473 474 ppm->ppm_pp = ppm_pp; 475 ppm->ndev = ndev; 476 ppm->pdev = pdev; 477 ppm->lldev = lldev; 478 ppm->ppmax = ppmax; 479 ppm->next = 0; 480 ppm->llimit = llimit; 481 ppm->base_idx = start > llimit ? 482 (start - llimit + 1) >> PPOD_SIZE_SHIFT : 0; 483 ppm->bmap_index_max = ppmax - ppmax_pool; 484 485 ppm->pool = pool; 486 ppm->pool_rsvd = ppmax_pool; 487 ppm->pool_index_max = pool_index_max; 488 489 /* check one more time */ 490 if (*ppm_pp) { 491 ppm_free(ppm); 492 ppm = (struct cxgbi_ppm *)(*ppm_pp); 493 494 pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", 495 ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax); 496 497 kref_get(&ppm->refcnt); 498 return 1; 499 } 500 *ppm_pp = ppm; 501 502 ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE); 503 504 pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n", 505 ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE, 506 ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd, 507 ppm->pool_index_max); 508 509 return 0; 510 511 release_ppm_pool: 512 free_percpu(pool); 513 return -ENOMEM; 514 } 515 EXPORT_SYMBOL(cxgbi_ppm_init); 516 517 unsigned int cxgbi_tagmask_set(unsigned int ppmax) 518 { 519 unsigned int bits = fls(ppmax); 520 521 if (bits > PPOD_IDX_MAX_SIZE) 522 bits = PPOD_IDX_MAX_SIZE; 523 524 pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n", 525 ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT)); 526 527 return 1 << (bits + PPOD_IDX_SHIFT); 528 } 529 EXPORT_SYMBOL(cxgbi_tagmask_set); 530 531 MODULE_AUTHOR("Chelsio Communications"); 532 MODULE_DESCRIPTION("Chelsio common library"); 533 MODULE_VERSION(DRV_VERSION); 534 MODULE_LICENSE("Dual BSD/GPL"); 535