1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com> 4 * 5 * Scatterlist handling helpers. 6 */ 7 #include <linux/export.h> 8 #include <linux/slab.h> 9 #include <linux/scatterlist.h> 10 #include <linux/highmem.h> 11 #include <linux/kmemleak.h> 12 #include <linux/bvec.h> 13 #include <linux/uio.h> 14 15 /** 16 * sg_next - return the next scatterlist entry in a list 17 * @sg: The current sg entry 18 * 19 * Description: 20 * Usually the next entry will be @sg@ + 1, but if this sg element is part 21 * of a chained scatterlist, it could jump to the start of a new 22 * scatterlist array. 23 * 24 **/ 25 struct scatterlist *sg_next(struct scatterlist *sg) 26 { 27 if (sg_is_last(sg)) 28 return NULL; 29 30 sg++; 31 if (unlikely(sg_is_chain(sg))) 32 sg = sg_chain_ptr(sg); 33 34 return sg; 35 } 36 EXPORT_SYMBOL(sg_next); 37 38 /** 39 * sg_nents - return total count of entries in scatterlist 40 * @sg: The scatterlist 41 * 42 * Description: 43 * Allows to know how many entries are in sg, taking into account 44 * chaining as well 45 * 46 **/ 47 int sg_nents(struct scatterlist *sg) 48 { 49 int nents; 50 for (nents = 0; sg; sg = sg_next(sg)) 51 nents++; 52 return nents; 53 } 54 EXPORT_SYMBOL(sg_nents); 55 56 /** 57 * sg_nents_for_len - return total count of entries in scatterlist 58 * needed to satisfy the supplied length 59 * @sg: The scatterlist 60 * @len: The total required length 61 * 62 * Description: 63 * Determines the number of entries in sg that are required to meet 64 * the supplied length, taking into account chaining as well 65 * 66 * Returns: 67 * the number of sg entries needed, negative error on failure 68 * 69 **/ 70 int sg_nents_for_len(struct scatterlist *sg, u64 len) 71 { 72 int nents; 73 u64 total; 74 75 if (!len) 76 return 0; 77 78 for (nents = 0, total = 0; sg; sg = sg_next(sg)) { 79 nents++; 80 total += sg->length; 81 if (total >= len) 82 return nents; 83 } 84 85 return -EINVAL; 86 } 87 
EXPORT_SYMBOL(sg_nents_for_len);

/**
 * sg_last - return the last scatterlist entry in a list
 * @sgl:	First entry in the scatterlist
 * @nents:	Number of entries in the scatterlist
 *
 * Description:
 *   Should only be used casually, it (currently) scans the entire list
 *   to get the last entry.
 *
 *   Note that the @sgl pointer passed in need not be the first one,
 *   the important bit is that @nents denotes the number of entries that
 *   exist from @sgl.
 *
 **/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
	struct scatterlist *sg, *ret = NULL;
	unsigned int i;

	/* Walk all @nents entries, remembering the most recent one. */
	for_each_sg(sgl, sg, nents, i)
		ret = sg;

	/*
	 * The entry reached after @nents steps must carry the end marker.
	 * NOTE(review): with @nents == 0 the loop body never runs and ret is
	 * still NULL when it is handed to sg_is_last() - confirm callers never
	 * pass 0.
	 */
	BUG_ON(!sg_is_last(ret));
	return ret;
}
EXPORT_SYMBOL(sg_last);

/**
 * sg_init_table - Initialize SG table
 * @sgl:	   The SG table
 * @nents:	   Number of entries in table
 *
 * Description:
 *   Zeroes all @nents entries and then calls sg_init_marker() on the table.
 *
 * Notes:
 *   If this is part of a chained sg table, sg_mark_end() should be
 *   used only on the last table part.
 *
 **/
void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
	memset(sgl, 0, sizeof(*sgl) * nents);
	sg_init_marker(sgl, nents);
}
EXPORT_SYMBOL(sg_init_table);

/**
 * sg_init_one - Initialize a single entry sg list
 * @sg:		 SG entry
 * @buf:	 Virtual address for IO
 * @buflen:	 IO length
 *
 * Description:
 *   Initializes @sg as a one-entry table and points it at @buf via
 *   sg_set_buf().
 *
 **/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	sg_init_table(sg, 1);
	sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);

/*
 * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
 * helpers.
 */
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		/*
		 * Kmemleak doesn't track page allocations as they are not
		 * commonly used (in a raw form) for kernel data structures.
		 * As we chain together a list of pages and then a normal
		 * kmalloc (tracked by kmemleak), in order for that last
		 * allocation not to become decoupled (and thus a
		 * false-positive) we need to inform kmemleak of all the
		 * intermediate allocations.
		 */
		void *ptr = (void *) __get_free_page(gfp_mask);
		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
		return ptr;
	} else
		return kmalloc_array(nents, sizeof(struct scatterlist),
				     gfp_mask);
}

/*
 * Counterpart of sg_kmalloc(): a chunk of exactly SG_MAX_SINGLE_ALLOC entries
 * was taken from the page allocator (and registered with kmemleak), anything
 * else came from kmalloc_array(). @nents must therefore be the size the chunk
 * was allocated with.
 */
static void sg_kfree(struct scatterlist *sg, unsigned int nents)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		kmemleak_free(sg);
		free_page((unsigned long) sg);
	} else
		kfree(sg);
}

/**
 * __sg_free_table - Free a previously mapped sg table
 * @table:	The sg table header to use
 * @max_ents:	The maximum number of entries per single scatterlist
 * @nents_first_chunk: Number of entries in the (preallocated) first
 * 	scatterlist chunk, 0 means no such preallocated first chunk
 * @free_fn:	Free function
 * @num_ents:	Number of entries in the table
 *
 *  Description:
 *    Free an sg table previously allocated and setup with
 *    __sg_alloc_table().  The @max_ents value must be identical to
 *    that previously used with __sg_alloc_table().
 *
 **/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
		     unsigned int num_ents)
{
	struct scatterlist *sgl, *next;
	/* The first chunk may have a different capacity than the rest. */
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;

	/* Nothing was ever attached to this table. */
	if (unlikely(!table->sgl))
		return;

	sgl = table->sgl;
	while (num_ents) {
		unsigned int alloc_size = num_ents;
		unsigned int sg_size;

		/*
		 * If we have more than max_ents segments left,
		 * then assign 'next' to the sg table after the current one.
		 * sg_size is then one less than alloc size, since the last
		 * element is the chain pointer.
		 */
		if (alloc_size > curr_max_ents) {
			next = sg_chain_ptr(&sgl[curr_max_ents - 1]);
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else {
			sg_size = alloc_size;
			next = NULL;
		}

		num_ents -= sg_size;
		/*
		 * A preallocated first chunk is not passed to free_fn; only
		 * clear the flag so that later chunks are freed.
		 */
		if (nents_first_chunk)
			nents_first_chunk = 0;
		else
			free_fn(sgl, alloc_size);
		sgl = next;
		/* Every chunk after the first has the regular capacity. */
		curr_max_ents = max_ents;
	}

	table->sgl = NULL;
}
EXPORT_SYMBOL(__sg_free_table);

/**
 * sg_free_append_table - Free a previously allocated append sg table.
 * @table:	 The mapped sg append table header
 *
 * Description:
 *   Frees @table->total_nents entries of the embedded sg table using the
 *   default sg_kfree() helper and SG_MAX_SINGLE_ALLOC sized chunks.
 *
 **/
void sg_free_append_table(struct sg_append_table *table)
{
	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->total_nents);
}
EXPORT_SYMBOL(sg_free_append_table);


/**
 * sg_free_table - Free a previously allocated sg table
 * @table:	The mapped sg table header
 *
 * Description:
 *   Frees @table->orig_nents entries using the default sg_kfree() helper
 *   and SG_MAX_SINGLE_ALLOC sized chunks.
 *
 **/
void sg_free_table(struct sg_table *table)
{
	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->orig_nents);
}
EXPORT_SYMBOL(sg_free_table);

/**
 * __sg_alloc_table - Allocate and initialize an sg table with given allocator
 * @table:	The sg table header to use
 * @nents:	Number of entries in sg list
 * @max_ents:	The maximum number of entries the allocator returns per call
 * @first_chunk: Preallocated first scatterlist chunk, used instead of calling
 *	the allocator for the first chunk (may be NULL)
 * @nents_first_chunk: Number of entries in the (preallocated) first
 * 	scatterlist chunk, 0 means no such preallocated chunk provided by user
 * @gfp_mask:	GFP allocation mask
 * @alloc_fn:	Allocator to use
 *
 * Description:
 *   This function returns a @table @nents long. The allocator is
 *   defined to return scatterlist chunks of maximum size @max_ents.
 *   Thus if @nents is bigger than @max_ents, the scatterlists will be
 *   chained in units of @max_ents.
 *
 * Notes:
 *   If this function returns non-0 (eg failure), the caller must call
 *   __sg_free_table() to cleanup any leftover allocations.
 *
 **/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *sg, *prv;
	unsigned int left;
	/* The first chunk may have a different capacity than the rest. */
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
	unsigned prv_max_ents;

	memset(table, 0, sizeof(*table));

	if (nents == 0)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	/* Without chaining support everything has to fit into one chunk. */
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif

	left = nents;
	prv = NULL;
	do {
		unsigned int sg_size, alloc_size = left;

		/*
		 * When more entries remain than fit into this chunk, the
		 * chunk's last slot is reserved for the chain link, so it
		 * only carries alloc_size - 1 data entries.
		 */
		if (alloc_size > curr_max_ents) {
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else
			sg_size = alloc_size;

		left -= sg_size;

		/* Consume the preallocated chunk first, if there is one. */
		if (first_chunk) {
			sg = first_chunk;
			first_chunk = NULL;
		} else {
			sg = alloc_fn(alloc_size, gfp_mask);
		}
		if (unlikely(!sg)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage.  Without this, sg_kfree() may get
			 * confused.
			 */
			if (prv)
				table->nents = ++table->orig_nents;

			return -ENOMEM;
		}

		sg_init_table(sg, alloc_size);
		table->nents = table->orig_nents += sg_size;

		/*
		 * If this is the first mapping, assign the sg table header.
		 * If this is not the first mapping, chain previous part.
		 */
		if (prv)
			sg_chain(prv, prv_max_ents, sg);
		else
			table->sgl = sg;

		/*
		 * If no more entries after this one, mark the end
		 */
		if (!left)
			sg_mark_end(&sg[sg_size - 1]);

		prv = sg;
		prv_max_ents = curr_max_ents;
		curr_max_ents = max_ents;
	} while (left);

	return 0;
}
EXPORT_SYMBOL(__sg_alloc_table);

/**
 * sg_alloc_table - Allocate and initialize an sg table
 * @table:	The sg table header to use
 * @nents:	Number of entries in sg list
 * @gfp_mask:	GFP allocation mask
 *
 *  Description:
 *    Allocate and initialize an sg table. If @nents is larger than
 *    SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
 *
 *  Returns:
 *    0 on success, negative error on failure. On failure the partially
 *    built table has already been released with sg_free_table().
 *
 **/
int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
	int ret;

	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
			       NULL, 0, gfp_mask, sg_kmalloc);
	if (unlikely(ret))
		sg_free_table(table);
	return ret;
}
EXPORT_SYMBOL(sg_alloc_table);

/*
 * Return the scatterlist entry that should be filled next.
 *
 * With @cur set, the entry following it is reused unless it is the last
 * entry of its chunk while more than one entry is still needed; in that case
 * (and when @cur is NULL) a new chunk of
 * min(@needed_sges, SG_MAX_SINGLE_ALLOC) entries is allocated. The new chunk
 * is either chained behind the last entry of the current chunk or installed
 * as the head of @table, and @table->total_nents is updated accordingly.
 *
 * Returns the entry to use, or ERR_PTR(-ENOMEM) if the allocation failed.
 */
static struct scatterlist *get_next_sg(struct sg_append_table *table,
				       struct scatterlist *cur,
				       unsigned long needed_sges,
				       gfp_t gfp_mask)
{
	struct scatterlist *new_sg, *next_sg;
	unsigned int alloc_size;

	if (cur) {
		next_sg = sg_next(cur);
		/* Check if the last entry should be kept for chaining */
		if (!sg_is_last(next_sg) || needed_sges == 1)
			return next_sg;
	}

	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
	new_sg = sg_kmalloc(alloc_size, gfp_mask);
	if (!new_sg)
		return ERR_PTR(-ENOMEM);
	sg_init_table(new_sg, alloc_size);
	if (cur) {
		/*
		 * next_sg becomes the chain link, so only alloc_size - 1
		 * entries are added to the total.
		 */
		table->total_nents += alloc_size - 1;
		__sg_chain(next_sg, new_sg);
	} else {
		table->sgt.sgl = new_sg;
		table->total_nents = alloc_size;
	}
	return new_sg;
}

/*
 * Page @a can be merged behind page @b when its pfn directly follows the pfn
 * of @b and zone_device_pages_have_same_pgmap() accepts the pair.
 */
static bool pages_are_mergeable(struct page *a, struct page *b)
{
	if (page_to_pfn(a) != page_to_pfn(b) + 1)
		return false;
	if (!zone_device_pages_have_same_pgmap(a, b))
		return false;
	return true;
}

/**
 * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
 *                                    table from an array of pages
 * @sgt_append:  The sg append table to use
 * @pages:       Pointer to an array of page pointers
 * @n_pages:     Number of pages in the pages array
 * @offset:      Offset from start of the first page to the start of a buffer
 * @size:        Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @left_pages:  Number of pages the caller still has to add after this call
 * @gfp_mask:	 GFP allocation mask
 *
 * Description:
 *    On the first call this allocates and initializes an sg table from a
 *    list of pages; on later calls it reuses the scatterlist from
 *    sgt_append. Contiguous ranges of the pages are squashed into a single
 *    scatterlist entry up to the maximum size specified in @max_segment. A
 *    user may provide an offset at a start and a size of valid data in a
 *    buffer specified by the page array. The returned sg table is released
 *    by sg_free_append_table().
 *
 * Returns:
 *   0 on success, negative error on failure
 *
 * Notes:
 *   If this function returns non-0 (eg failure), the caller must call
 *   sg_free_append_table() to cleanup any leftover allocations.
 *
 *   In the first call, sgt_append must be initialized.
 */
int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
		struct page **pages, unsigned int n_pages, unsigned int offset,
		unsigned long size, unsigned int max_segment,
		unsigned int left_pages, gfp_t gfp_mask)
{
	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
	unsigned int added_nents = 0;
	struct scatterlist *s = sgt_append->prv;
	struct page *last_pg;

	/*
	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
	 * otherwise it can overshoot.
	 */
	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
	if (WARN_ON(max_segment < PAGE_SIZE))
		return -EINVAL;

	/* Appending to an existing table is rejected without sg chaining. */
	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
		return -EOPNOTSUPP;

	if (sgt_append->prv) {
		/* pfn that directly follows the end of the previous entry */
		unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) +
			sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE;

		/* An offset is only accepted on the first call. */
		if (WARN_ON(offset))
			return -EINVAL;

		/* Merge contiguous pages into the last SG */
		prv_len = sgt_append->prv->length;
		if (page_to_pfn(pages[0]) == next_pfn) {
			last_pg = pfn_to_page(next_pfn - 1);
			while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
				if (sgt_append->prv->length + PAGE_SIZE > max_segment)
					break;
				sgt_append->prv->length += PAGE_SIZE;
				last_pg = pages[0];
				pages++;
				n_pages--;
			}
			/* Everything fitted into the previous entry. */
			if (!n_pages)
				goto out;
		}
	}

	/* compute number of contiguous chunks */
	chunks = 1;
	seg_len = 0;
	for (i = 1; i < n_pages; i++) {
		seg_len += PAGE_SIZE;
		if (seg_len >= max_segment ||
		    !pages_are_mergeable(pages[i], pages[i - 1])) {
			chunks++;
			seg_len = 0;
		}
	}

	/* merging chunks and putting them into the scatterlist */
	cur_page = 0;
	for (i = 0; i < chunks; i++) {
		unsigned int j, chunk_size;

		/* look for the end of the current chunk */
		seg_len = 0;
		for (j = cur_page + 1; j < n_pages; j++) {
			seg_len += PAGE_SIZE;
			if (seg_len >= max_segment ||
			    !pages_are_mergeable(pages[j], pages[j - 1]))
				break;
		}

		/* Pass how many chunks might be left */
		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
				gfp_mask);
		if (IS_ERR(s)) {
			/*
			 * Adjust entry length to be as before function was
			 * called.
			 */
			if (sgt_append->prv)
				sgt_append->prv->length = prv_len;
			return PTR_ERR(s);
		}
		/* Only the first chunk starts at a non-zero offset. */
		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
		sg_set_page(s, pages[cur_page],
			    min_t(unsigned long, size, chunk_size), offset);
		added_nents++;
		size -= chunk_size;
		offset = 0;
		cur_page = j;
	}
	sgt_append->sgt.nents += added_nents;
	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
	sgt_append->prv = s;
out:
	/* Terminate the list only when the caller has no more pages to add. */
	if (!left_pages)
		sg_mark_end(s);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_append_table_from_pages);

/**
 * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
 *                                     an array of pages and given maximum
 *                                     segment.
 * @sgt:	 The sg table header to use
 * @pages:	 Pointer to an array of page pointers
 * @n_pages:	 Number of pages in the pages array
 * @offset:      Offset from start of the first page to the start of a buffer
 * @size:        Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @gfp_mask:	 GFP allocation mask
 *
 *  Description:
 *    Allocate and initialize an sg table from a list of pages. Contiguous
 *    ranges of the pages are squashed into a single scatterlist node up to the
 *    maximum size specified in @max_segment. A user may provide an offset at a
 *    start and a size of valid data in a buffer specified by the page array.
 *
 *    The returned sg table is released by sg_free_table.
573 * 574 * Returns: 575 * 0 on success, negative error on failure 576 */ 577 int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages, 578 unsigned int n_pages, unsigned int offset, 579 unsigned long size, unsigned int max_segment, 580 gfp_t gfp_mask) 581 { 582 struct sg_append_table append = {}; 583 int err; 584 585 err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset, 586 size, max_segment, 0, gfp_mask); 587 if (err) { 588 sg_free_append_table(&append); 589 return err; 590 } 591 memcpy(sgt, &append.sgt, sizeof(*sgt)); 592 WARN_ON(append.total_nents != sgt->orig_nents); 593 return 0; 594 } 595 EXPORT_SYMBOL(sg_alloc_table_from_pages_segment); 596 597 #ifdef CONFIG_SGL_ALLOC 598 599 /** 600 * sgl_alloc_order - allocate a scatterlist and its pages 601 * @length: Length in bytes of the scatterlist. Must be at least one 602 * @order: Second argument for alloc_pages() 603 * @chainable: Whether or not to allocate an extra element in the scatterlist 604 * for scatterlist chaining purposes 605 * @gfp: Memory allocation flags 606 * @nent_p: [out] Number of entries in the scatterlist that have pages 607 * 608 * Returns: A pointer to an initialized scatterlist or %NULL upon failure. 
609 */ 610 struct scatterlist *sgl_alloc_order(unsigned long long length, 611 unsigned int order, bool chainable, 612 gfp_t gfp, unsigned int *nent_p) 613 { 614 struct scatterlist *sgl, *sg; 615 struct page *page; 616 unsigned int nent, nalloc; 617 u32 elem_len; 618 619 nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order); 620 /* Check for integer overflow */ 621 if (length > (nent << (PAGE_SHIFT + order))) 622 return NULL; 623 nalloc = nent; 624 if (chainable) { 625 /* Check for integer overflow */ 626 if (nalloc + 1 < nalloc) 627 return NULL; 628 nalloc++; 629 } 630 sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), 631 gfp & ~GFP_DMA); 632 if (!sgl) 633 return NULL; 634 635 sg_init_table(sgl, nalloc); 636 sg = sgl; 637 while (length) { 638 elem_len = min_t(u64, length, PAGE_SIZE << order); 639 page = alloc_pages(gfp, order); 640 if (!page) { 641 sgl_free_order(sgl, order); 642 return NULL; 643 } 644 645 sg_set_page(sg, page, elem_len, 0); 646 length -= elem_len; 647 sg = sg_next(sg); 648 } 649 WARN_ONCE(length, "length = %lld\n", length); 650 if (nent_p) 651 *nent_p = nent; 652 return sgl; 653 } 654 EXPORT_SYMBOL(sgl_alloc_order); 655 656 /** 657 * sgl_alloc - allocate a scatterlist and its pages 658 * @length: Length in bytes of the scatterlist 659 * @gfp: Memory allocation flags 660 * @nent_p: [out] Number of entries in the scatterlist 661 * 662 * Returns: A pointer to an initialized scatterlist or %NULL upon failure. 
663 */ 664 struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, 665 unsigned int *nent_p) 666 { 667 return sgl_alloc_order(length, 0, false, gfp, nent_p); 668 } 669 EXPORT_SYMBOL(sgl_alloc); 670 671 /** 672 * sgl_free_n_order - free a scatterlist and its pages 673 * @sgl: Scatterlist with one or more elements 674 * @nents: Maximum number of elements to free 675 * @order: Second argument for __free_pages() 676 * 677 * Notes: 678 * - If several scatterlists have been chained and each chain element is 679 * freed separately then it's essential to set nents correctly to avoid that a 680 * page would get freed twice. 681 * - All pages in a chained scatterlist can be freed at once by setting @nents 682 * to a high number. 683 */ 684 void sgl_free_n_order(struct scatterlist *sgl, int nents, int order) 685 { 686 struct scatterlist *sg; 687 struct page *page; 688 int i; 689 690 for_each_sg(sgl, sg, nents, i) { 691 if (!sg) 692 break; 693 page = sg_page(sg); 694 if (page) 695 __free_pages(page, order); 696 } 697 kfree(sgl); 698 } 699 EXPORT_SYMBOL(sgl_free_n_order); 700 701 /** 702 * sgl_free_order - free a scatterlist and its pages 703 * @sgl: Scatterlist with one or more elements 704 * @order: Second argument for __free_pages() 705 */ 706 void sgl_free_order(struct scatterlist *sgl, int order) 707 { 708 sgl_free_n_order(sgl, INT_MAX, order); 709 } 710 EXPORT_SYMBOL(sgl_free_order); 711 712 /** 713 * sgl_free - free a scatterlist and its pages 714 * @sgl: Scatterlist with one or more elements 715 */ 716 void sgl_free(struct scatterlist *sgl) 717 { 718 sgl_free_order(sgl, 0); 719 } 720 EXPORT_SYMBOL(sgl_free); 721 722 #endif /* CONFIG_SGL_ALLOC */ 723 724 void __sg_page_iter_start(struct sg_page_iter *piter, 725 struct scatterlist *sglist, unsigned int nents, 726 unsigned long pgoffset) 727 { 728 piter->__pg_advance = 0; 729 piter->__nents = nents; 730 731 piter->sg = sglist; 732 piter->sg_pgoffset = pgoffset; 733 } 734 EXPORT_SYMBOL(__sg_page_iter_start); 735 
736 static int sg_page_count(struct scatterlist *sg) 737 { 738 return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT; 739 } 740 741 bool __sg_page_iter_next(struct sg_page_iter *piter) 742 { 743 if (!piter->__nents || !piter->sg) 744 return false; 745 746 piter->sg_pgoffset += piter->__pg_advance; 747 piter->__pg_advance = 1; 748 749 while (piter->sg_pgoffset >= sg_page_count(piter->sg)) { 750 piter->sg_pgoffset -= sg_page_count(piter->sg); 751 piter->sg = sg_next(piter->sg); 752 if (!--piter->__nents || !piter->sg) 753 return false; 754 } 755 756 return true; 757 } 758 EXPORT_SYMBOL(__sg_page_iter_next); 759 760 static int sg_dma_page_count(struct scatterlist *sg) 761 { 762 return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT; 763 } 764 765 bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter) 766 { 767 struct sg_page_iter *piter = &dma_iter->base; 768 769 if (!piter->__nents || !piter->sg) 770 return false; 771 772 piter->sg_pgoffset += piter->__pg_advance; 773 piter->__pg_advance = 1; 774 775 while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) { 776 piter->sg_pgoffset -= sg_dma_page_count(piter->sg); 777 piter->sg = sg_next(piter->sg); 778 if (!--piter->__nents || !piter->sg) 779 return false; 780 } 781 782 return true; 783 } 784 EXPORT_SYMBOL(__sg_page_iter_dma_next); 785 786 /** 787 * sg_miter_start - start mapping iteration over a sg list 788 * @miter: sg mapping iter to be started 789 * @sgl: sg list to iterate over 790 * @nents: number of sg entries 791 * 792 * Description: 793 * Starts mapping iterator @miter. 794 * 795 * Context: 796 * Don't care. 
797 */ 798 void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, 799 unsigned int nents, unsigned int flags) 800 { 801 memset(miter, 0, sizeof(struct sg_mapping_iter)); 802 803 __sg_page_iter_start(&miter->piter, sgl, nents, 0); 804 WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG))); 805 miter->__flags = flags; 806 } 807 EXPORT_SYMBOL(sg_miter_start); 808 809 static bool sg_miter_get_next_page(struct sg_mapping_iter *miter) 810 { 811 if (!miter->__remaining) { 812 struct scatterlist *sg; 813 814 if (!__sg_page_iter_next(&miter->piter)) 815 return false; 816 817 sg = miter->piter.sg; 818 819 miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset; 820 miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT; 821 miter->__offset &= PAGE_SIZE - 1; 822 miter->__remaining = sg->offset + sg->length - 823 (miter->piter.sg_pgoffset << PAGE_SHIFT) - 824 miter->__offset; 825 miter->__remaining = min_t(unsigned long, miter->__remaining, 826 PAGE_SIZE - miter->__offset); 827 } 828 829 return true; 830 } 831 832 /** 833 * sg_miter_skip - reposition mapping iterator 834 * @miter: sg mapping iter to be skipped 835 * @offset: number of bytes to plus the current location 836 * 837 * Description: 838 * Sets the offset of @miter to its current location plus @offset bytes. 839 * If mapping iterator @miter has been proceeded by sg_miter_next(), this 840 * stops @miter. 841 * 842 * Context: 843 * Don't care. 844 * 845 * Returns: 846 * true if @miter contains the valid mapping. false if end of sg 847 * list is reached. 
848 */ 849 bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) 850 { 851 sg_miter_stop(miter); 852 853 while (offset) { 854 off_t consumed; 855 856 if (!sg_miter_get_next_page(miter)) 857 return false; 858 859 consumed = min_t(off_t, offset, miter->__remaining); 860 miter->__offset += consumed; 861 miter->__remaining -= consumed; 862 offset -= consumed; 863 } 864 865 return true; 866 } 867 EXPORT_SYMBOL(sg_miter_skip); 868 869 /** 870 * sg_miter_next - proceed mapping iterator to the next mapping 871 * @miter: sg mapping iter to proceed 872 * 873 * Description: 874 * Proceeds @miter to the next mapping. @miter should have been started 875 * using sg_miter_start(). On successful return, @miter->page, 876 * @miter->addr and @miter->length point to the current mapping. 877 * 878 * Context: 879 * May sleep if !SG_MITER_ATOMIC. 880 * 881 * Returns: 882 * true if @miter contains the next mapping. false if end of sg 883 * list is reached. 884 */ 885 bool sg_miter_next(struct sg_mapping_iter *miter) 886 { 887 sg_miter_stop(miter); 888 889 /* 890 * Get to the next page if necessary. 891 * __remaining, __offset is adjusted by sg_miter_stop 892 */ 893 if (!sg_miter_get_next_page(miter)) 894 return false; 895 896 miter->page = sg_page_iter_page(&miter->piter); 897 miter->consumed = miter->length = miter->__remaining; 898 899 if (miter->__flags & SG_MITER_ATOMIC) 900 miter->addr = kmap_atomic(miter->page) + miter->__offset; 901 else 902 miter->addr = kmap(miter->page) + miter->__offset; 903 904 return true; 905 } 906 EXPORT_SYMBOL(sg_miter_next); 907 908 /** 909 * sg_miter_stop - stop mapping iteration 910 * @miter: sg mapping iter to be stopped 911 * 912 * Description: 913 * Stops mapping iterator @miter. @miter should have been started 914 * using sg_miter_start(). A stopped iteration can be resumed by 915 * calling sg_miter_next() on it. This is useful when resources (kmap) 916 * need to be released during iteration. 
917 * 918 * Context: 919 * Don't care otherwise. 920 */ 921 void sg_miter_stop(struct sg_mapping_iter *miter) 922 { 923 WARN_ON(miter->consumed > miter->length); 924 925 /* drop resources from the last iteration */ 926 if (miter->addr) { 927 miter->__offset += miter->consumed; 928 miter->__remaining -= miter->consumed; 929 930 if (miter->__flags & SG_MITER_TO_SG) 931 flush_dcache_page(miter->page); 932 933 if (miter->__flags & SG_MITER_ATOMIC) { 934 WARN_ON_ONCE(!pagefault_disabled()); 935 kunmap_atomic(miter->addr); 936 } else 937 kunmap(miter->page); 938 939 miter->page = NULL; 940 miter->addr = NULL; 941 miter->length = 0; 942 miter->consumed = 0; 943 } 944 } 945 EXPORT_SYMBOL(sg_miter_stop); 946 947 /** 948 * sg_copy_buffer - Copy data between a linear buffer and an SG list 949 * @sgl: The SG list 950 * @nents: Number of SG entries 951 * @buf: Where to copy from 952 * @buflen: The number of bytes to copy 953 * @skip: Number of bytes to skip before copying 954 * @to_buffer: transfer direction (true == from an sg list to a 955 * buffer, false == from a buffer to an sg list) 956 * 957 * Returns the number of copied bytes. 
958 * 959 **/ 960 size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, 961 size_t buflen, off_t skip, bool to_buffer) 962 { 963 unsigned int offset = 0; 964 struct sg_mapping_iter miter; 965 unsigned int sg_flags = SG_MITER_ATOMIC; 966 967 if (to_buffer) 968 sg_flags |= SG_MITER_FROM_SG; 969 else 970 sg_flags |= SG_MITER_TO_SG; 971 972 sg_miter_start(&miter, sgl, nents, sg_flags); 973 974 if (!sg_miter_skip(&miter, skip)) 975 return 0; 976 977 while ((offset < buflen) && sg_miter_next(&miter)) { 978 unsigned int len; 979 980 len = min(miter.length, buflen - offset); 981 982 if (to_buffer) 983 memcpy(buf + offset, miter.addr, len); 984 else 985 memcpy(miter.addr, buf + offset, len); 986 987 offset += len; 988 } 989 990 sg_miter_stop(&miter); 991 992 return offset; 993 } 994 EXPORT_SYMBOL(sg_copy_buffer); 995 996 /** 997 * sg_copy_from_buffer - Copy from a linear buffer to an SG list 998 * @sgl: The SG list 999 * @nents: Number of SG entries 1000 * @buf: Where to copy from 1001 * @buflen: The number of bytes to copy 1002 * 1003 * Returns the number of copied bytes. 1004 * 1005 **/ 1006 size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents, 1007 const void *buf, size_t buflen) 1008 { 1009 return sg_copy_buffer(sgl, nents, (void *)buf, buflen, 0, false); 1010 } 1011 EXPORT_SYMBOL(sg_copy_from_buffer); 1012 1013 /** 1014 * sg_copy_to_buffer - Copy from an SG list to a linear buffer 1015 * @sgl: The SG list 1016 * @nents: Number of SG entries 1017 * @buf: Where to copy to 1018 * @buflen: The number of bytes to copy 1019 * 1020 * Returns the number of copied bytes. 
 *
 **/
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			 void *buf, size_t buflen)
{
	/* No skip, direction: sg list -> buffer. */
	return sg_copy_buffer(sgl, nents, buf, buflen, 0, true);
}
EXPORT_SYMBOL(sg_copy_to_buffer);

/**
 * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy from
 * @buflen:		 The number of bytes to copy
 * @skip:		 Number of bytes to skip in the SG list before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			    const void *buf, size_t buflen, off_t skip)
{
	/* Direction: buffer -> sg list. */
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, skip, false);
}
EXPORT_SYMBOL(sg_pcopy_from_buffer);

/**
 * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy to
 * @buflen:		 The number of bytes to copy
 * @skip:		 Number of bytes to skip in the SG list before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			  void *buf, size_t buflen, off_t skip)
{
	/* Direction: sg list -> buffer. */
	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);

/**
 * sg_zero_buffer - Zero-out a part of a SG list
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buflen:		 The number of bytes to zero out
 * @skip:		 Number of bytes to skip before zeroing
 *
 * Returns the number of bytes zeroed.
1074 **/ 1075 size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, 1076 size_t buflen, off_t skip) 1077 { 1078 unsigned int offset = 0; 1079 struct sg_mapping_iter miter; 1080 unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG; 1081 1082 sg_miter_start(&miter, sgl, nents, sg_flags); 1083 1084 if (!sg_miter_skip(&miter, skip)) 1085 return false; 1086 1087 while (offset < buflen && sg_miter_next(&miter)) { 1088 unsigned int len; 1089 1090 len = min(miter.length, buflen - offset); 1091 memset(miter.addr, 0, len); 1092 1093 offset += len; 1094 } 1095 1096 sg_miter_stop(&miter); 1097 return offset; 1098 } 1099 EXPORT_SYMBOL(sg_zero_buffer); 1100 1101 /* 1102 * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class 1103 * iterators, and add them to the scatterlist. 1104 */ 1105 static ssize_t extract_user_to_sg(struct iov_iter *iter, 1106 ssize_t maxsize, 1107 struct sg_table *sgtable, 1108 unsigned int sg_max, 1109 iov_iter_extraction_t extraction_flags) 1110 { 1111 struct scatterlist *sg = sgtable->sgl + sgtable->nents; 1112 struct page **pages; 1113 unsigned int npages; 1114 ssize_t ret = 0, res; 1115 size_t len, off; 1116 1117 /* We decant the page list into the tail of the scatterlist */ 1118 pages = (void *)sgtable->sgl + 1119 array_size(sg_max, sizeof(struct scatterlist)); 1120 pages -= sg_max; 1121 1122 do { 1123 res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max, 1124 extraction_flags, &off); 1125 if (res < 0) 1126 goto failed; 1127 1128 len = res; 1129 maxsize -= len; 1130 ret += len; 1131 npages = DIV_ROUND_UP(off + len, PAGE_SIZE); 1132 sg_max -= npages; 1133 1134 for (; npages > 0; npages--) { 1135 struct page *page = *pages; 1136 size_t seg = min_t(size_t, PAGE_SIZE - off, len); 1137 1138 *pages++ = NULL; 1139 sg_set_page(sg, page, seg, off); 1140 sgtable->nents++; 1141 sg++; 1142 len -= seg; 1143 off = 0; 1144 } 1145 } while (maxsize > 0 && sg_max > 0); 1146 1147 return ret; 1148 1149 failed: 1150 while 
(sgtable->nents > sgtable->orig_nents) 1151 put_page(sg_page(&sgtable->sgl[--sgtable->nents])); 1152 return res; 1153 } 1154 1155 /* 1156 * Extract up to sg_max pages from a BVEC-type iterator and add them to the 1157 * scatterlist. The pages are not pinned. 1158 */ 1159 static ssize_t extract_bvec_to_sg(struct iov_iter *iter, 1160 ssize_t maxsize, 1161 struct sg_table *sgtable, 1162 unsigned int sg_max, 1163 iov_iter_extraction_t extraction_flags) 1164 { 1165 const struct bio_vec *bv = iter->bvec; 1166 struct scatterlist *sg = sgtable->sgl + sgtable->nents; 1167 unsigned long start = iter->iov_offset; 1168 unsigned int i; 1169 ssize_t ret = 0; 1170 1171 for (i = 0; i < iter->nr_segs; i++) { 1172 size_t off, len; 1173 1174 len = bv[i].bv_len; 1175 if (start >= len) { 1176 start -= len; 1177 continue; 1178 } 1179 1180 len = min_t(size_t, maxsize, len - start); 1181 off = bv[i].bv_offset + start; 1182 1183 sg_set_page(sg, bv[i].bv_page, len, off); 1184 sgtable->nents++; 1185 sg++; 1186 sg_max--; 1187 1188 ret += len; 1189 maxsize -= len; 1190 if (maxsize <= 0 || sg_max == 0) 1191 break; 1192 start = 0; 1193 } 1194 1195 if (ret > 0) 1196 iov_iter_advance(iter, ret); 1197 return ret; 1198 } 1199 1200 /* 1201 * Extract up to sg_max pages from a KVEC-type iterator and add them to the 1202 * scatterlist. This can deal with vmalloc'd buffers as well as kmalloc'd or 1203 * static buffers. The pages are not pinned. 
1204 */ 1205 static ssize_t extract_kvec_to_sg(struct iov_iter *iter, 1206 ssize_t maxsize, 1207 struct sg_table *sgtable, 1208 unsigned int sg_max, 1209 iov_iter_extraction_t extraction_flags) 1210 { 1211 const struct kvec *kv = iter->kvec; 1212 struct scatterlist *sg = sgtable->sgl + sgtable->nents; 1213 unsigned long start = iter->iov_offset; 1214 unsigned int i; 1215 ssize_t ret = 0; 1216 1217 for (i = 0; i < iter->nr_segs; i++) { 1218 struct page *page; 1219 unsigned long kaddr; 1220 size_t off, len, seg; 1221 1222 len = kv[i].iov_len; 1223 if (start >= len) { 1224 start -= len; 1225 continue; 1226 } 1227 1228 kaddr = (unsigned long)kv[i].iov_base + start; 1229 off = kaddr & ~PAGE_MASK; 1230 len = min_t(size_t, maxsize, len - start); 1231 kaddr &= PAGE_MASK; 1232 1233 maxsize -= len; 1234 ret += len; 1235 do { 1236 seg = min_t(size_t, len, PAGE_SIZE - off); 1237 if (is_vmalloc_or_module_addr((void *)kaddr)) 1238 page = vmalloc_to_page((void *)kaddr); 1239 else 1240 page = virt_to_page((void *)kaddr); 1241 1242 sg_set_page(sg, page, len, off); 1243 sgtable->nents++; 1244 sg++; 1245 sg_max--; 1246 1247 len -= seg; 1248 kaddr += PAGE_SIZE; 1249 off = 0; 1250 } while (len > 0 && sg_max > 0); 1251 1252 if (maxsize <= 0 || sg_max == 0) 1253 break; 1254 start = 0; 1255 } 1256 1257 if (ret > 0) 1258 iov_iter_advance(iter, ret); 1259 return ret; 1260 } 1261 1262 /* 1263 * Extract up to sg_max folios from an XARRAY-type iterator and add them to 1264 * the scatterlist. The pages are not pinned. 
1265 */ 1266 static ssize_t extract_xarray_to_sg(struct iov_iter *iter, 1267 ssize_t maxsize, 1268 struct sg_table *sgtable, 1269 unsigned int sg_max, 1270 iov_iter_extraction_t extraction_flags) 1271 { 1272 struct scatterlist *sg = sgtable->sgl + sgtable->nents; 1273 struct xarray *xa = iter->xarray; 1274 struct folio *folio; 1275 loff_t start = iter->xarray_start + iter->iov_offset; 1276 pgoff_t index = start / PAGE_SIZE; 1277 ssize_t ret = 0; 1278 size_t offset, len; 1279 XA_STATE(xas, xa, index); 1280 1281 rcu_read_lock(); 1282 1283 xas_for_each(&xas, folio, ULONG_MAX) { 1284 if (xas_retry(&xas, folio)) 1285 continue; 1286 if (WARN_ON(xa_is_value(folio))) 1287 break; 1288 if (WARN_ON(folio_test_hugetlb(folio))) 1289 break; 1290 1291 offset = offset_in_folio(folio, start); 1292 len = min_t(size_t, maxsize, folio_size(folio) - offset); 1293 1294 sg_set_page(sg, folio_page(folio, 0), len, offset); 1295 sgtable->nents++; 1296 sg++; 1297 sg_max--; 1298 1299 maxsize -= len; 1300 ret += len; 1301 if (maxsize <= 0 || sg_max == 0) 1302 break; 1303 } 1304 1305 rcu_read_unlock(); 1306 if (ret > 0) 1307 iov_iter_advance(iter, ret); 1308 return ret; 1309 } 1310 1311 /** 1312 * extract_iter_to_sg - Extract pages from an iterator and add to an sglist 1313 * @iter: The iterator to extract from 1314 * @maxsize: The amount of iterator to copy 1315 * @sgtable: The scatterlist table to fill in 1316 * @sg_max: Maximum number of elements in @sgtable that may be filled 1317 * @extraction_flags: Flags to qualify the request 1318 * 1319 * Extract the page fragments from the given amount of the source iterator and 1320 * add them to a scatterlist that refers to all of those bits, to a maximum 1321 * addition of @sg_max elements. 1322 * 1323 * The pages referred to by UBUF- and IOVEC-type iterators are extracted and 1324 * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE- 1325 * and DISCARD-type are not supported. 
1326 * 1327 * No end mark is placed on the scatterlist; that's left to the caller. 1328 * 1329 * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA 1330 * be allowed on the pages extracted. 1331 * 1332 * If successful, @sgtable->nents is updated to include the number of elements 1333 * added and the number of bytes added is returned. @sgtable->orig_nents is 1334 * left unaltered. 1335 * 1336 * The iov_iter_extract_mode() function should be used to query how cleanup 1337 * should be performed. 1338 */ 1339 ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize, 1340 struct sg_table *sgtable, unsigned int sg_max, 1341 iov_iter_extraction_t extraction_flags) 1342 { 1343 if (maxsize == 0) 1344 return 0; 1345 1346 switch (iov_iter_type(iter)) { 1347 case ITER_UBUF: 1348 case ITER_IOVEC: 1349 return extract_user_to_sg(iter, maxsize, sgtable, sg_max, 1350 extraction_flags); 1351 case ITER_BVEC: 1352 return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max, 1353 extraction_flags); 1354 case ITER_KVEC: 1355 return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max, 1356 extraction_flags); 1357 case ITER_XARRAY: 1358 return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max, 1359 extraction_flags); 1360 default: 1361 pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter)); 1362 WARN_ON_ONCE(1); 1363 return -EIO; 1364 } 1365 } 1366 EXPORT_SYMBOL_GPL(extract_iter_to_sg); 1367