/*
 * linux/fs/nfs/pagelist.c
 *
 * A set of helper functions for managing NFS read and write requests.
 * The main purpose of these routines is to provide support for the
 * coalescing of several requests into a single RPC call.
 *
 * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>

#include "internal.h"
#include "pnfs.h"

static struct kmem_cache *nfs_page_cachep;

static inline struct nfs_page *
nfs_page_alloc(void)
{
	struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
	if (p)
		INIT_LIST_HEAD(&p->wb_list);
	return p;
}

static inline void
nfs_page_free(struct nfs_page *p)
{
	kmem_cache_free(nfs_page_cachep, p);
}

/**
 * nfs_create_request - Create an NFS read/write request.
 * @ctx: open context to use
 * @inode: inode to which the request is attached
 * @page: page to write
 * @offset: starting offset within the page for the write
 * @count: number of bytes to read/write
 *
 * The page must be locked by the caller. This makes sure we never
 * create two different requests for the same page.
 * The caller must ensure it is safe to sleep in this function.
 */
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
		   struct page *page,
		   unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	/* try to allocate the request struct */
	req = nfs_page_alloc();
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	/* get lock context early so we can deal with alloc failures */
	req->wb_lock_context = nfs_get_lock_context(ctx);
	if (req->wb_lock_context == NULL) {
		nfs_page_free(req);
		return ERR_PTR(-ENOMEM);
	}

	/* Initialize the request struct. Initially, we assume a
	 * long write-back delay. This will be adjusted in
	 * update_nfs_request below if the region is not locked. */
	req->wb_page    = page;
	atomic_set(&req->wb_complete, 0);
	req->wb_index   = page->index;
	page_cache_get(page);
	BUG_ON(PagePrivate(page));
	BUG_ON(!PageLocked(page));
	BUG_ON(page->mapping->host != inode);
	req->wb_offset  = offset;
	req->wb_pgbase  = offset;
	req->wb_bytes   = count;
	req->wb_context = get_nfs_open_context(ctx);
	kref_init(&req->wb_kref);
	return req;
}
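/*
 * Illustrative only (not part of this file's API surface): a typical
 * caller creates a request for a page it has already locked, and drops
 * its reference when done. Error handling here is a minimal sketch.
 *
 *	req = nfs_create_request(ctx, inode, page, 0, PAGE_CACHE_SIZE);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	...
 *	nfs_release_request(req);
 */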
/**
 * nfs_unlock_request - Unlock request and wake up sleepers.
 * @req: pointer to request to unlock
 */
void nfs_unlock_request(struct nfs_page *req)
{
	if (!NFS_WBACK_BUSY(req)) {
		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
		BUG();
	}
	smp_mb__before_clear_bit();
	clear_bit(PG_BUSY, &req->wb_flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&req->wb_flags, PG_BUSY);
	nfs_release_request(req);
}

/**
 * nfs_set_page_tag_locked - Tag a request as locked
 * @req: pointer to request to lock and tag
 */
int nfs_set_page_tag_locked(struct nfs_page *req)
{
	if (!nfs_lock_request_dontget(req))
		return 0;
	if (test_bit(PG_MAPPED, &req->wb_flags))
		radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree,
				   req->wb_index, NFS_PAGE_TAG_LOCKED);
	return 1;
}

/**
 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
 * @req: pointer to request to unlock and untag
 */
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
	if (test_bit(PG_MAPPED, &req->wb_flags)) {
		struct inode *inode = req->wb_context->dentry->d_inode;
		struct nfs_inode *nfsi = NFS_I(inode);

		spin_lock(&inode->i_lock);
		radix_tree_tag_clear(&nfsi->nfs_page_tree,
				     req->wb_index, NFS_PAGE_TAG_LOCKED);
		nfs_unlock_request(req);
		spin_unlock(&inode->i_lock);
	} else
		nfs_unlock_request(req);
}

/*
 * nfs_clear_request - Free up all resources allocated to the request
 * @req: pointer to request to clean up
 *
 * Release page and open context resources associated with a read/write
 * request after it has completed.
 */
static void nfs_clear_request(struct nfs_page *req)
{
	struct page *page = req->wb_page;
	struct nfs_open_context *ctx = req->wb_context;
	struct nfs_lock_context *l_ctx = req->wb_lock_context;

	if (page != NULL) {
		page_cache_release(page);
		req->wb_page = NULL;
	}
	if (l_ctx != NULL) {
		nfs_put_lock_context(l_ctx);
		req->wb_lock_context = NULL;
	}
	if (ctx != NULL) {
		put_nfs_open_context(ctx);
		req->wb_context = NULL;
	}
}


/**
 * nfs_release_request - Release the count on an NFS read/write request
 * @req: request to release
 *
 * Note: Should never be called with the spinlock held!
 */
static void nfs_free_request(struct kref *kref)
{
	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);

	/* Release struct file and open context */
	nfs_clear_request(req);
	nfs_page_free(req);
}

void nfs_release_request(struct nfs_page *req)
{
	kref_put(&req->wb_kref, nfs_free_request);
}

static int nfs_wait_bit_uninterruptible(void *word)
{
	io_schedule();
	return 0;
}

/**
 * nfs_wait_on_request - Wait for a request to complete.
 * @req: request to wait upon.
 *
 * The wait is uninterruptible.
 * The user is responsible for holding a count on the request.
 */
int
nfs_wait_on_request(struct nfs_page *req)
{
	return wait_on_bit(&req->wb_flags, PG_BUSY,
			nfs_wait_bit_uninterruptible,
			TASK_UNINTERRUPTIBLE);
}
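/*
 * Illustrative sketch (assumes a caller in the write path that holds the
 * inode's i_lock and a reference on 'req'; hypothetical, not code in this
 * file): if the request cannot be tag-locked immediately, drop i_lock,
 * wait for PG_BUSY to clear, then retake the lock and retry.
 *
 *	while (!nfs_set_page_tag_locked(req)) {
 *		spin_unlock(&inode->i_lock);
 *		nfs_wait_on_request(req);
 *		spin_lock(&inode->i_lock);
 *	}
 */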
bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
			 struct nfs_page *prev, struct nfs_page *req)
{
	/*
	 * FIXME: ideally we should be able to coalesce all requests
	 * that are not block boundary aligned, but currently this
	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
	 * since nfs_flush_multi and nfs_pagein_multi assume you
	 * can have only one struct nfs_page.
	 */
	if (desc->pg_bsize < PAGE_SIZE)
		return 0;

	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);

/**
 * nfs_pageio_init - initialise a page io descriptor
 * @desc: pointer to descriptor
 * @inode: pointer to inode
 * @pg_ops: pointer to pageio operations
 * @bsize: io block size
 * @io_flags: extra parameters for the io function
 */
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
		     struct inode *inode,
		     const struct nfs_pageio_ops *pg_ops,
		     size_t bsize,
		     int io_flags)
{
	INIT_LIST_HEAD(&desc->pg_list);
	desc->pg_bytes_written = 0;
	desc->pg_count = 0;
	desc->pg_bsize = bsize;
	desc->pg_base = 0;
	desc->pg_moreio = 0;
	desc->pg_recoalesce = 0;
	desc->pg_inode = inode;
	desc->pg_ops = pg_ops;
	desc->pg_ioflags = io_flags;
	desc->pg_error = 0;
	desc->pg_lseg = NULL;
}

/**
 * nfs_can_coalesce_requests - test two requests for compatibility
 * @prev: pointer to nfs_page
 * @req: pointer to nfs_page
 * @pgio: pointer to the pageio descriptor
 *
 * The nfs_page structures 'prev' and 'req' are compared to ensure that the
 * page data area they describe is contiguous, and that their RPC
 * credentials, NFSv4 open state, and lockowners are the same.
 *
 * Return 'true' if this is the case, else return 'false'.
 */
static bool nfs_can_coalesce_requests(struct nfs_page *prev,
				      struct nfs_page *req,
				      struct nfs_pageio_descriptor *pgio)
{
	if (req->wb_context->cred != prev->wb_context->cred)
		return false;
	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
		return false;
	if (req->wb_context->state != prev->wb_context->state)
		return false;
	if (req->wb_index != (prev->wb_index + 1))
		return false;
	if (req->wb_pgbase != 0)
		return false;
	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
		return false;
	return pgio->pg_ops->pg_test(pgio, prev, req);
}

/**
 * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
				     struct nfs_page *req)
{
	if (desc->pg_count != 0) {
		struct nfs_page *prev;

		prev = nfs_list_entry(desc->pg_list.prev);
		if (!nfs_can_coalesce_requests(prev, req, desc))
			return 0;
	} else {
		if (desc->pg_ops->pg_init)
			desc->pg_ops->pg_init(desc, req);
		desc->pg_base = req->wb_pgbase;
	}
	nfs_list_remove_request(req);
	nfs_list_add_request(req, &desc->pg_list);
	desc->pg_count += req->wb_bytes;
	return 1;
}

/*
 * Helper for nfs_pageio_add_request and nfs_pageio_complete
 */
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
	if (!list_empty(&desc->pg_list)) {
		int error = desc->pg_ops->pg_doio(desc);
		if (error < 0)
			desc->pg_error = error;
		else
			desc->pg_bytes_written += desc->pg_count;
	}
	if (list_empty(&desc->pg_list)) {
		desc->pg_count = 0;
		desc->pg_base = 0;
	}
}
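/*
 * Illustrative usage sketch for the pageio descriptor (hypothetical:
 * 'rw_ops', 'wsize', 'ioflags' and next_dirty_request() stand in for a
 * real caller's ops table, block size, flags and request source):
 *
 *	struct nfs_pageio_descriptor pgio;
 *
 *	nfs_pageio_init(&pgio, inode, rw_ops, wsize, ioflags);
 *	while ((req = next_dirty_request()) != NULL)
 *		nfs_pageio_add_request(&pgio, req);
 *	nfs_pageio_complete(&pgio);
 */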
/**
 * __nfs_pageio_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
				    struct nfs_page *req)
{
	while (!nfs_pageio_do_add_request(desc, req)) {
		desc->pg_moreio = 1;
		nfs_pageio_doio(desc);
		if (desc->pg_error < 0)
			return 0;
		desc->pg_moreio = 0;
		if (desc->pg_recoalesce)
			return 0;
	}
	return 1;
}

static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);

	do {
		list_splice_init(&desc->pg_list, &head);
		desc->pg_bytes_written -= desc->pg_count;
		desc->pg_count = 0;
		desc->pg_base = 0;
		desc->pg_recoalesce = 0;

		while (!list_empty(&head)) {
			struct nfs_page *req;

			req = list_first_entry(&head, struct nfs_page, wb_list);
			nfs_list_remove_request(req);
			if (__nfs_pageio_add_request(desc, req))
				continue;
			if (desc->pg_error < 0)
				return 0;
			break;
		}
	} while (desc->pg_recoalesce);
	return 1;
}

int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
			   struct nfs_page *req)
{
	int ret;

	do {
		ret = __nfs_pageio_add_request(desc, req);
		if (ret)
			break;
		if (desc->pg_error < 0)
			break;
		ret = nfs_do_recoalesce(desc);
	} while (ret);
	return ret;
}

/**
 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
 * @desc: pointer to io descriptor
 */
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
	for (;;) {
		nfs_pageio_doio(desc);
		if (!desc->pg_recoalesce)
			break;
		if (!nfs_do_recoalesce(desc))
			break;
	}
}

/**
 * nfs_pageio_cond_complete - Conditional I/O completion
 * @desc: pointer to io descriptor
 * @index: page index
 *
 * It is important to ensure that processes don't try to take locks
 * on non-contiguous ranges of pages as that might deadlock. This
 * function should be called before attempting to wait on a locked
 * nfs_page. It will complete the I/O if the page index 'index'
 * is not contiguous with the existing list of pages in 'desc'.
 */
void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
{
	if (!list_empty(&desc->pg_list)) {
		struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
		if (index != prev->wb_index + 1)
			nfs_pageio_complete(desc);
	}
}
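/*
 * Illustrative sketch of an nfs_scan_list() caller (hypothetical; the
 * real callers live in the write/commit paths): collect every request
 * tagged for commit into a private list while holding the inode's
 * i_lock, as the function below requires.
 *
 *	LIST_HEAD(head);
 *
 *	spin_lock(&inode->i_lock);
 *	found = nfs_scan_list(NFS_I(inode), &head, 0, 0, NFS_PAGE_TAG_COMMIT);
 *	spin_unlock(&inode->i_lock);
 */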
#define NFS_SCAN_MAXENTRIES 16
/**
 * nfs_scan_list - Scan a list for matching requests
 * @nfsi: NFS inode
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan
 * @npages: idx_start + npages sets the upper bound to scan
 * @tag: tag to scan for
 *
 * Moves elements from one of the inode request lists.
 * If @npages is 0, the entire address_space starting at index
 * @idx_start is scanned.
 * The requests are *not* checked to ensure that they form a contiguous set.
 * You must be holding the inode's i_lock when calling this function.
 */
int nfs_scan_list(struct nfs_inode *nfsi,
		  struct list_head *dst, pgoff_t idx_start,
		  unsigned int npages, int tag)
{
	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
	struct nfs_page *req;
	pgoff_t idx_end;
	int found, i;
	int res;
	struct list_head *list;

	res = 0;
	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	for (;;) {
		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
				(void **)&pgvec[0], idx_start,
				NFS_SCAN_MAXENTRIES, tag);
		if (found <= 0)
			break;
		for (i = 0; i < found; i++) {
			req = pgvec[i];
			if (req->wb_index > idx_end)
				goto out;
			idx_start = req->wb_index + 1;
			if (nfs_set_page_tag_locked(req)) {
				kref_get(&req->wb_kref);
				radix_tree_tag_clear(&nfsi->nfs_page_tree,
						req->wb_index, tag);
				list = pnfs_choose_commit_list(req, dst);
				nfs_list_add_request(req, list);
				res++;
				if (res == INT_MAX)
					goto out;
			}
		}
		/* for latency reduction */
		cond_resched_lock(&nfsi->vfs_inode.i_lock);
	}
out:
	return res;
}

int __init nfs_init_nfspagecache(void)
{
	nfs_page_cachep = kmem_cache_create("nfs_page",
					    sizeof(struct nfs_page),
					    0, SLAB_HWCACHE_ALIGN,
					    NULL);
	if (nfs_page_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_nfspagecache(void)
{
	kmem_cache_destroy(nfs_page_cachep);
}
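/*
 * Note: nfs_init_nfspagecache() and nfs_destroy_nfspagecache() are
 * invoked from the NFS client module's init and exit paths.
 */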