/*
 * linux/fs/nfs/pagelist.c
 *
 * A set of helper functions for managing NFS read and write requests.
 * The main purpose of these routines is to provide support for the
 * coalescing of several requests into a single RPC call.
 *
 * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>

#include "internal.h"
#include "pnfs.h"

static struct kmem_cache *nfs_page_cachep;

static inline struct nfs_page *
nfs_page_alloc(void)
{
        struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
        if (p)
                INIT_LIST_HEAD(&p->wb_list);
        return p;
}

static inline void
nfs_page_free(struct nfs_page *p)
{
        kmem_cache_free(nfs_page_cachep, p);
}

/**
 * nfs_create_request - Create an NFS read/write request.
 * @ctx: open context to use
 * @inode: inode to which the request is attached
 * @page: page to write
 * @offset: starting offset within the page for the write
 * @count: number of bytes to read/write
 *
 * The page must be locked by the caller. This makes sure we never
 * create two different requests for the same page.
 * User should ensure it is safe to sleep in this function.
 */
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
                   struct page *page,
                   unsigned int offset, unsigned int count)
{
        struct nfs_page *req;

        /* try to allocate the request struct */
        req = nfs_page_alloc();
        if (req == NULL)
                return ERR_PTR(-ENOMEM);

        /* get lock context early so we can deal with alloc failures */
        req->wb_lock_context = nfs_get_lock_context(ctx);
        if (req->wb_lock_context == NULL) {
                nfs_page_free(req);
                return ERR_PTR(-ENOMEM);
        }

        /* Initialize the request struct. Initially, we assume a
         * long write-back delay. This will be adjusted in
         * update_nfs_request below if the region is not locked. */
        req->wb_page = page;
        atomic_set(&req->wb_complete, 0);
        req->wb_index = page->index;
        page_cache_get(page);
        BUG_ON(PagePrivate(page));
        BUG_ON(!PageLocked(page));
        BUG_ON(page->mapping->host != inode);
        req->wb_offset = offset;
        req->wb_pgbase = offset;
        req->wb_bytes = count;
        req->wb_context = get_nfs_open_context(ctx);
        kref_init(&req->wb_kref);
        return req;
}
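
/*
 * A minimal usage sketch for nfs_create_request() (illustrative only, not a
 * verbatim caller from this file; __example_setup_write is a hypothetical
 * helper, and the page is assumed to be locked with a valid open context):
 *
 *      static int __example_setup_write(struct nfs_open_context *ctx,
 *                                       struct inode *inode,
 *                                       struct page *page,
 *                                       unsigned int offset,
 *                                       unsigned int count)
 *      {
 *              struct nfs_page *req;
 *
 *              req = nfs_create_request(ctx, inode, page, offset, count);
 *              if (IS_ERR(req))
 *                      return PTR_ERR(req);
 *              ... queue 'req' for I/O; whoever finishes it drops the ...
 *              ... initial reference with nfs_release_request(req)    ...
 *              return 0;
 *      }
 */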

/**
 * nfs_unlock_request - Unlock request and wake up sleepers.
 * @req: pointer to request
 */
void nfs_unlock_request(struct nfs_page *req)
{
        if (!NFS_WBACK_BUSY(req)) {
                printk(KERN_ERR "NFS: Invalid unlock attempted\n");
                BUG();
        }
        smp_mb__before_clear_bit();
        clear_bit(PG_BUSY, &req->wb_flags);
        smp_mb__after_clear_bit();
        wake_up_bit(&req->wb_flags, PG_BUSY);
        nfs_release_request(req);
}

/**
 * nfs_set_page_tag_locked - Tag a request as locked
 * @req: pointer to request
 */
int nfs_set_page_tag_locked(struct nfs_page *req)
{
        if (!nfs_lock_request_dontget(req))
                return 0;
        if (test_bit(PG_MAPPED, &req->wb_flags))
                radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
        return 1;
}

/**
 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
 * @req: pointer to request
 */
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
        if (test_bit(PG_MAPPED, &req->wb_flags)) {
                struct inode *inode = req->wb_context->dentry->d_inode;
                struct nfs_inode *nfsi = NFS_I(inode);

                spin_lock(&inode->i_lock);
                radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
                nfs_unlock_request(req);
                spin_unlock(&inode->i_lock);
        } else
                nfs_unlock_request(req);
}

/*
 * nfs_clear_request - Free up all resources allocated to the request
 * @req: pointer to request
 *
 * Release page and open context resources associated with a read/write
 * request after it has completed.
 */
static void nfs_clear_request(struct nfs_page *req)
{
        struct page *page = req->wb_page;
        struct nfs_open_context *ctx = req->wb_context;
        struct nfs_lock_context *l_ctx = req->wb_lock_context;

        if (page != NULL) {
                page_cache_release(page);
                req->wb_page = NULL;
        }
        if (l_ctx != NULL) {
                nfs_put_lock_context(l_ctx);
                req->wb_lock_context = NULL;
        }
        if (ctx != NULL) {
                put_nfs_open_context(ctx);
                req->wb_context = NULL;
        }
}


/**
 * nfs_release_request - Release the count on an NFS read/write request
 * @req: request to release
 *
 * Note: Should never be called with the spinlock held!
 */
static void nfs_free_request(struct kref *kref)
{
        struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);

        /* Release struct file and open context */
        nfs_clear_request(req);
        nfs_page_free(req);
}

void nfs_release_request(struct nfs_page *req)
{
        kref_put(&req->wb_kref, nfs_free_request);
}

static int nfs_wait_bit_uninterruptible(void *word)
{
        io_schedule();
        return 0;
}

/**
 * nfs_wait_on_request - Wait for a request to complete.
 * @req: request to wait upon.
 *
 * Interruptible by fatal signals only.
 * The user is responsible for holding a count on the request.
 */
int
nfs_wait_on_request(struct nfs_page *req)
{
        return wait_on_bit(&req->wb_flags, PG_BUSY,
                        nfs_wait_bit_uninterruptible,
                        TASK_UNINTERRUPTIBLE);
}
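
/*
 * A minimal sketch of the PG_BUSY locking protocol implemented above,
 * assuming the caller already holds a reference on 'req' (control flow
 * only; i_lock handling and error paths are omitted, so this is not a
 * verbatim caller from this file):
 *
 *      while (!nfs_set_page_tag_locked(req))
 *              nfs_wait_on_request(req);       sleep until PG_BUSY clears
 *      kref_get(&req->wb_kref);                the unlock below drops one ref
 *      ... operate on the locked request ...
 *      nfs_clear_page_tag_locked(req);         clear the tag, wake up waiters
 *
 * nfs_scan_list() below follows the same pattern under the inode's i_lock.
 */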

bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
{
        /*
         * FIXME: ideally we should be able to coalesce all requests
         * that are not block boundary aligned, but currently this
         * is problematic for the case of bsize < PAGE_CACHE_SIZE,
         * since nfs_flush_multi and nfs_pagein_multi assume you
         * can have only one struct nfs_page.
         */
        if (desc->pg_bsize < PAGE_SIZE)
                return 0;

        return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);

/**
 * nfs_pageio_init - initialise a page io descriptor
 * @desc: pointer to descriptor
 * @inode: pointer to inode
 * @doio: pointer to io function
 * @bsize: io block size
 * @io_flags: extra parameters for the io function
 */
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
                     struct inode *inode,
                     int (*doio)(struct nfs_pageio_descriptor *),
                     size_t bsize,
                     int io_flags)
{
        INIT_LIST_HEAD(&desc->pg_list);
        desc->pg_bytes_written = 0;
        desc->pg_count = 0;
        desc->pg_bsize = bsize;
        desc->pg_base = 0;
        desc->pg_moreio = 0;
        desc->pg_inode = inode;
        desc->pg_doio = doio;
        desc->pg_ioflags = io_flags;
        desc->pg_error = 0;
        desc->pg_lseg = NULL;
        desc->pg_test = nfs_generic_pg_test;
        pnfs_pageio_init(desc, inode);
}

/**
 * nfs_can_coalesce_requests - test two requests for compatibility
 * @prev: pointer to nfs_page
 * @req: pointer to nfs_page
 * @pgio: pointer to the io descriptor whose pg_test callback is consulted
 *
 * The nfs_page structures 'prev' and 'req' are compared to ensure that the
 * page data area they describe is contiguous, and that their RPC
 * credentials, NFSv4 open state, and lockowners are the same.
 *
 * Return 'true' if this is the case, else return 'false'.
 */
static bool nfs_can_coalesce_requests(struct nfs_page *prev,
                                      struct nfs_page *req,
                                      struct nfs_pageio_descriptor *pgio)
{
        if (req->wb_context->cred != prev->wb_context->cred)
                return false;
        if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
                return false;
        if (req->wb_context->state != prev->wb_context->state)
                return false;
        if (req->wb_index != (prev->wb_index + 1))
                return false;
        if (req->wb_pgbase != 0)
                return false;
        if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
                return false;
        return pgio->pg_test(pgio, prev, req);
}

/**
 * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
                                     struct nfs_page *req)
{
        if (desc->pg_count != 0) {
                struct nfs_page *prev;

                prev = nfs_list_entry(desc->pg_list.prev);
                if (!nfs_can_coalesce_requests(prev, req, desc))
                        return 0;
        } else {
                desc->pg_base = req->wb_pgbase;
        }
        nfs_list_remove_request(req);
        nfs_list_add_request(req, &desc->pg_list);
        desc->pg_count += req->wb_bytes;
        return 1;
}

/*
 * Helper for nfs_pageio_add_request and nfs_pageio_complete
 */
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
        if (!list_empty(&desc->pg_list)) {
                int error = desc->pg_doio(desc);
                if (error < 0)
                        desc->pg_error = error;
                else
                        desc->pg_bytes_written += desc->pg_count;
        }
        if (list_empty(&desc->pg_list)) {
                desc->pg_count = 0;
                desc->pg_base = 0;
        }
}
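
/*
 * A worked example of the coalescing accounting above, assuming a
 * descriptor initialised with pg_bsize = 32768 and full 4096-byte page
 * requests (the numbers are illustrative, not taken from a real trace):
 *
 *      add page index 5:  empty list, so pg_base = 0, pg_count = 4096
 *      add page index 6:  contiguous, same cred/state -> pg_count = 8192
 *      ...
 *      add page index 12: pg_count = 32768 == pg_bsize
 *      add page index 13: nfs_generic_pg_test() fails (32768 + 4096 > 32768),
 *                         so nfs_pageio_add_request() below calls
 *                         nfs_pageio_doio() and retries on an empty list.
 */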

/**
 * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                           struct nfs_page *req)
{
        while (!nfs_pageio_do_add_request(desc, req)) {
                desc->pg_moreio = 1;
                nfs_pageio_doio(desc);
                if (desc->pg_error < 0)
                        return 0;
                desc->pg_moreio = 0;
        }
        return 1;
}

/**
 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
 * @desc: pointer to io descriptor
 */
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
        nfs_pageio_doio(desc);
}

/**
 * nfs_pageio_cond_complete - Conditional I/O completion
 * @desc: pointer to io descriptor
 * @index: page index
 *
 * It is important to ensure that processes don't try to take locks
 * on non-contiguous ranges of pages as that might deadlock. This
 * function should be called before attempting to wait on a locked
 * nfs_page. It will complete the I/O if the page index 'index'
 * is not contiguous with the existing list of pages in 'desc'.
 */
void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
{
        if (!list_empty(&desc->pg_list)) {
                struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
                if (index != prev->wb_index + 1)
                        nfs_pageio_doio(desc);
        }
}

#define NFS_SCAN_MAXENTRIES 16
/**
 * nfs_scan_list - Scan a list for matching requests
 * @nfsi: NFS inode
 * @dst: Destination list
 * @idx_start: lower bound of page->index to scan
 * @npages: idx_start + npages sets the upper bound to scan.
 * @tag: tag to scan for
 *
 * Moves elements from one of the inode request lists.
 * If npages is set to 0, the entire address_space starting at index
 * idx_start is scanned.
 * The requests are *not* checked to ensure that they form a contiguous set.
 * You must be holding the inode's i_lock when calling this function
 */
int nfs_scan_list(struct nfs_inode *nfsi,
                  struct list_head *dst, pgoff_t idx_start,
                  unsigned int npages, int tag)
{
        struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
        struct nfs_page *req;
        pgoff_t idx_end;
        int found, i;
        int res;
        struct list_head *list;

        res = 0;
        if (npages == 0)
                idx_end = ~0;
        else
                idx_end = idx_start + npages - 1;

        for (;;) {
                found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
                                (void **)&pgvec[0], idx_start,
                                NFS_SCAN_MAXENTRIES, tag);
                if (found <= 0)
                        break;
                for (i = 0; i < found; i++) {
                        req = pgvec[i];
                        if (req->wb_index > idx_end)
                                goto out;
                        idx_start = req->wb_index + 1;
                        if (nfs_set_page_tag_locked(req)) {
                                kref_get(&req->wb_kref);
                                radix_tree_tag_clear(&nfsi->nfs_page_tree,
                                                req->wb_index, tag);
                                list = pnfs_choose_commit_list(req, dst);
                                nfs_list_add_request(req, list);
                                res++;
                                if (res == INT_MAX)
                                        goto out;
                        }
                }
                /* for latency reduction */
                cond_resched_lock(&nfsi->vfs_inode.i_lock);
        }
out:
        return res;
}

int __init nfs_init_nfspagecache(void)
{
        nfs_page_cachep = kmem_cache_create("nfs_page",
                                            sizeof(struct nfs_page),
                                            0, SLAB_HWCACHE_ALIGN,
                                            NULL);
        if (nfs_page_cachep == NULL)
                return -ENOMEM;

        return 0;
}

void nfs_destroy_nfspagecache(void)
{
        kmem_cache_destroy(nfs_page_cachep);
}
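
/*
 * End-to-end usage sketch of the pageio descriptor API in this file,
 * assuming the caller already holds a list of locked nfs_page requests
 * (the names example_doio and example_flush_list are hypothetical, and
 * the wsize choice is only an example of a sensible block size):
 *
 *      static int example_doio(struct nfs_pageio_descriptor *desc)
 *      {
 *              ... turn desc->pg_list / desc->pg_count into one RPC ...
 *              return 0;
 *      }
 *
 *      static void example_flush_list(struct inode *inode,
 *                                     struct list_head *head, int how)
 *      {
 *              struct nfs_pageio_descriptor desc;
 *              struct nfs_page *req;
 *
 *              nfs_pageio_init(&desc, inode, example_doio,
 *                              NFS_SERVER(inode)->wsize, how);
 *              while (!list_empty(head)) {
 *                      req = nfs_list_entry(head->next);
 *                      if (!nfs_pageio_add_request(&desc, req))
 *                              break;          desc.pg_error holds the error
 *              }
 *              nfs_pageio_complete(&desc);
 *      }
 */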