/*
 * linux/fs/nfs/pagelist.c
 *
 * A set of helper functions for managing NFS read and write requests.
 * The main purpose of these routines is to provide support for the
 * coalescing of several requests into a single RPC call.
 *
 * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>

#include "internal.h"
#include "pnfs.h"

static struct kmem_cache *nfs_page_cachep;

bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
{
        p->npages = pagecount;
        if (pagecount <= ARRAY_SIZE(p->page_array))
                p->pagevec = p->page_array;
        else {
                p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
                if (!p->pagevec)
                        p->npages = 0;
        }
        return p->pagevec != NULL;
}

void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
                       struct nfs_pgio_header *hdr,
                       void (*release)(struct nfs_pgio_header *hdr))
{
        hdr->req = nfs_list_entry(desc->pg_list.next);
        hdr->inode = desc->pg_inode;
        hdr->cred = hdr->req->wb_context->cred;
        hdr->io_start = req_offset(hdr->req);
        hdr->good_bytes = desc->pg_count;
        hdr->dreq = desc->pg_dreq;
        hdr->layout_private = desc->pg_layout_private;
        hdr->release = release;
        hdr->completion_ops = desc->pg_completion_ops;
        if (hdr->completion_ops->init_hdr)
                hdr->completion_ops->init_hdr(hdr);
}
EXPORT_SYMBOL_GPL(nfs_pgheader_init);

void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
{
        spin_lock(&hdr->lock);
        if (pos < hdr->io_start + hdr->good_bytes) {
                set_bit(NFS_IOHDR_ERROR, &hdr->flags);
                clear_bit(NFS_IOHDR_EOF, &hdr->flags);
                hdr->good_bytes = pos - hdr->io_start;
                hdr->error = error;
        }
        spin_unlock(&hdr->lock);
}

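/*
 * Illustrative example of the truncation above (the numbers are made up,
 * not taken from this file): for an I/O that started at io_start == 0
 * with good_bytes == 16384, an error reported at pos == 8192 shrinks
 * good_bytes to 8192, so only the first half of the range is treated as
 * having completed successfully.  A later error at or beyond
 * io_start + good_bytes leaves the fields untouched.
 */
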
static inline struct nfs_page *
nfs_page_alloc(void)
{
        struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_NOIO);
        if (p)
                INIT_LIST_HEAD(&p->wb_list);
        return p;
}

static inline void
nfs_page_free(struct nfs_page *p)
{
        kmem_cache_free(nfs_page_cachep, p);
}

/**
 * nfs_create_request - Create an NFS read/write request.
 * @ctx: open context to use
 * @inode: inode to which the request is attached
 * @page: page to write
 * @offset: starting offset within the page for the write
 * @count: number of bytes to read/write
 *
 * The page must be locked by the caller. This makes sure we never
 * create two different requests for the same page.
 * User should ensure it is safe to sleep in this function.
 */
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
                   struct page *page,
                   unsigned int offset, unsigned int count)
{
        struct nfs_page *req;

        /* try to allocate the request struct */
        req = nfs_page_alloc();
        if (req == NULL)
                return ERR_PTR(-ENOMEM);

        /* get lock context early so we can deal with alloc failures */
        req->wb_lock_context = nfs_get_lock_context(ctx);
        if (req->wb_lock_context == NULL) {
                nfs_page_free(req);
                return ERR_PTR(-ENOMEM);
        }

        /* Initialize the request struct. Initially, we assume a
         * long write-back delay. This will be adjusted in
         * update_nfs_request below if the region is not locked. */
        req->wb_page    = page;
        req->wb_index   = page_file_index(page);
        page_cache_get(page);
        req->wb_offset  = offset;
        req->wb_pgbase  = offset;
        req->wb_bytes   = count;
        req->wb_context = get_nfs_open_context(ctx);
        kref_init(&req->wb_kref);
        return req;
}

/**
 * nfs_unlock_request - Unlock request and wake up sleepers.
 * @req: request to unlock
 */
void nfs_unlock_request(struct nfs_page *req)
{
        if (!NFS_WBACK_BUSY(req)) {
                printk(KERN_ERR "NFS: Invalid unlock attempted\n");
                BUG();
        }
        smp_mb__before_clear_bit();
        clear_bit(PG_BUSY, &req->wb_flags);
        smp_mb__after_clear_bit();
        wake_up_bit(&req->wb_flags, PG_BUSY);
}

/**
 * nfs_unlock_and_release_request - Unlock request and release the nfs_page
 * @req: request to unlock and release
 */
void nfs_unlock_and_release_request(struct nfs_page *req)
{
        nfs_unlock_request(req);
        nfs_release_request(req);
}

/*
 * nfs_clear_request - Free up all resources allocated to the request
 * @req: request to clear
 *
 * Release page and open context resources associated with a read/write
 * request after it has completed.
 */
static void nfs_clear_request(struct nfs_page *req)
{
        struct page *page = req->wb_page;
        struct nfs_open_context *ctx = req->wb_context;
        struct nfs_lock_context *l_ctx = req->wb_lock_context;

        if (page != NULL) {
                page_cache_release(page);
                req->wb_page = NULL;
        }
        if (l_ctx != NULL) {
                nfs_put_lock_context(l_ctx);
                req->wb_lock_context = NULL;
        }
        if (ctx != NULL) {
                put_nfs_open_context(ctx);
                req->wb_context = NULL;
        }
}

/**
 * nfs_release_request - Release the count on an NFS read/write request
 * @req: request to release
 *
 * Note: Should never be called with the spinlock held!
 */
static void nfs_free_request(struct kref *kref)
{
        struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);

        /* Release struct file and open context */
        nfs_clear_request(req);
        nfs_page_free(req);
}

void nfs_release_request(struct nfs_page *req)
{
        kref_put(&req->wb_kref, nfs_free_request);
}

static int nfs_wait_bit_uninterruptible(void *word)
{
        io_schedule();
        return 0;
}

/**
 * nfs_wait_on_request - Wait for a request to complete.
 * @req: request to wait upon.
 *
 * Interruptible by fatal signals only.
 * The user is responsible for holding a count on the request.
 */
int
nfs_wait_on_request(struct nfs_page *req)
{
        return wait_on_bit(&req->wb_flags, PG_BUSY,
                           nfs_wait_bit_uninterruptible,
                           TASK_UNINTERRUPTIBLE);
}

bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
{
        /*
         * FIXME: ideally we should be able to coalesce all requests
         * that are not block boundary aligned, but currently this
         * is problematic for the case of bsize < PAGE_CACHE_SIZE,
         * since nfs_flush_multi and nfs_pagein_multi assume you
         * can have only one struct nfs_page.
         */
        if (desc->pg_bsize < PAGE_SIZE)
                return 0;

        return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);

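/*
 * Worked example for the default size test above (illustrative numbers,
 * not derived from any particular mount): with pg_bsize set to a 64k
 * rsize/wsize and 4k pages, pg_count grows in 4k steps, so sixteen
 * page-sized requests satisfy "pg_count + wb_bytes <= pg_bsize"; the
 * seventeenth fails the test and forces the descriptor to be flushed
 * before coalescing can start again.
 */
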
/**
 * nfs_pageio_init - initialise a page io descriptor
 * @desc: pointer to descriptor
 * @inode: pointer to inode
 * @pg_ops: pointer to pageio operations
 * @compl_ops: pointer to pageio completion operations
 * @bsize: io block size
 * @io_flags: extra parameters for the io function
 */
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
                     struct inode *inode,
                     const struct nfs_pageio_ops *pg_ops,
                     const struct nfs_pgio_completion_ops *compl_ops,
                     size_t bsize,
                     int io_flags)
{
        INIT_LIST_HEAD(&desc->pg_list);
        desc->pg_bytes_written = 0;
        desc->pg_count = 0;
        desc->pg_bsize = bsize;
        desc->pg_base = 0;
        desc->pg_moreio = 0;
        desc->pg_recoalesce = 0;
        desc->pg_inode = inode;
        desc->pg_ops = pg_ops;
        desc->pg_completion_ops = compl_ops;
        desc->pg_ioflags = io_flags;
        desc->pg_error = 0;
        desc->pg_lseg = NULL;
        desc->pg_dreq = NULL;
        desc->pg_layout_private = NULL;
}
EXPORT_SYMBOL_GPL(nfs_pageio_init);

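/*
 * A minimal sketch of how callers are expected to drive the descriptor,
 * using only functions defined in this file.  The names pg_ops,
 * compl_ops, bsize, io_flags and req stand in for values supplied by
 * the actual read and write paths; error handling is reduced to the
 * bare minimum:
 *
 *      struct nfs_pageio_descriptor pgio;
 *
 *      nfs_pageio_init(&pgio, inode, pg_ops, compl_ops, bsize, io_flags);
 *      if (!nfs_pageio_add_request(&pgio, req))
 *              return pgio.pg_error;
 *      nfs_pageio_complete(&pgio);
 */
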
/**
 * nfs_can_coalesce_requests - test two requests for compatibility
 * @prev: pointer to nfs_page
 * @req: pointer to nfs_page
 * @pgio: pointer to pageio descriptor
 *
 * The nfs_page structures 'prev' and 'req' are compared to ensure that the
 * page data area they describe is contiguous, and that their RPC
 * credentials, NFSv4 open state, and lockowners are the same.
 *
 * Return 'true' if this is the case, else return 'false'.
 */
static bool nfs_can_coalesce_requests(struct nfs_page *prev,
                                      struct nfs_page *req,
                                      struct nfs_pageio_descriptor *pgio)
{
        if (req->wb_context->cred != prev->wb_context->cred)
                return false;
        if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
                return false;
        if (req->wb_context->state != prev->wb_context->state)
                return false;
        if (req->wb_pgbase != 0)
                return false;
        if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
                return false;
        if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
                return false;
        return pgio->pg_ops->pg_test(pgio, prev, req);
}

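/*
 * Illustrative example of the contiguity rules above (the offsets are
 * made up): with 4k pages, a request covering bytes 0..4095 of the file
 * (wb_pgbase == 0, wb_bytes == PAGE_CACHE_SIZE) can be followed by one
 * covering 4096..8191, provided both share the same credential, open
 * state and lock owner.  A request that starts or ends part-way into
 * its page, or that was issued under a different lock owner, is not
 * coalesced and ends up in a separate RPC instead.
 */
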
/**
 * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
                                     struct nfs_page *req)
{
        if (desc->pg_count != 0) {
                struct nfs_page *prev;

                prev = nfs_list_entry(desc->pg_list.prev);
                if (!nfs_can_coalesce_requests(prev, req, desc))
                        return 0;
        } else {
                if (desc->pg_ops->pg_init)
                        desc->pg_ops->pg_init(desc, req);
                desc->pg_base = req->wb_pgbase;
        }
        nfs_list_remove_request(req);
        nfs_list_add_request(req, &desc->pg_list);
        desc->pg_count += req->wb_bytes;
        return 1;
}

/*
 * Helper for nfs_pageio_add_request and nfs_pageio_complete
 */
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
        if (!list_empty(&desc->pg_list)) {
                int error = desc->pg_ops->pg_doio(desc);
                if (error < 0)
                        desc->pg_error = error;
                else
                        desc->pg_bytes_written += desc->pg_count;
        }
        if (list_empty(&desc->pg_list)) {
                desc->pg_count = 0;
                desc->pg_base = 0;
        }
}

/**
 * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                                    struct nfs_page *req)
{
        while (!nfs_pageio_do_add_request(desc, req)) {
                desc->pg_moreio = 1;
                nfs_pageio_doio(desc);
                if (desc->pg_error < 0)
                        return 0;
                desc->pg_moreio = 0;
                if (desc->pg_recoalesce)
                        return 0;
        }
        return 1;
}

static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
{
        LIST_HEAD(head);

        do {
                list_splice_init(&desc->pg_list, &head);
                desc->pg_bytes_written -= desc->pg_count;
                desc->pg_count = 0;
                desc->pg_base = 0;
                desc->pg_recoalesce = 0;

                while (!list_empty(&head)) {
                        struct nfs_page *req;

                        req = list_first_entry(&head, struct nfs_page, wb_list);
                        nfs_list_remove_request(req);
                        if (__nfs_pageio_add_request(desc, req))
                                continue;
                        if (desc->pg_error < 0)
                                return 0;
                        break;
                }
        } while (desc->pg_recoalesce);
        return 1;
}

int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                           struct nfs_page *req)
{
        int ret;

        do {
                ret = __nfs_pageio_add_request(desc, req);
                if (ret)
                        break;
                if (desc->pg_error < 0)
                        break;
                ret = nfs_do_recoalesce(desc);
        } while (ret);
        return ret;
}
EXPORT_SYMBOL_GPL(nfs_pageio_add_request);

/**
 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
 * @desc: pointer to io descriptor
 */
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
        for (;;) {
                nfs_pageio_doio(desc);
                if (!desc->pg_recoalesce)
                        break;
                if (!nfs_do_recoalesce(desc))
                        break;
        }
}
EXPORT_SYMBOL_GPL(nfs_pageio_complete);

/**
 * nfs_pageio_cond_complete - Conditional I/O completion
 * @desc: pointer to io descriptor
 * @index: page index
 *
 * It is important to ensure that processes don't try to take locks
 * on non-contiguous ranges of pages as that might deadlock. This
 * function should be called before attempting to wait on a locked
 * nfs_page. It will complete the I/O if the page index 'index'
 * is not contiguous with the existing list of pages in 'desc'.
 */
void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
{
        if (!list_empty(&desc->pg_list)) {
                struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
                if (index != prev->wb_index + 1)
                        nfs_pageio_complete(desc);
        }
}

int __init nfs_init_nfspagecache(void)
{
        nfs_page_cachep = kmem_cache_create("nfs_page",
                                            sizeof(struct nfs_page),
                                            0, SLAB_HWCACHE_ALIGN,
                                            NULL);
        if (nfs_page_cachep == NULL)
                return -ENOMEM;

        return 0;
}

void nfs_destroy_nfspagecache(void)
{
        kmem_cache_destroy(nfs_page_cachep);
}