1 /* 2 * (C) 2001 Clemson University and The University of Chicago 3 * 4 * See COPYING in top-level directory. 5 */ 6 7 #include "protocol.h" 8 #include "orangefs-kernel.h" 9 #include "orangefs-bufmap.h" 10 11 /* 12 * decode routine used by kmod to deal with the blob sent from 13 * userspace for readdirs. The blob contains zero or more of these 14 * sub-blobs: 15 * __u32 - represents length of the character string that follows. 16 * string - between 1 and ORANGEFS_NAME_MAX bytes long. 17 * padding - (if needed) to cause the __u32 plus the string to be 18 * eight byte aligned. 19 * khandle - sizeof(khandle) bytes. 20 */ 21 static long decode_dirents(char *ptr, size_t size, 22 struct orangefs_readdir_response_s *readdir) 23 { 24 int i; 25 struct orangefs_readdir_response_s *rd = 26 (struct orangefs_readdir_response_s *) ptr; 27 char *buf = ptr; 28 int khandle_size = sizeof(struct orangefs_khandle); 29 size_t offset = offsetof(struct orangefs_readdir_response_s, 30 dirent_array); 31 /* 8 reflects eight byte alignment */ 32 int smallest_blob = khandle_size + 8; 33 __u32 len; 34 int aligned_len; 35 int sizeof_u32 = sizeof(__u32); 36 long ret; 37 38 gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size); 39 40 /* size is = offset on empty dirs, > offset on non-empty dirs... */ 41 if (size < offset) { 42 gossip_err("%s: size:%zu: offset:%zu:\n", 43 __func__, 44 size, 45 offset); 46 ret = -EINVAL; 47 goto out; 48 } 49 50 if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) { 51 gossip_err("%s: size:%zu: dirent_outcount:%d:\n", 52 __func__, 53 size, 54 readdir->orangefs_dirent_outcount); 55 ret = -EINVAL; 56 goto out; 57 } 58 59 readdir->token = rd->token; 60 readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount; 61 readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount, 62 sizeof(*readdir->dirent_array), 63 GFP_KERNEL); 64 if (readdir->dirent_array == NULL) { 65 gossip_err("%s: kcalloc failed.\n", __func__); 66 ret = -ENOMEM; 67 goto out; 68 } 69 70 buf += offset; 71 size -= offset; 72 73 for (i = 0; i < readdir->orangefs_dirent_outcount; i++) { 74 if (size < smallest_blob) { 75 gossip_err("%s: size:%zu: smallest_blob:%d:\n", 76 __func__, 77 size, 78 smallest_blob); 79 ret = -EINVAL; 80 goto free; 81 } 82 83 len = *(__u32 *)buf; 84 if ((len < 1) || (len > ORANGEFS_NAME_MAX)) { 85 gossip_err("%s: len:%d:\n", __func__, len); 86 ret = -EINVAL; 87 goto free; 88 } 89 90 gossip_debug(GOSSIP_DIR_DEBUG, 91 "%s: size:%zu: len:%d:\n", 92 __func__, 93 size, 94 len); 95 96 readdir->dirent_array[i].d_name = buf + sizeof_u32; 97 readdir->dirent_array[i].d_length = len; 98 99 /* 100 * Calculate "aligned" length of this string and its 101 * associated __u32 descriptor. 102 */ 103 aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7; 104 gossip_debug(GOSSIP_DIR_DEBUG, 105 "%s: aligned_len:%d:\n", 106 __func__, 107 aligned_len); 108 109 /* 110 * The end of the blob should coincide with the end 111 * of the last sub-blob. 112 */ 113 if (size < aligned_len + khandle_size) { 114 gossip_err("%s: ran off the end of the blob.\n", 115 __func__); 116 ret = -EINVAL; 117 goto free; 118 } 119 size -= aligned_len + khandle_size; 120 121 buf += aligned_len; 122 123 readdir->dirent_array[i].khandle = 124 *(struct orangefs_khandle *) buf; 125 buf += khandle_size; 126 } 127 ret = buf - ptr; 128 gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret); 129 goto out; 130 131 free: 132 kfree(readdir->dirent_array); 133 readdir->dirent_array = NULL; 134 135 out: 136 return ret; 137 } 138 139 /* 140 * Read directory entries from an instance of an open directory. 141 */ 142 static int orangefs_readdir(struct file *file, struct dir_context *ctx) 143 { 144 int ret = 0; 145 int buffer_index; 146 /* 147 * ptoken supports Orangefs' distributed directory logic, added 148 * in 2.9.2. 149 */ 150 __u64 *ptoken = file->private_data; 151 __u64 pos = 0; 152 ino_t ino = 0; 153 struct dentry *dentry = file->f_path.dentry; 154 struct orangefs_kernel_op_s *new_op = NULL; 155 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode); 156 struct orangefs_readdir_response_s readdir_response; 157 void *dents_buf; 158 int i = 0; 159 int len = 0; 160 ino_t current_ino = 0; 161 char *current_entry = NULL; 162 long bytes_decoded; 163 164 gossip_debug(GOSSIP_DIR_DEBUG, 165 "%s: ctx->pos:%lld, ptoken = %llu\n", 166 __func__, 167 lld(ctx->pos), 168 llu(*ptoken)); 169 170 pos = (__u64) ctx->pos; 171 172 /* are we done? */ 173 if (pos == ORANGEFS_READDIR_END) { 174 gossip_debug(GOSSIP_DIR_DEBUG, 175 "Skipping to termination path\n"); 176 return 0; 177 } 178 179 gossip_debug(GOSSIP_DIR_DEBUG, 180 "orangefs_readdir called on %s (pos=%llu)\n", 181 dentry->d_name.name, llu(pos)); 182 183 memset(&readdir_response, 0, sizeof(readdir_response)); 184 185 new_op = op_alloc(ORANGEFS_VFS_OP_READDIR); 186 if (!new_op) 187 return -ENOMEM; 188 189 /* 190 * Only the indices are shared. No memory is actually shared, but the 191 * mechanism is used. 192 */ 193 new_op->uses_shared_memory = 1; 194 new_op->upcall.req.readdir.refn = orangefs_inode->refn; 195 new_op->upcall.req.readdir.max_dirent_count = 196 ORANGEFS_MAX_DIRENT_COUNT_READDIR; 197 198 gossip_debug(GOSSIP_DIR_DEBUG, 199 "%s: upcall.req.readdir.refn.khandle: %pU\n", 200 __func__, 201 &new_op->upcall.req.readdir.refn.khandle); 202 203 new_op->upcall.req.readdir.token = *ptoken; 204 205 get_new_buffer_index: 206 buffer_index = orangefs_readdir_index_get(); 207 if (buffer_index < 0) { 208 ret = buffer_index; 209 gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n", 210 ret); 211 goto out_free_op; 212 } 213 new_op->upcall.req.readdir.buf_index = buffer_index; 214 215 ret = service_operation(new_op, 216 "orangefs_readdir", 217 get_interruptible_flag(dentry->d_inode)); 218 219 gossip_debug(GOSSIP_DIR_DEBUG, 220 "Readdir downcall status is %d. ret:%d\n", 221 new_op->downcall.status, 222 ret); 223 224 orangefs_readdir_index_put(buffer_index); 225 226 if (ret == -EAGAIN && op_state_purged(new_op)) { 227 /* Client-core indices are invalid after it restarted. */ 228 gossip_debug(GOSSIP_DIR_DEBUG, 229 "%s: Getting new buffer_index for retry of readdir..\n", 230 __func__); 231 goto get_new_buffer_index; 232 } 233 234 if (ret == -EIO && op_state_purged(new_op)) { 235 gossip_err("%s: Client is down. Aborting readdir call.\n", 236 __func__); 237 goto out_free_op; 238 } 239 240 if (ret < 0 || new_op->downcall.status != 0) { 241 gossip_debug(GOSSIP_DIR_DEBUG, 242 "Readdir request failed. Status:%d\n", 243 new_op->downcall.status); 244 if (ret >= 0) 245 ret = new_op->downcall.status; 246 goto out_free_op; 247 } 248 249 dents_buf = new_op->downcall.trailer_buf; 250 if (dents_buf == NULL) { 251 gossip_err("Invalid NULL buffer in readdir response\n"); 252 ret = -ENOMEM; 253 goto out_free_op; 254 } 255 256 bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size, 257 &readdir_response); 258 if (bytes_decoded < 0) { 259 ret = bytes_decoded; 260 gossip_err("Could not decode readdir from buffer %d\n", ret); 261 goto out_vfree; 262 } 263 264 if (bytes_decoded != new_op->downcall.trailer_size) { 265 gossip_err("orangefs_readdir: # bytes decoded (%ld) " 266 "!= trailer size (%ld)\n", 267 bytes_decoded, 268 (long)new_op->downcall.trailer_size); 269 ret = -EINVAL; 270 goto out_destroy_handle; 271 } 272 273 /* 274 * orangefs doesn't actually store dot and dot-dot, but 275 * we need to have them represented. 276 */ 277 if (pos == 0) { 278 ino = get_ino_from_khandle(dentry->d_inode); 279 gossip_debug(GOSSIP_DIR_DEBUG, 280 "%s: calling dir_emit of \".\" with pos = %llu\n", 281 __func__, 282 llu(pos)); 283 ret = dir_emit(ctx, ".", 1, ino, DT_DIR); 284 pos += 1; 285 } 286 287 if (pos == 1) { 288 ino = get_parent_ino_from_dentry(dentry); 289 gossip_debug(GOSSIP_DIR_DEBUG, 290 "%s: calling dir_emit of \"..\" with pos = %llu\n", 291 __func__, 292 llu(pos)); 293 ret = dir_emit(ctx, "..", 2, ino, DT_DIR); 294 pos += 1; 295 } 296 297 /* 298 * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around 299 * to prevent "finding" dot and dot-dot on any iteration 300 * other than the first. 301 */ 302 if (ctx->pos == ORANGEFS_ITERATE_NEXT) 303 ctx->pos = 0; 304 305 gossip_debug(GOSSIP_DIR_DEBUG, 306 "%s: dirent_outcount:%d:\n", 307 __func__, 308 readdir_response.orangefs_dirent_outcount); 309 for (i = ctx->pos; 310 i < readdir_response.orangefs_dirent_outcount; 311 i++) { 312 len = readdir_response.dirent_array[i].d_length; 313 current_entry = readdir_response.dirent_array[i].d_name; 314 current_ino = orangefs_khandle_to_ino( 315 &readdir_response.dirent_array[i].khandle); 316 317 gossip_debug(GOSSIP_DIR_DEBUG, 318 "calling dir_emit for %s with len %d" 319 ", ctx->pos %ld\n", 320 current_entry, 321 len, 322 (unsigned long)ctx->pos); 323 /* 324 * type is unknown. We don't return object type 325 * in the dirent_array. This leaves getdents 326 * clueless about type. 327 */ 328 ret = 329 dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); 330 if (!ret) 331 break; 332 ctx->pos++; 333 gossip_debug(GOSSIP_DIR_DEBUG, 334 "%s: ctx->pos:%lld\n", 335 __func__, 336 lld(ctx->pos)); 337 338 } 339 340 /* 341 * we ran all the way through the last batch, set up for 342 * getting another batch... 343 */ 344 if (ret) { 345 *ptoken = readdir_response.token; 346 ctx->pos = ORANGEFS_ITERATE_NEXT; 347 } 348 349 /* 350 * Did we hit the end of the directory? 351 */ 352 if (readdir_response.token == ORANGEFS_READDIR_END) { 353 gossip_debug(GOSSIP_DIR_DEBUG, 354 "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n"); 355 ctx->pos = ORANGEFS_READDIR_END; 356 } 357 358 out_destroy_handle: 359 /* kfree(NULL) is safe */ 360 kfree(readdir_response.dirent_array); 361 out_vfree: 362 gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf); 363 vfree(dents_buf); 364 out_free_op: 365 op_release(new_op); 366 gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret); 367 return ret; 368 } 369 370 static int orangefs_dir_open(struct inode *inode, struct file *file) 371 { 372 __u64 *ptoken; 373 374 file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL); 375 if (!file->private_data) 376 return -ENOMEM; 377 378 ptoken = file->private_data; 379 *ptoken = ORANGEFS_READDIR_START; 380 return 0; 381 } 382 383 static int orangefs_dir_release(struct inode *inode, struct file *file) 384 { 385 orangefs_flush_inode(inode); 386 kfree(file->private_data); 387 return 0; 388 } 389 390 /** ORANGEFS implementation of VFS directory operations */ 391 const struct file_operations orangefs_dir_operations = { 392 .read = generic_read_dir, 393 .iterate = orangefs_readdir, 394 .open = orangefs_dir_open, 395 .release = orangefs_dir_release, 396 }; 397