1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright 2017 Omnibond Systems, L.L.C. 4 */ 5 6 #include "protocol.h" 7 #include "orangefs-kernel.h" 8 #include "orangefs-bufmap.h" 9 10 struct orangefs_dir_part { 11 struct orangefs_dir_part *next; 12 size_t len; 13 }; 14 15 struct orangefs_dir { 16 __u64 token; 17 struct orangefs_dir_part *part; 18 loff_t end; 19 int error; 20 }; 21 22 #define PART_SHIFT (24) 23 #define PART_SIZE (1<<24) 24 #define PART_MASK (~(PART_SIZE - 1)) 25 26 /* 27 * There can be up to 512 directory entries. Each entry is encoded as 28 * follows: 29 * 4 bytes: string size (n) 30 * n bytes: string 31 * 1 byte: trailing zero 32 * padding to 8 bytes 33 * 16 bytes: khandle 34 * padding to 8 bytes 35 * 36 * The trailer_buf starts with a struct orangefs_readdir_response_s 37 * which must be skipped to get to the directory data. 38 * 39 * The data which is received from the userspace daemon is termed a 40 * part and is stored in a linked list in case more than one part is 41 * needed for a large directory. 42 * 43 * The position pointer (ctx->pos) encodes the part and offset on which 44 * to begin reading at. Bits above PART_SHIFT encode the part and bits 45 * below PART_SHIFT encode the offset. Parts are stored in a linked 46 * list which grows as data is received from the server. The overhead 47 * associated with managing the list is presumed to be small compared to 48 * the overhead of communicating with the server. 49 * 50 * As data is received from the server, it is placed at the end of the 51 * part list. Data is parsed from the current position as it is needed. 52 * When data is determined to be corrupt, it is either because the 53 * userspace component has sent back corrupt data or because the file 54 * pointer has been moved to an invalid location. Since the two cannot 55 * be differentiated, return EIO. 56 * 57 * Part zero is synthesized to contains `.' and `..'. Part one is the 58 * first part of the part list. 59 */ 60 61 static int do_readdir(struct orangefs_inode_s *oi, 62 struct orangefs_dir *od, struct dentry *dentry, 63 struct orangefs_kernel_op_s *op) 64 { 65 struct orangefs_readdir_response_s *resp; 66 int bufi, r; 67 68 /* 69 * Despite the badly named field, readdir does not use shared 70 * memory. However, there are a limited number of readdir 71 * slots, which must be allocated here. This flag simply tells 72 * the op scheduler to return the op here for retry. 73 */ 74 op->uses_shared_memory = 1; 75 op->upcall.req.readdir.refn = oi->refn; 76 op->upcall.req.readdir.token = od->token; 77 op->upcall.req.readdir.max_dirent_count = 78 ORANGEFS_MAX_DIRENT_COUNT_READDIR; 79 80 again: 81 bufi = orangefs_readdir_index_get(); 82 if (bufi < 0) { 83 od->error = bufi; 84 return bufi; 85 } 86 87 op->upcall.req.readdir.buf_index = bufi; 88 89 r = service_operation(op, "orangefs_readdir", 90 get_interruptible_flag(dentry->d_inode)); 91 92 orangefs_readdir_index_put(bufi); 93 94 if (op_state_purged(op)) { 95 if (r == -EAGAIN) { 96 vfree(op->downcall.trailer_buf); 97 goto again; 98 } else if (r == -EIO) { 99 vfree(op->downcall.trailer_buf); 100 od->error = r; 101 return r; 102 } 103 } 104 105 if (r < 0) { 106 vfree(op->downcall.trailer_buf); 107 od->error = r; 108 return r; 109 } else if (op->downcall.status) { 110 vfree(op->downcall.trailer_buf); 111 od->error = op->downcall.status; 112 return op->downcall.status; 113 } 114 115 /* 116 * The maximum size is size per entry times the 512 entries plus 117 * the header. This is well under the limit. 118 */ 119 if (op->downcall.trailer_size > PART_SIZE) { 120 vfree(op->downcall.trailer_buf); 121 od->error = -EIO; 122 return -EIO; 123 } 124 125 resp = (struct orangefs_readdir_response_s *) 126 op->downcall.trailer_buf; 127 od->token = resp->token; 128 return 0; 129 } 130 131 static int parse_readdir(struct orangefs_dir *od, 132 struct orangefs_kernel_op_s *op) 133 { 134 struct orangefs_dir_part *part, *new; 135 size_t count; 136 137 count = 1; 138 part = od->part; 139 while (part) { 140 count++; 141 if (part->next) 142 part = part->next; 143 else 144 break; 145 } 146 147 new = (void *)op->downcall.trailer_buf; 148 new->next = NULL; 149 new->len = op->downcall.trailer_size - 150 sizeof(struct orangefs_readdir_response_s); 151 if (!od->part) 152 od->part = new; 153 else 154 part->next = new; 155 count++; 156 od->end = count << PART_SHIFT; 157 158 return 0; 159 } 160 161 static int orangefs_dir_more(struct orangefs_inode_s *oi, 162 struct orangefs_dir *od, struct dentry *dentry) 163 { 164 struct orangefs_kernel_op_s *op; 165 int r; 166 167 op = op_alloc(ORANGEFS_VFS_OP_READDIR); 168 if (!op) { 169 od->error = -ENOMEM; 170 return -ENOMEM; 171 } 172 r = do_readdir(oi, od, dentry, op); 173 if (r) { 174 od->error = r; 175 goto out; 176 } 177 r = parse_readdir(od, op); 178 if (r) { 179 od->error = r; 180 goto out; 181 } 182 183 od->error = 0; 184 out: 185 op_release(op); 186 return od->error; 187 } 188 189 static int fill_from_part(struct orangefs_dir_part *part, 190 struct dir_context *ctx) 191 { 192 const int offset = sizeof(struct orangefs_readdir_response_s); 193 struct orangefs_khandle *khandle; 194 __u32 *len, padlen; 195 loff_t i; 196 char *s; 197 i = ctx->pos & ~PART_MASK; 198 199 /* The file offset from userspace is too large. */ 200 if (i > part->len) 201 return 1; 202 203 /* 204 * If the seek pointer is positioned just before an entry it 205 * should find the next entry. 206 */ 207 if (i % 8) 208 i = i + (8 - i%8)%8; 209 210 while (i < part->len) { 211 if (part->len < i + sizeof *len) 212 break; 213 len = (void *)part + offset + i; 214 /* 215 * len is the size of the string itself. padlen is the 216 * total size of the encoded string. 217 */ 218 padlen = (sizeof *len + *len + 1) + 219 (8 - (sizeof *len + *len + 1)%8)%8; 220 if (part->len < i + padlen + sizeof *khandle) 221 goto next; 222 s = (void *)part + offset + i + sizeof *len; 223 if (s[*len] != 0) 224 goto next; 225 khandle = (void *)part + offset + i + padlen; 226 if (!dir_emit(ctx, s, *len, 227 orangefs_khandle_to_ino(khandle), 228 DT_UNKNOWN)) 229 return 0; 230 i += padlen + sizeof *khandle; 231 i = i + (8 - i%8)%8; 232 BUG_ON(i > part->len); 233 ctx->pos = (ctx->pos & PART_MASK) | i; 234 continue; 235 next: 236 i += 8; 237 } 238 return 1; 239 } 240 241 static int orangefs_dir_fill(struct orangefs_inode_s *oi, 242 struct orangefs_dir *od, struct dentry *dentry, 243 struct dir_context *ctx) 244 { 245 struct orangefs_dir_part *part; 246 size_t count; 247 248 count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; 249 250 part = od->part; 251 while (part->next && count) { 252 count--; 253 part = part->next; 254 } 255 /* This means the userspace file offset is invalid. */ 256 if (count) { 257 od->error = -EIO; 258 return -EIO; 259 } 260 261 while (part && part->len) { 262 int r; 263 r = fill_from_part(part, ctx); 264 if (r < 0) { 265 od->error = r; 266 return r; 267 } else if (r == 0) { 268 /* Userspace buffer is full. */ 269 break; 270 } else { 271 /* 272 * The part ran out of data. Move to the next 273 * part. */ 274 ctx->pos = (ctx->pos & PART_MASK) + 275 (1 << PART_SHIFT); 276 part = part->next; 277 } 278 } 279 return 0; 280 } 281 282 static loff_t orangefs_dir_llseek(struct file *file, loff_t offset, 283 int whence) 284 { 285 struct orangefs_dir *od = file->private_data; 286 /* 287 * Delete the stored data so userspace sees new directory 288 * entries. 289 */ 290 if (!whence && offset < od->end) { 291 struct orangefs_dir_part *part = od->part; 292 while (part) { 293 struct orangefs_dir_part *next = part->next; 294 vfree(part); 295 part = next; 296 } 297 od->token = ORANGEFS_ITERATE_START; 298 od->part = NULL; 299 od->end = 1 << PART_SHIFT; 300 } 301 return default_llseek(file, offset, whence); 302 } 303 304 static int orangefs_dir_iterate(struct file *file, 305 struct dir_context *ctx) 306 { 307 struct orangefs_inode_s *oi; 308 struct orangefs_dir *od; 309 struct dentry *dentry; 310 int r; 311 312 dentry = file->f_path.dentry; 313 oi = ORANGEFS_I(dentry->d_inode); 314 od = file->private_data; 315 316 if (od->error) 317 return od->error; 318 319 if (ctx->pos == 0) { 320 if (!dir_emit_dot(file, ctx)) 321 return 0; 322 ctx->pos++; 323 } 324 if (ctx->pos == 1) { 325 if (!dir_emit_dotdot(file, ctx)) 326 return 0; 327 ctx->pos = 1 << PART_SHIFT; 328 } 329 330 /* 331 * The seek position is in the first synthesized part but is not 332 * valid. 333 */ 334 if ((ctx->pos & PART_MASK) == 0) 335 return -EIO; 336 337 r = 0; 338 339 /* 340 * Must read more if the user has sought past what has been read 341 * so far. Stop a user who has sought past the end. 342 */ 343 while (od->token != ORANGEFS_ITERATE_END && 344 ctx->pos > od->end) { 345 r = orangefs_dir_more(oi, od, dentry); 346 if (r) 347 return r; 348 } 349 if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end) 350 return -EIO; 351 352 /* Then try to fill if there's any left in the buffer. */ 353 if (ctx->pos < od->end) { 354 r = orangefs_dir_fill(oi, od, dentry, ctx); 355 if (r) 356 return r; 357 } 358 359 /* Finally get some more and try to fill. */ 360 if (od->token != ORANGEFS_ITERATE_END) { 361 r = orangefs_dir_more(oi, od, dentry); 362 if (r) 363 return r; 364 r = orangefs_dir_fill(oi, od, dentry, ctx); 365 } 366 367 return r; 368 } 369 370 static int orangefs_dir_open(struct inode *inode, struct file *file) 371 { 372 struct orangefs_dir *od; 373 file->private_data = kmalloc(sizeof(struct orangefs_dir), 374 GFP_KERNEL); 375 if (!file->private_data) 376 return -ENOMEM; 377 od = file->private_data; 378 od->token = ORANGEFS_ITERATE_START; 379 od->part = NULL; 380 od->end = 1 << PART_SHIFT; 381 od->error = 0; 382 return 0; 383 } 384 385 static int orangefs_dir_release(struct inode *inode, struct file *file) 386 { 387 struct orangefs_dir *od = file->private_data; 388 struct orangefs_dir_part *part = od->part; 389 while (part) { 390 struct orangefs_dir_part *next = part->next; 391 vfree(part); 392 part = next; 393 } 394 kfree(od); 395 return 0; 396 } 397 398 const struct file_operations orangefs_dir_operations = { 399 .llseek = orangefs_dir_llseek, 400 .read = generic_read_dir, 401 .iterate = orangefs_dir_iterate, 402 .open = orangefs_dir_open, 403 .release = orangefs_dir_release 404 }; 405