1 /* 2 * dev_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7 #include <linux/device_cgroup.h> 8 #include <linux/cgroup.h> 9 #include <linux/ctype.h> 10 #include <linux/list.h> 11 #include <linux/uaccess.h> 12 #include <linux/seq_file.h> 13 14 #define ACC_MKNOD 1 15 #define ACC_READ 2 16 #define ACC_WRITE 4 17 #define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 18 19 #define DEV_BLOCK 1 20 #define DEV_CHAR 2 21 #define DEV_ALL 4 /* this represents all devices */ 22 23 /* 24 * whitelist locking rules: 25 * cgroup_lock() cannot be taken under dev_cgroup->lock. 26 * dev_cgroup->lock can be taken with or without cgroup_lock(). 27 * 28 * modifications always require cgroup_lock 29 * modifications to a list which is visible require the 30 * dev_cgroup->lock *and* cgroup_lock() 31 * walking the list requires dev_cgroup->lock or cgroup_lock(). 32 * 33 * reasoning: dev_whitelist_copy() needs to kmalloc, so needs 34 * a mutex, which the cgroup_lock() is. Since modifying 35 * a visible list requires both locks, either lock can be 36 * taken for walking the list. 37 */ 38 39 struct dev_whitelist_item { 40 u32 major, minor; 41 short type; 42 short access; 43 struct list_head list; 44 }; 45 46 struct dev_cgroup { 47 struct cgroup_subsys_state css; 48 struct list_head whitelist; 49 spinlock_t lock; 50 }; 51 52 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) 53 { 54 return container_of(s, struct dev_cgroup, css); 55 } 56 57 static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 58 { 59 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 60 } 61 62 struct cgroup_subsys devices_subsys; 63 64 static int devcgroup_can_attach(struct cgroup_subsys *ss, 65 struct cgroup *new_cgroup, struct task_struct *task) 66 { 67 if (current != task && !capable(CAP_SYS_ADMIN)) 68 return -EPERM; 69 70 return 0; 71 } 72 73 /* 74 * called under cgroup_lock() 75 */ 76 static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 77 { 78 struct dev_whitelist_item *wh, *tmp, *new; 79 80 list_for_each_entry(wh, orig, list) { 81 new = kmalloc(sizeof(*wh), GFP_KERNEL); 82 if (!new) 83 goto free_and_exit; 84 new->major = wh->major; 85 new->minor = wh->minor; 86 new->type = wh->type; 87 new->access = wh->access; 88 list_add_tail(&new->list, dest); 89 } 90 91 return 0; 92 93 free_and_exit: 94 list_for_each_entry_safe(wh, tmp, dest, list) { 95 list_del(&wh->list); 96 kfree(wh); 97 } 98 return -ENOMEM; 99 } 100 101 /* Stupid prototype - don't bother combining existing entries */ 102 /* 103 * called under cgroup_lock() 104 * since the list is visible to other tasks, we need the spinlock also 105 */ 106 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 107 struct dev_whitelist_item *wh) 108 { 109 struct dev_whitelist_item *whcopy, *walk; 110 111 whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL); 112 if (!whcopy) 113 return -ENOMEM; 114 115 memcpy(whcopy, wh, sizeof(*whcopy)); 116 spin_lock(&dev_cgroup->lock); 117 list_for_each_entry(walk, &dev_cgroup->whitelist, list) { 118 if (walk->type != wh->type) 119 continue; 120 if (walk->major != wh->major) 121 continue; 122 if (walk->minor != wh->minor) 123 continue; 124 125 walk->access |= wh->access; 126 kfree(whcopy); 127 whcopy = NULL; 128 } 129 130 if (whcopy != NULL) 131 list_add_tail(&whcopy->list, &dev_cgroup->whitelist); 132 spin_unlock(&dev_cgroup->lock); 133 return 0; 134 } 135 136 /* 137 * called under cgroup_lock() 138 * since the list is visible to other tasks, we need the spinlock also 139 */ 140 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 141 struct dev_whitelist_item *wh) 142 { 143 struct dev_whitelist_item *walk, *tmp; 144 145 spin_lock(&dev_cgroup->lock); 146 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 147 if (walk->type == DEV_ALL) 148 goto remove; 149 if (walk->type != wh->type) 150 continue; 151 if (walk->major != ~0 && walk->major != wh->major) 152 continue; 153 if (walk->minor != ~0 && walk->minor != wh->minor) 154 continue; 155 156 remove: 157 walk->access &= ~wh->access; 158 if (!walk->access) { 159 list_del(&walk->list); 160 kfree(walk); 161 } 162 } 163 spin_unlock(&dev_cgroup->lock); 164 } 165 166 /* 167 * called from kernel/cgroup.c with cgroup_lock() held. 168 */ 169 static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 170 struct cgroup *cgroup) 171 { 172 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 173 struct cgroup *parent_cgroup; 174 int ret; 175 176 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 177 if (!dev_cgroup) 178 return ERR_PTR(-ENOMEM); 179 INIT_LIST_HEAD(&dev_cgroup->whitelist); 180 parent_cgroup = cgroup->parent; 181 182 if (parent_cgroup == NULL) { 183 struct dev_whitelist_item *wh; 184 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 185 if (!wh) { 186 kfree(dev_cgroup); 187 return ERR_PTR(-ENOMEM); 188 } 189 wh->minor = wh->major = ~0; 190 wh->type = DEV_ALL; 191 wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE; 192 list_add(&wh->list, &dev_cgroup->whitelist); 193 } else { 194 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 195 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 196 &parent_dev_cgroup->whitelist); 197 if (ret) { 198 kfree(dev_cgroup); 199 return ERR_PTR(ret); 200 } 201 } 202 203 spin_lock_init(&dev_cgroup->lock); 204 return &dev_cgroup->css; 205 } 206 207 static void devcgroup_destroy(struct cgroup_subsys *ss, 208 struct cgroup *cgroup) 209 { 210 struct dev_cgroup *dev_cgroup; 211 struct dev_whitelist_item *wh, *tmp; 212 213 dev_cgroup = cgroup_to_devcgroup(cgroup); 214 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 215 list_del(&wh->list); 216 kfree(wh); 217 } 218 kfree(dev_cgroup); 219 } 220 221 #define DEVCG_ALLOW 1 222 #define DEVCG_DENY 2 223 #define DEVCG_LIST 3 224 225 #define MAJMINLEN 10 226 #define ACCLEN 4 227 228 static void set_access(char *acc, short access) 229 { 230 int idx = 0; 231 memset(acc, 0, ACCLEN); 232 if (access & ACC_READ) 233 acc[idx++] = 'r'; 234 if (access & ACC_WRITE) 235 acc[idx++] = 'w'; 236 if (access & ACC_MKNOD) 237 acc[idx++] = 'm'; 238 } 239 240 static char type_to_char(short type) 241 { 242 if (type == DEV_ALL) 243 return 'a'; 244 if (type == DEV_CHAR) 245 return 'c'; 246 if (type == DEV_BLOCK) 247 return 'b'; 248 return 'X'; 249 } 250 251 static void set_majmin(char *str, unsigned m) 252 { 253 memset(str, 0, MAJMINLEN); 254 if (m == ~0) 255 sprintf(str, "*"); 256 else 257 snprintf(str, MAJMINLEN, "%d", m); 258 } 259 260 static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 261 struct seq_file *m) 262 { 263 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 264 struct dev_whitelist_item *wh; 265 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 266 267 spin_lock(&devcgroup->lock); 268 list_for_each_entry(wh, &devcgroup->whitelist, list) { 269 set_access(acc, wh->access); 270 set_majmin(maj, wh->major); 271 set_majmin(min, wh->minor); 272 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 273 maj, min, acc); 274 } 275 spin_unlock(&devcgroup->lock); 276 277 return 0; 278 } 279 280 /* 281 * may_access_whitelist: 282 * does the access granted to dev_cgroup c contain the access 283 * requested in whitelist item refwh. 284 * return 1 if yes, 0 if no. 285 * call with c->lock held 286 */ 287 static int may_access_whitelist(struct dev_cgroup *c, 288 struct dev_whitelist_item *refwh) 289 { 290 struct dev_whitelist_item *whitem; 291 292 list_for_each_entry(whitem, &c->whitelist, list) { 293 if (whitem->type & DEV_ALL) 294 return 1; 295 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 296 continue; 297 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 298 continue; 299 if (whitem->major != ~0 && whitem->major != refwh->major) 300 continue; 301 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 302 continue; 303 if (refwh->access & (~(whitem->access | ACC_MASK))) 304 continue; 305 return 1; 306 } 307 return 0; 308 } 309 310 /* 311 * parent_has_perm: 312 * when adding a new allow rule to a device whitelist, the rule 313 * must be allowed in the parent device 314 */ 315 static int parent_has_perm(struct cgroup *childcg, 316 struct dev_whitelist_item *wh) 317 { 318 struct cgroup *pcg = childcg->parent; 319 struct dev_cgroup *parent; 320 int ret; 321 322 if (!pcg) 323 return 1; 324 parent = cgroup_to_devcgroup(pcg); 325 spin_lock(&parent->lock); 326 ret = may_access_whitelist(parent, wh); 327 spin_unlock(&parent->lock); 328 return ret; 329 } 330 331 /* 332 * Modify the whitelist using allow/deny rules. 333 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 334 * so we can give a container CAP_MKNOD to let it create devices but not 335 * modify the whitelist. 336 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 337 * us to also grant CAP_SYS_ADMIN to containers without giving away the 338 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 339 * 340 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 341 * new access is only allowed if you're in the top-level cgroup, or your 342 * parent cgroup has the access you're asking for. 343 */ 344 static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, 345 struct file *file, const char __user *userbuf, 346 size_t nbytes, loff_t *ppos) 347 { 348 struct cgroup *cur_cgroup; 349 struct dev_cgroup *devcgroup, *cur_devcgroup; 350 int filetype = cft->private; 351 char *buffer, *b; 352 int retval = 0, count; 353 struct dev_whitelist_item wh; 354 355 if (!capable(CAP_SYS_ADMIN)) 356 return -EPERM; 357 358 devcgroup = cgroup_to_devcgroup(cgroup); 359 cur_cgroup = task_cgroup(current, devices_subsys.subsys_id); 360 cur_devcgroup = cgroup_to_devcgroup(cur_cgroup); 361 362 buffer = kmalloc(nbytes+1, GFP_KERNEL); 363 if (!buffer) 364 return -ENOMEM; 365 366 if (copy_from_user(buffer, userbuf, nbytes)) { 367 retval = -EFAULT; 368 goto out1; 369 } 370 buffer[nbytes] = 0; /* nul-terminate */ 371 372 cgroup_lock(); 373 if (cgroup_is_removed(cgroup)) { 374 retval = -ENODEV; 375 goto out2; 376 } 377 378 memset(&wh, 0, sizeof(wh)); 379 b = buffer; 380 381 switch (*b) { 382 case 'a': 383 wh.type = DEV_ALL; 384 wh.access = ACC_MASK; 385 goto handle; 386 case 'b': 387 wh.type = DEV_BLOCK; 388 break; 389 case 'c': 390 wh.type = DEV_CHAR; 391 break; 392 default: 393 retval = -EINVAL; 394 goto out2; 395 } 396 b++; 397 if (!isspace(*b)) { 398 retval = -EINVAL; 399 goto out2; 400 } 401 b++; 402 if (*b == '*') { 403 wh.major = ~0; 404 b++; 405 } else if (isdigit(*b)) { 406 wh.major = 0; 407 while (isdigit(*b)) { 408 wh.major = wh.major*10+(*b-'0'); 409 b++; 410 } 411 } else { 412 retval = -EINVAL; 413 goto out2; 414 } 415 if (*b != ':') { 416 retval = -EINVAL; 417 goto out2; 418 } 419 b++; 420 421 /* read minor */ 422 if (*b == '*') { 423 wh.minor = ~0; 424 b++; 425 } else if (isdigit(*b)) { 426 wh.minor = 0; 427 while (isdigit(*b)) { 428 wh.minor = wh.minor*10+(*b-'0'); 429 b++; 430 } 431 } else { 432 retval = -EINVAL; 433 goto out2; 434 } 435 if (!isspace(*b)) { 436 retval = -EINVAL; 437 goto out2; 438 } 439 for (b++, count = 0; count < 3; count++, b++) { 440 switch (*b) { 441 case 'r': 442 wh.access |= ACC_READ; 443 break; 444 case 'w': 445 wh.access |= ACC_WRITE; 446 break; 447 case 'm': 448 wh.access |= ACC_MKNOD; 449 break; 450 case '\n': 451 case '\0': 452 count = 3; 453 break; 454 default: 455 retval = -EINVAL; 456 goto out2; 457 } 458 } 459 460 handle: 461 retval = 0; 462 switch (filetype) { 463 case DEVCG_ALLOW: 464 if (!parent_has_perm(cgroup, &wh)) 465 retval = -EPERM; 466 else 467 retval = dev_whitelist_add(devcgroup, &wh); 468 break; 469 case DEVCG_DENY: 470 dev_whitelist_rm(devcgroup, &wh); 471 break; 472 default: 473 retval = -EINVAL; 474 goto out2; 475 } 476 477 if (retval == 0) 478 retval = nbytes; 479 480 out2: 481 cgroup_unlock(); 482 out1: 483 kfree(buffer); 484 return retval; 485 } 486 487 static struct cftype dev_cgroup_files[] = { 488 { 489 .name = "allow", 490 .write = devcgroup_access_write, 491 .private = DEVCG_ALLOW, 492 }, 493 { 494 .name = "deny", 495 .write = devcgroup_access_write, 496 .private = DEVCG_DENY, 497 }, 498 { 499 .name = "list", 500 .read_seq_string = devcgroup_seq_read, 501 .private = DEVCG_LIST, 502 }, 503 }; 504 505 static int devcgroup_populate(struct cgroup_subsys *ss, 506 struct cgroup *cgroup) 507 { 508 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 509 ARRAY_SIZE(dev_cgroup_files)); 510 } 511 512 struct cgroup_subsys devices_subsys = { 513 .name = "devices", 514 .can_attach = devcgroup_can_attach, 515 .create = devcgroup_create, 516 .destroy = devcgroup_destroy, 517 .populate = devcgroup_populate, 518 .subsys_id = devices_subsys_id, 519 }; 520 521 int devcgroup_inode_permission(struct inode *inode, int mask) 522 { 523 struct dev_cgroup *dev_cgroup; 524 struct dev_whitelist_item *wh; 525 526 dev_t device = inode->i_rdev; 527 if (!device) 528 return 0; 529 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) 530 return 0; 531 dev_cgroup = css_to_devcgroup(task_subsys_state(current, 532 devices_subsys_id)); 533 if (!dev_cgroup) 534 return 0; 535 536 spin_lock(&dev_cgroup->lock); 537 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 538 if (wh->type & DEV_ALL) 539 goto acc_check; 540 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 541 continue; 542 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 543 continue; 544 if (wh->major != ~0 && wh->major != imajor(inode)) 545 continue; 546 if (wh->minor != ~0 && wh->minor != iminor(inode)) 547 continue; 548 acc_check: 549 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 550 continue; 551 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 552 continue; 553 spin_unlock(&dev_cgroup->lock); 554 return 0; 555 } 556 spin_unlock(&dev_cgroup->lock); 557 558 return -EPERM; 559 } 560 561 int devcgroup_inode_mknod(int mode, dev_t dev) 562 { 563 struct dev_cgroup *dev_cgroup; 564 struct dev_whitelist_item *wh; 565 566 dev_cgroup = css_to_devcgroup(task_subsys_state(current, 567 devices_subsys_id)); 568 if (!dev_cgroup) 569 return 0; 570 571 spin_lock(&dev_cgroup->lock); 572 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 573 if (wh->type & DEV_ALL) 574 goto acc_check; 575 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 576 continue; 577 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 578 continue; 579 if (wh->major != ~0 && wh->major != MAJOR(dev)) 580 continue; 581 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 582 continue; 583 acc_check: 584 if (!(wh->access & ACC_MKNOD)) 585 continue; 586 spin_unlock(&dev_cgroup->lock); 587 return 0; 588 } 589 spin_unlock(&dev_cgroup->lock); 590 return -EPERM; 591 } 592