1 /* 2 * dev_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7 #include <linux/device_cgroup.h> 8 #include <linux/cgroup.h> 9 #include <linux/ctype.h> 10 #include <linux/list.h> 11 #include <linux/uaccess.h> 12 #include <linux/seq_file.h> 13 14 #define ACC_MKNOD 1 15 #define ACC_READ 2 16 #define ACC_WRITE 4 17 #define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 18 19 #define DEV_BLOCK 1 20 #define DEV_CHAR 2 21 #define DEV_ALL 4 /* this represents all devices */ 22 23 /* 24 * whitelist locking rules: 25 * cgroup_lock() cannot be taken under dev_cgroup->lock. 26 * dev_cgroup->lock can be taken with or without cgroup_lock(). 27 * 28 * modifications always require cgroup_lock 29 * modifications to a list which is visible require the 30 * dev_cgroup->lock *and* cgroup_lock() 31 * walking the list requires dev_cgroup->lock or cgroup_lock(). 32 * 33 * reasoning: dev_whitelist_copy() needs to kmalloc, so needs 34 * a mutex, which the cgroup_lock() is. Since modifying 35 * a visible list requires both locks, either lock can be 36 * taken for walking the list. 37 */ 38 39 struct dev_whitelist_item { 40 u32 major, minor; 41 short type; 42 short access; 43 struct list_head list; 44 }; 45 46 struct dev_cgroup { 47 struct cgroup_subsys_state css; 48 struct list_head whitelist; 49 spinlock_t lock; 50 }; 51 52 static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 53 { 54 return container_of(cgroup_subsys_state(cgroup, devices_subsys_id), 55 struct dev_cgroup, css); 56 } 57 58 struct cgroup_subsys devices_subsys; 59 60 static int devcgroup_can_attach(struct cgroup_subsys *ss, 61 struct cgroup *new_cgroup, struct task_struct *task) 62 { 63 if (current != task && !capable(CAP_SYS_ADMIN)) 64 return -EPERM; 65 66 return 0; 67 } 68 69 /* 70 * called under cgroup_lock() 71 */ 72 static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 73 { 74 struct dev_whitelist_item *wh, *tmp, *new; 75 76 list_for_each_entry(wh, orig, list) { 77 new = kmalloc(sizeof(*wh), GFP_KERNEL); 78 if (!new) 79 goto free_and_exit; 80 new->major = wh->major; 81 new->minor = wh->minor; 82 new->type = wh->type; 83 new->access = wh->access; 84 list_add_tail(&new->list, dest); 85 } 86 87 return 0; 88 89 free_and_exit: 90 list_for_each_entry_safe(wh, tmp, dest, list) { 91 list_del(&wh->list); 92 kfree(wh); 93 } 94 return -ENOMEM; 95 } 96 97 /* Stupid prototype - don't bother combining existing entries */ 98 /* 99 * called under cgroup_lock() 100 * since the list is visible to other tasks, we need the spinlock also 101 */ 102 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 103 struct dev_whitelist_item *wh) 104 { 105 struct dev_whitelist_item *whcopy; 106 107 whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL); 108 if (!whcopy) 109 return -ENOMEM; 110 111 memcpy(whcopy, wh, sizeof(*whcopy)); 112 spin_lock(&dev_cgroup->lock); 113 list_add_tail(&whcopy->list, &dev_cgroup->whitelist); 114 spin_unlock(&dev_cgroup->lock); 115 return 0; 116 } 117 118 /* 119 * called under cgroup_lock() 120 * since the list is visible to other tasks, we need the spinlock also 121 */ 122 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 123 struct dev_whitelist_item *wh) 124 { 125 struct dev_whitelist_item *walk, *tmp; 126 127 spin_lock(&dev_cgroup->lock); 128 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 129 if (walk->type == DEV_ALL) 130 goto remove; 131 if (walk->type != wh->type) 132 continue; 133 if (walk->major != ~0 && walk->major != wh->major) 134 continue; 135 if (walk->minor != ~0 && walk->minor != wh->minor) 136 continue; 137 138 remove: 139 walk->access &= ~wh->access; 140 if (!walk->access) { 141 list_del(&walk->list); 142 kfree(walk); 143 } 144 } 145 spin_unlock(&dev_cgroup->lock); 146 } 147 148 /* 149 * called from kernel/cgroup.c with cgroup_lock() held. 150 */ 151 static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 152 struct cgroup *cgroup) 153 { 154 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 155 struct cgroup *parent_cgroup; 156 int ret; 157 158 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 159 if (!dev_cgroup) 160 return ERR_PTR(-ENOMEM); 161 INIT_LIST_HEAD(&dev_cgroup->whitelist); 162 parent_cgroup = cgroup->parent; 163 164 if (parent_cgroup == NULL) { 165 struct dev_whitelist_item *wh; 166 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 167 if (!wh) { 168 kfree(dev_cgroup); 169 return ERR_PTR(-ENOMEM); 170 } 171 wh->minor = wh->major = ~0; 172 wh->type = DEV_ALL; 173 wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE; 174 list_add(&wh->list, &dev_cgroup->whitelist); 175 } else { 176 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 177 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 178 &parent_dev_cgroup->whitelist); 179 if (ret) { 180 kfree(dev_cgroup); 181 return ERR_PTR(ret); 182 } 183 } 184 185 spin_lock_init(&dev_cgroup->lock); 186 return &dev_cgroup->css; 187 } 188 189 static void devcgroup_destroy(struct cgroup_subsys *ss, 190 struct cgroup *cgroup) 191 { 192 struct dev_cgroup *dev_cgroup; 193 struct dev_whitelist_item *wh, *tmp; 194 195 dev_cgroup = cgroup_to_devcgroup(cgroup); 196 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 197 list_del(&wh->list); 198 kfree(wh); 199 } 200 kfree(dev_cgroup); 201 } 202 203 #define DEVCG_ALLOW 1 204 #define DEVCG_DENY 2 205 #define DEVCG_LIST 3 206 207 #define MAJMINLEN 10 208 #define ACCLEN 4 209 210 static void set_access(char *acc, short access) 211 { 212 int idx = 0; 213 memset(acc, 0, ACCLEN); 214 if (access & ACC_READ) 215 acc[idx++] = 'r'; 216 if (access & ACC_WRITE) 217 acc[idx++] = 'w'; 218 if (access & ACC_MKNOD) 219 acc[idx++] = 'm'; 220 } 221 222 static char type_to_char(short type) 223 { 224 if (type == DEV_ALL) 225 return 'a'; 226 if (type == DEV_CHAR) 227 return 'c'; 228 if (type == DEV_BLOCK) 229 return 'b'; 230 return 'X'; 231 } 232 233 static void set_majmin(char *str, unsigned m) 234 { 235 memset(str, 0, MAJMINLEN); 236 if (m == ~0) 237 sprintf(str, "*"); 238 else 239 snprintf(str, MAJMINLEN, "%d", m); 240 } 241 242 static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 243 struct seq_file *m) 244 { 245 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 246 struct dev_whitelist_item *wh; 247 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 248 249 spin_lock(&devcgroup->lock); 250 list_for_each_entry(wh, &devcgroup->whitelist, list) { 251 set_access(acc, wh->access); 252 set_majmin(maj, wh->major); 253 set_majmin(min, wh->minor); 254 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 255 maj, min, acc); 256 } 257 spin_unlock(&devcgroup->lock); 258 259 return 0; 260 } 261 262 /* 263 * may_access_whitelist: 264 * does the access granted to dev_cgroup c contain the access 265 * requested in whitelist item refwh. 266 * return 1 if yes, 0 if no. 267 * call with c->lock held 268 */ 269 static int may_access_whitelist(struct dev_cgroup *c, 270 struct dev_whitelist_item *refwh) 271 { 272 struct dev_whitelist_item *whitem; 273 274 list_for_each_entry(whitem, &c->whitelist, list) { 275 if (whitem->type & DEV_ALL) 276 return 1; 277 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 278 continue; 279 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 280 continue; 281 if (whitem->major != ~0 && whitem->major != refwh->major) 282 continue; 283 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 284 continue; 285 if (refwh->access & (~(whitem->access | ACC_MASK))) 286 continue; 287 return 1; 288 } 289 return 0; 290 } 291 292 /* 293 * parent_has_perm: 294 * when adding a new allow rule to a device whitelist, the rule 295 * must be allowed in the parent device 296 */ 297 static int parent_has_perm(struct cgroup *childcg, 298 struct dev_whitelist_item *wh) 299 { 300 struct cgroup *pcg = childcg->parent; 301 struct dev_cgroup *parent; 302 int ret; 303 304 if (!pcg) 305 return 1; 306 parent = cgroup_to_devcgroup(pcg); 307 spin_lock(&parent->lock); 308 ret = may_access_whitelist(parent, wh); 309 spin_unlock(&parent->lock); 310 return ret; 311 } 312 313 /* 314 * Modify the whitelist using allow/deny rules. 315 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 316 * so we can give a container CAP_MKNOD to let it create devices but not 317 * modify the whitelist. 318 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 319 * us to also grant CAP_SYS_ADMIN to containers without giving away the 320 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 321 * 322 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 323 * new access is only allowed if you're in the top-level cgroup, or your 324 * parent cgroup has the access you're asking for. 325 */ 326 static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, 327 struct file *file, const char __user *userbuf, 328 size_t nbytes, loff_t *ppos) 329 { 330 struct cgroup *cur_cgroup; 331 struct dev_cgroup *devcgroup, *cur_devcgroup; 332 int filetype = cft->private; 333 char *buffer, *b; 334 int retval = 0, count; 335 struct dev_whitelist_item wh; 336 337 if (!capable(CAP_SYS_ADMIN)) 338 return -EPERM; 339 340 devcgroup = cgroup_to_devcgroup(cgroup); 341 cur_cgroup = task_cgroup(current, devices_subsys.subsys_id); 342 cur_devcgroup = cgroup_to_devcgroup(cur_cgroup); 343 344 buffer = kmalloc(nbytes+1, GFP_KERNEL); 345 if (!buffer) 346 return -ENOMEM; 347 348 if (copy_from_user(buffer, userbuf, nbytes)) { 349 retval = -EFAULT; 350 goto out1; 351 } 352 buffer[nbytes] = 0; /* nul-terminate */ 353 354 cgroup_lock(); 355 if (cgroup_is_removed(cgroup)) { 356 retval = -ENODEV; 357 goto out2; 358 } 359 360 memset(&wh, 0, sizeof(wh)); 361 b = buffer; 362 363 switch (*b) { 364 case 'a': 365 wh.type = DEV_ALL; 366 wh.access = ACC_MASK; 367 goto handle; 368 case 'b': 369 wh.type = DEV_BLOCK; 370 break; 371 case 'c': 372 wh.type = DEV_CHAR; 373 break; 374 default: 375 retval = -EINVAL; 376 goto out2; 377 } 378 b++; 379 if (!isspace(*b)) { 380 retval = -EINVAL; 381 goto out2; 382 } 383 b++; 384 if (*b == '*') { 385 wh.major = ~0; 386 b++; 387 } else if (isdigit(*b)) { 388 wh.major = 0; 389 while (isdigit(*b)) { 390 wh.major = wh.major*10+(*b-'0'); 391 b++; 392 } 393 } else { 394 retval = -EINVAL; 395 goto out2; 396 } 397 if (*b != ':') { 398 retval = -EINVAL; 399 goto out2; 400 } 401 b++; 402 403 /* read minor */ 404 if (*b == '*') { 405 wh.minor = ~0; 406 b++; 407 } else if (isdigit(*b)) { 408 wh.minor = 0; 409 while (isdigit(*b)) { 410 wh.minor = wh.minor*10+(*b-'0'); 411 b++; 412 } 413 } else { 414 retval = -EINVAL; 415 goto out2; 416 } 417 if (!isspace(*b)) { 418 retval = -EINVAL; 419 goto out2; 420 } 421 for (b++, count = 0; count < 3; count++, b++) { 422 switch (*b) { 423 case 'r': 424 wh.access |= ACC_READ; 425 break; 426 case 'w': 427 wh.access |= ACC_WRITE; 428 break; 429 case 'm': 430 wh.access |= ACC_MKNOD; 431 break; 432 case '\n': 433 case '\0': 434 count = 3; 435 break; 436 default: 437 retval = -EINVAL; 438 goto out2; 439 } 440 } 441 442 handle: 443 retval = 0; 444 switch (filetype) { 445 case DEVCG_ALLOW: 446 if (!parent_has_perm(cgroup, &wh)) 447 retval = -EPERM; 448 else 449 retval = dev_whitelist_add(devcgroup, &wh); 450 break; 451 case DEVCG_DENY: 452 dev_whitelist_rm(devcgroup, &wh); 453 break; 454 default: 455 retval = -EINVAL; 456 goto out2; 457 } 458 459 if (retval == 0) 460 retval = nbytes; 461 462 out2: 463 cgroup_unlock(); 464 out1: 465 kfree(buffer); 466 return retval; 467 } 468 469 static struct cftype dev_cgroup_files[] = { 470 { 471 .name = "allow", 472 .write = devcgroup_access_write, 473 .private = DEVCG_ALLOW, 474 }, 475 { 476 .name = "deny", 477 .write = devcgroup_access_write, 478 .private = DEVCG_DENY, 479 }, 480 { 481 .name = "list", 482 .read_seq_string = devcgroup_seq_read, 483 .private = DEVCG_LIST, 484 }, 485 }; 486 487 static int devcgroup_populate(struct cgroup_subsys *ss, 488 struct cgroup *cgroup) 489 { 490 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 491 ARRAY_SIZE(dev_cgroup_files)); 492 } 493 494 struct cgroup_subsys devices_subsys = { 495 .name = "devices", 496 .can_attach = devcgroup_can_attach, 497 .create = devcgroup_create, 498 .destroy = devcgroup_destroy, 499 .populate = devcgroup_populate, 500 .subsys_id = devices_subsys_id, 501 }; 502 503 int devcgroup_inode_permission(struct inode *inode, int mask) 504 { 505 struct cgroup *cgroup; 506 struct dev_cgroup *dev_cgroup; 507 struct dev_whitelist_item *wh; 508 509 dev_t device = inode->i_rdev; 510 if (!device) 511 return 0; 512 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) 513 return 0; 514 cgroup = task_cgroup(current, devices_subsys.subsys_id); 515 dev_cgroup = cgroup_to_devcgroup(cgroup); 516 if (!dev_cgroup) 517 return 0; 518 519 spin_lock(&dev_cgroup->lock); 520 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 521 if (wh->type & DEV_ALL) 522 goto acc_check; 523 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 524 continue; 525 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 526 continue; 527 if (wh->major != ~0 && wh->major != imajor(inode)) 528 continue; 529 if (wh->minor != ~0 && wh->minor != iminor(inode)) 530 continue; 531 acc_check: 532 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 533 continue; 534 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 535 continue; 536 spin_unlock(&dev_cgroup->lock); 537 return 0; 538 } 539 spin_unlock(&dev_cgroup->lock); 540 541 return -EPERM; 542 } 543 544 int devcgroup_inode_mknod(int mode, dev_t dev) 545 { 546 struct cgroup *cgroup; 547 struct dev_cgroup *dev_cgroup; 548 struct dev_whitelist_item *wh; 549 550 cgroup = task_cgroup(current, devices_subsys.subsys_id); 551 dev_cgroup = cgroup_to_devcgroup(cgroup); 552 if (!dev_cgroup) 553 return 0; 554 555 spin_lock(&dev_cgroup->lock); 556 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 557 if (wh->type & DEV_ALL) 558 goto acc_check; 559 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 560 continue; 561 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 562 continue; 563 if (wh->major != ~0 && wh->major != MAJOR(dev)) 564 continue; 565 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 566 continue; 567 acc_check: 568 if (!(wh->access & ACC_MKNOD)) 569 continue; 570 spin_unlock(&dev_cgroup->lock); 571 return 0; 572 } 573 spin_unlock(&dev_cgroup->lock); 574 return -EPERM; 575 } 576