1 /* 2 * dev_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7 #include <linux/device_cgroup.h> 8 #include <linux/cgroup.h> 9 #include <linux/ctype.h> 10 #include <linux/list.h> 11 #include <linux/uaccess.h> 12 #include <linux/seq_file.h> 13 14 #define ACC_MKNOD 1 15 #define ACC_READ 2 16 #define ACC_WRITE 4 17 #define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 18 19 #define DEV_BLOCK 1 20 #define DEV_CHAR 2 21 #define DEV_ALL 4 /* this represents all devices */ 22 23 /* 24 * whitelist locking rules: 25 * cgroup_lock() cannot be taken under dev_cgroup->lock. 26 * dev_cgroup->lock can be taken with or without cgroup_lock(). 27 * 28 * modifications always require cgroup_lock 29 * modifications to a list which is visible require the 30 * dev_cgroup->lock *and* cgroup_lock() 31 * walking the list requires dev_cgroup->lock or cgroup_lock(). 32 * 33 * reasoning: dev_whitelist_copy() needs to kmalloc, so needs 34 * a mutex, which the cgroup_lock() is. Since modifying 35 * a visible list requires both locks, either lock can be 36 * taken for walking the list. 37 */ 38 39 struct dev_whitelist_item { 40 u32 major, minor; 41 short type; 42 short access; 43 struct list_head list; 44 struct rcu_head rcu; 45 }; 46 47 struct dev_cgroup { 48 struct cgroup_subsys_state css; 49 struct list_head whitelist; 50 spinlock_t lock; 51 }; 52 53 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) 54 { 55 return container_of(s, struct dev_cgroup, css); 56 } 57 58 static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 59 { 60 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 61 } 62 63 static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) 64 { 65 return css_to_devcgroup(task_subsys_state(task, devices_subsys_id)); 66 } 67 68 struct cgroup_subsys devices_subsys; 69 70 static int devcgroup_can_attach(struct cgroup_subsys *ss, 71 struct cgroup *new_cgroup, struct task_struct *task) 72 { 73 if (current != task && !capable(CAP_SYS_ADMIN)) 74 return -EPERM; 75 76 return 0; 77 } 78 79 /* 80 * called under cgroup_lock() 81 */ 82 static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 83 { 84 struct dev_whitelist_item *wh, *tmp, *new; 85 86 list_for_each_entry(wh, orig, list) { 87 new = kmalloc(sizeof(*wh), GFP_KERNEL); 88 if (!new) 89 goto free_and_exit; 90 new->major = wh->major; 91 new->minor = wh->minor; 92 new->type = wh->type; 93 new->access = wh->access; 94 list_add_tail(&new->list, dest); 95 } 96 97 return 0; 98 99 free_and_exit: 100 list_for_each_entry_safe(wh, tmp, dest, list) { 101 list_del(&wh->list); 102 kfree(wh); 103 } 104 return -ENOMEM; 105 } 106 107 /* Stupid prototype - don't bother combining existing entries */ 108 /* 109 * called under cgroup_lock() 110 * since the list is visible to other tasks, we need the spinlock also 111 */ 112 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 113 struct dev_whitelist_item *wh) 114 { 115 struct dev_whitelist_item *whcopy, *walk; 116 117 whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL); 118 if (!whcopy) 119 return -ENOMEM; 120 121 memcpy(whcopy, wh, sizeof(*whcopy)); 122 spin_lock(&dev_cgroup->lock); 123 list_for_each_entry(walk, &dev_cgroup->whitelist, list) { 124 if (walk->type != wh->type) 125 continue; 126 if (walk->major != wh->major) 127 continue; 128 if (walk->minor != wh->minor) 129 continue; 130 131 walk->access |= wh->access; 132 kfree(whcopy); 133 whcopy = NULL; 134 } 135 136 if (whcopy != NULL) 137 list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist); 138 spin_unlock(&dev_cgroup->lock); 139 return 0; 140 } 141 142 static void whitelist_item_free(struct rcu_head *rcu) 143 { 144 struct dev_whitelist_item *item; 145 146 item = container_of(rcu, struct dev_whitelist_item, rcu); 147 kfree(item); 148 } 149 150 /* 151 * called under cgroup_lock() 152 * since the list is visible to other tasks, we need the spinlock also 153 */ 154 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 155 struct dev_whitelist_item *wh) 156 { 157 struct dev_whitelist_item *walk, *tmp; 158 159 spin_lock(&dev_cgroup->lock); 160 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 161 if (walk->type == DEV_ALL) 162 goto remove; 163 if (walk->type != wh->type) 164 continue; 165 if (walk->major != ~0 && walk->major != wh->major) 166 continue; 167 if (walk->minor != ~0 && walk->minor != wh->minor) 168 continue; 169 170 remove: 171 walk->access &= ~wh->access; 172 if (!walk->access) { 173 list_del_rcu(&walk->list); 174 call_rcu(&walk->rcu, whitelist_item_free); 175 } 176 } 177 spin_unlock(&dev_cgroup->lock); 178 } 179 180 /* 181 * called from kernel/cgroup.c with cgroup_lock() held. 182 */ 183 static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 184 struct cgroup *cgroup) 185 { 186 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 187 struct cgroup *parent_cgroup; 188 int ret; 189 190 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 191 if (!dev_cgroup) 192 return ERR_PTR(-ENOMEM); 193 INIT_LIST_HEAD(&dev_cgroup->whitelist); 194 parent_cgroup = cgroup->parent; 195 196 if (parent_cgroup == NULL) { 197 struct dev_whitelist_item *wh; 198 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 199 if (!wh) { 200 kfree(dev_cgroup); 201 return ERR_PTR(-ENOMEM); 202 } 203 wh->minor = wh->major = ~0; 204 wh->type = DEV_ALL; 205 wh->access = ACC_MASK; 206 list_add(&wh->list, &dev_cgroup->whitelist); 207 } else { 208 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 209 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 210 &parent_dev_cgroup->whitelist); 211 if (ret) { 212 kfree(dev_cgroup); 213 return ERR_PTR(ret); 214 } 215 } 216 217 spin_lock_init(&dev_cgroup->lock); 218 return &dev_cgroup->css; 219 } 220 221 static void devcgroup_destroy(struct cgroup_subsys *ss, 222 struct cgroup *cgroup) 223 { 224 struct dev_cgroup *dev_cgroup; 225 struct dev_whitelist_item *wh, *tmp; 226 227 dev_cgroup = cgroup_to_devcgroup(cgroup); 228 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 229 list_del(&wh->list); 230 kfree(wh); 231 } 232 kfree(dev_cgroup); 233 } 234 235 #define DEVCG_ALLOW 1 236 #define DEVCG_DENY 2 237 #define DEVCG_LIST 3 238 239 #define MAJMINLEN 13 240 #define ACCLEN 4 241 242 static void set_access(char *acc, short access) 243 { 244 int idx = 0; 245 memset(acc, 0, ACCLEN); 246 if (access & ACC_READ) 247 acc[idx++] = 'r'; 248 if (access & ACC_WRITE) 249 acc[idx++] = 'w'; 250 if (access & ACC_MKNOD) 251 acc[idx++] = 'm'; 252 } 253 254 static char type_to_char(short type) 255 { 256 if (type == DEV_ALL) 257 return 'a'; 258 if (type == DEV_CHAR) 259 return 'c'; 260 if (type == DEV_BLOCK) 261 return 'b'; 262 return 'X'; 263 } 264 265 static void set_majmin(char *str, unsigned m) 266 { 267 if (m == ~0) 268 strcpy(str, "*"); 269 else 270 sprintf(str, "%u", m); 271 } 272 273 static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 274 struct seq_file *m) 275 { 276 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 277 struct dev_whitelist_item *wh; 278 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 279 280 rcu_read_lock(); 281 list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) { 282 set_access(acc, wh->access); 283 set_majmin(maj, wh->major); 284 set_majmin(min, wh->minor); 285 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 286 maj, min, acc); 287 } 288 rcu_read_unlock(); 289 290 return 0; 291 } 292 293 /* 294 * may_access_whitelist: 295 * does the access granted to dev_cgroup c contain the access 296 * requested in whitelist item refwh. 297 * return 1 if yes, 0 if no. 298 * call with c->lock held 299 */ 300 static int may_access_whitelist(struct dev_cgroup *c, 301 struct dev_whitelist_item *refwh) 302 { 303 struct dev_whitelist_item *whitem; 304 305 list_for_each_entry(whitem, &c->whitelist, list) { 306 if (whitem->type & DEV_ALL) 307 return 1; 308 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 309 continue; 310 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 311 continue; 312 if (whitem->major != ~0 && whitem->major != refwh->major) 313 continue; 314 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 315 continue; 316 if (refwh->access & (~whitem->access)) 317 continue; 318 return 1; 319 } 320 return 0; 321 } 322 323 /* 324 * parent_has_perm: 325 * when adding a new allow rule to a device whitelist, the rule 326 * must be allowed in the parent device 327 */ 328 static int parent_has_perm(struct dev_cgroup *childcg, 329 struct dev_whitelist_item *wh) 330 { 331 struct cgroup *pcg = childcg->css.cgroup->parent; 332 struct dev_cgroup *parent; 333 int ret; 334 335 if (!pcg) 336 return 1; 337 parent = cgroup_to_devcgroup(pcg); 338 spin_lock(&parent->lock); 339 ret = may_access_whitelist(parent, wh); 340 spin_unlock(&parent->lock); 341 return ret; 342 } 343 344 /* 345 * Modify the whitelist using allow/deny rules. 346 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 347 * so we can give a container CAP_MKNOD to let it create devices but not 348 * modify the whitelist. 349 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 350 * us to also grant CAP_SYS_ADMIN to containers without giving away the 351 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 352 * 353 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 354 * new access is only allowed if you're in the top-level cgroup, or your 355 * parent cgroup has the access you're asking for. 356 */ 357 static int devcgroup_update_access(struct dev_cgroup *devcgroup, 358 int filetype, const char *buffer) 359 { 360 struct dev_cgroup *cur_devcgroup; 361 const char *b; 362 char *endp; 363 int retval = 0, count; 364 struct dev_whitelist_item wh; 365 366 if (!capable(CAP_SYS_ADMIN)) 367 return -EPERM; 368 369 cur_devcgroup = task_devcgroup(current); 370 371 memset(&wh, 0, sizeof(wh)); 372 b = buffer; 373 374 switch (*b) { 375 case 'a': 376 wh.type = DEV_ALL; 377 wh.access = ACC_MASK; 378 wh.major = ~0; 379 wh.minor = ~0; 380 goto handle; 381 case 'b': 382 wh.type = DEV_BLOCK; 383 break; 384 case 'c': 385 wh.type = DEV_CHAR; 386 break; 387 default: 388 return -EINVAL; 389 } 390 b++; 391 if (!isspace(*b)) 392 return -EINVAL; 393 b++; 394 if (*b == '*') { 395 wh.major = ~0; 396 b++; 397 } else if (isdigit(*b)) { 398 wh.major = simple_strtoul(b, &endp, 10); 399 b = endp; 400 } else { 401 return -EINVAL; 402 } 403 if (*b != ':') 404 return -EINVAL; 405 b++; 406 407 /* read minor */ 408 if (*b == '*') { 409 wh.minor = ~0; 410 b++; 411 } else if (isdigit(*b)) { 412 wh.minor = simple_strtoul(b, &endp, 10); 413 b = endp; 414 } else { 415 return -EINVAL; 416 } 417 if (!isspace(*b)) 418 return -EINVAL; 419 for (b++, count = 0; count < 3; count++, b++) { 420 switch (*b) { 421 case 'r': 422 wh.access |= ACC_READ; 423 break; 424 case 'w': 425 wh.access |= ACC_WRITE; 426 break; 427 case 'm': 428 wh.access |= ACC_MKNOD; 429 break; 430 case '\n': 431 case '\0': 432 count = 3; 433 break; 434 default: 435 return -EINVAL; 436 } 437 } 438 439 handle: 440 retval = 0; 441 switch (filetype) { 442 case DEVCG_ALLOW: 443 if (!parent_has_perm(devcgroup, &wh)) 444 return -EPERM; 445 return dev_whitelist_add(devcgroup, &wh); 446 case DEVCG_DENY: 447 dev_whitelist_rm(devcgroup, &wh); 448 break; 449 default: 450 return -EINVAL; 451 } 452 return 0; 453 } 454 455 static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, 456 const char *buffer) 457 { 458 int retval; 459 if (!cgroup_lock_live_group(cgrp)) 460 return -ENODEV; 461 retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp), 462 cft->private, buffer); 463 cgroup_unlock(); 464 return retval; 465 } 466 467 static struct cftype dev_cgroup_files[] = { 468 { 469 .name = "allow", 470 .write_string = devcgroup_access_write, 471 .private = DEVCG_ALLOW, 472 }, 473 { 474 .name = "deny", 475 .write_string = devcgroup_access_write, 476 .private = DEVCG_DENY, 477 }, 478 { 479 .name = "list", 480 .read_seq_string = devcgroup_seq_read, 481 .private = DEVCG_LIST, 482 }, 483 }; 484 485 static int devcgroup_populate(struct cgroup_subsys *ss, 486 struct cgroup *cgroup) 487 { 488 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 489 ARRAY_SIZE(dev_cgroup_files)); 490 } 491 492 struct cgroup_subsys devices_subsys = { 493 .name = "devices", 494 .can_attach = devcgroup_can_attach, 495 .create = devcgroup_create, 496 .destroy = devcgroup_destroy, 497 .populate = devcgroup_populate, 498 .subsys_id = devices_subsys_id, 499 }; 500 501 int devcgroup_inode_permission(struct inode *inode, int mask) 502 { 503 struct dev_cgroup *dev_cgroup; 504 struct dev_whitelist_item *wh; 505 506 dev_t device = inode->i_rdev; 507 if (!device) 508 return 0; 509 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) 510 return 0; 511 dev_cgroup = css_to_devcgroup(task_subsys_state(current, 512 devices_subsys_id)); 513 if (!dev_cgroup) 514 return 0; 515 516 rcu_read_lock(); 517 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 518 if (wh->type & DEV_ALL) 519 goto acc_check; 520 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 521 continue; 522 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 523 continue; 524 if (wh->major != ~0 && wh->major != imajor(inode)) 525 continue; 526 if (wh->minor != ~0 && wh->minor != iminor(inode)) 527 continue; 528 acc_check: 529 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 530 continue; 531 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 532 continue; 533 rcu_read_unlock(); 534 return 0; 535 } 536 rcu_read_unlock(); 537 538 return -EPERM; 539 } 540 541 int devcgroup_inode_mknod(int mode, dev_t dev) 542 { 543 struct dev_cgroup *dev_cgroup; 544 struct dev_whitelist_item *wh; 545 546 dev_cgroup = css_to_devcgroup(task_subsys_state(current, 547 devices_subsys_id)); 548 if (!dev_cgroup) 549 return 0; 550 551 rcu_read_lock(); 552 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 553 if (wh->type & DEV_ALL) 554 goto acc_check; 555 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 556 continue; 557 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 558 continue; 559 if (wh->major != ~0 && wh->major != MAJOR(dev)) 560 continue; 561 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 562 continue; 563 acc_check: 564 if (!(wh->access & ACC_MKNOD)) 565 continue; 566 rcu_read_unlock(); 567 return 0; 568 } 569 rcu_read_unlock(); 570 return -EPERM; 571 } 572