1 /* 2 * device_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7 #include <linux/device_cgroup.h> 8 #include <linux/cgroup.h> 9 #include <linux/ctype.h> 10 #include <linux/list.h> 11 #include <linux/uaccess.h> 12 #include <linux/seq_file.h> 13 #include <linux/rcupdate.h> 14 #include <linux/mutex.h> 15 16 #define ACC_MKNOD 1 17 #define ACC_READ 2 18 #define ACC_WRITE 4 19 #define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 20 21 #define DEV_BLOCK 1 22 #define DEV_CHAR 2 23 #define DEV_ALL 4 /* this represents all devices */ 24 25 static DEFINE_MUTEX(devcgroup_mutex); 26 27 /* 28 * whitelist locking rules: 29 * hold devcgroup_mutex for update/read. 30 * hold rcu_read_lock() for read. 31 */ 32 33 struct dev_whitelist_item { 34 u32 major, minor; 35 short type; 36 short access; 37 struct list_head list; 38 struct rcu_head rcu; 39 }; 40 41 struct dev_cgroup { 42 struct cgroup_subsys_state css; 43 struct list_head whitelist; 44 }; 45 46 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) 47 { 48 return container_of(s, struct dev_cgroup, css); 49 } 50 51 static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 52 { 53 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 54 } 55 56 static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) 57 { 58 return css_to_devcgroup(task_subsys_state(task, devices_subsys_id)); 59 } 60 61 struct cgroup_subsys devices_subsys; 62 63 static int devcgroup_can_attach(struct cgroup_subsys *ss, 64 struct cgroup *new_cgroup, struct task_struct *task) 65 { 66 if (current != task && !capable(CAP_SYS_ADMIN)) 67 return -EPERM; 68 69 return 0; 70 } 71 72 /* 73 * called under devcgroup_mutex 74 */ 75 static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 76 { 77 struct dev_whitelist_item *wh, *tmp, *new; 78 79 list_for_each_entry(wh, orig, list) { 80 new = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 81 if (!new) 82 goto free_and_exit; 83 list_add_tail(&new->list, dest); 84 } 85 86 return 0; 87 88 free_and_exit: 89 list_for_each_entry_safe(wh, tmp, dest, list) { 90 list_del(&wh->list); 91 kfree(wh); 92 } 93 return -ENOMEM; 94 } 95 96 /* Stupid prototype - don't bother combining existing entries */ 97 /* 98 * called under devcgroup_mutex 99 */ 100 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 101 struct dev_whitelist_item *wh) 102 { 103 struct dev_whitelist_item *whcopy, *walk; 104 105 whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 106 if (!whcopy) 107 return -ENOMEM; 108 109 list_for_each_entry(walk, &dev_cgroup->whitelist, list) { 110 if (walk->type != wh->type) 111 continue; 112 if (walk->major != wh->major) 113 continue; 114 if (walk->minor != wh->minor) 115 continue; 116 117 walk->access |= wh->access; 118 kfree(whcopy); 119 whcopy = NULL; 120 } 121 122 if (whcopy != NULL) 123 list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist); 124 return 0; 125 } 126 127 static void whitelist_item_free(struct rcu_head *rcu) 128 { 129 struct dev_whitelist_item *item; 130 131 item = container_of(rcu, struct dev_whitelist_item, rcu); 132 kfree(item); 133 } 134 135 /* 136 * called under devcgroup_mutex 137 */ 138 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 139 struct dev_whitelist_item *wh) 140 { 141 struct dev_whitelist_item *walk, *tmp; 142 143 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 144 if (walk->type == DEV_ALL) 145 goto remove; 146 if (walk->type != wh->type) 147 continue; 148 if (walk->major != ~0 && walk->major != wh->major) 149 continue; 150 if (walk->minor != ~0 && walk->minor != wh->minor) 151 continue; 152 153 remove: 154 walk->access &= ~wh->access; 155 if (!walk->access) { 156 list_del_rcu(&walk->list); 157 call_rcu(&walk->rcu, whitelist_item_free); 158 } 159 } 160 } 161 162 /* 163 * called from kernel/cgroup.c with cgroup_lock() held. 164 */ 165 static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 166 struct cgroup *cgroup) 167 { 168 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 169 struct cgroup *parent_cgroup; 170 int ret; 171 172 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 173 if (!dev_cgroup) 174 return ERR_PTR(-ENOMEM); 175 INIT_LIST_HEAD(&dev_cgroup->whitelist); 176 parent_cgroup = cgroup->parent; 177 178 if (parent_cgroup == NULL) { 179 struct dev_whitelist_item *wh; 180 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 181 if (!wh) { 182 kfree(dev_cgroup); 183 return ERR_PTR(-ENOMEM); 184 } 185 wh->minor = wh->major = ~0; 186 wh->type = DEV_ALL; 187 wh->access = ACC_MASK; 188 list_add(&wh->list, &dev_cgroup->whitelist); 189 } else { 190 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 191 mutex_lock(&devcgroup_mutex); 192 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 193 &parent_dev_cgroup->whitelist); 194 mutex_unlock(&devcgroup_mutex); 195 if (ret) { 196 kfree(dev_cgroup); 197 return ERR_PTR(ret); 198 } 199 } 200 201 return &dev_cgroup->css; 202 } 203 204 static void devcgroup_destroy(struct cgroup_subsys *ss, 205 struct cgroup *cgroup) 206 { 207 struct dev_cgroup *dev_cgroup; 208 struct dev_whitelist_item *wh, *tmp; 209 210 dev_cgroup = cgroup_to_devcgroup(cgroup); 211 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 212 list_del(&wh->list); 213 kfree(wh); 214 } 215 kfree(dev_cgroup); 216 } 217 218 #define DEVCG_ALLOW 1 219 #define DEVCG_DENY 2 220 #define DEVCG_LIST 3 221 222 #define MAJMINLEN 13 223 #define ACCLEN 4 224 225 static void set_access(char *acc, short access) 226 { 227 int idx = 0; 228 memset(acc, 0, ACCLEN); 229 if (access & ACC_READ) 230 acc[idx++] = 'r'; 231 if (access & ACC_WRITE) 232 acc[idx++] = 'w'; 233 if (access & ACC_MKNOD) 234 acc[idx++] = 'm'; 235 } 236 237 static char type_to_char(short type) 238 { 239 if (type == DEV_ALL) 240 return 'a'; 241 if (type == DEV_CHAR) 242 return 'c'; 243 if (type == DEV_BLOCK) 244 return 'b'; 245 return 'X'; 246 } 247 248 static void set_majmin(char *str, unsigned m) 249 { 250 if (m == ~0) 251 strcpy(str, "*"); 252 else 253 sprintf(str, "%u", m); 254 } 255 256 static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 257 struct seq_file *m) 258 { 259 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 260 struct dev_whitelist_item *wh; 261 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 262 263 rcu_read_lock(); 264 list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) { 265 set_access(acc, wh->access); 266 set_majmin(maj, wh->major); 267 set_majmin(min, wh->minor); 268 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 269 maj, min, acc); 270 } 271 rcu_read_unlock(); 272 273 return 0; 274 } 275 276 /* 277 * may_access_whitelist: 278 * does the access granted to dev_cgroup c contain the access 279 * requested in whitelist item refwh. 280 * return 1 if yes, 0 if no. 281 * call with devcgroup_mutex held 282 */ 283 static int may_access_whitelist(struct dev_cgroup *c, 284 struct dev_whitelist_item *refwh) 285 { 286 struct dev_whitelist_item *whitem; 287 288 list_for_each_entry(whitem, &c->whitelist, list) { 289 if (whitem->type & DEV_ALL) 290 return 1; 291 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 292 continue; 293 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 294 continue; 295 if (whitem->major != ~0 && whitem->major != refwh->major) 296 continue; 297 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 298 continue; 299 if (refwh->access & (~whitem->access)) 300 continue; 301 return 1; 302 } 303 return 0; 304 } 305 306 /* 307 * parent_has_perm: 308 * when adding a new allow rule to a device whitelist, the rule 309 * must be allowed in the parent device 310 */ 311 static int parent_has_perm(struct dev_cgroup *childcg, 312 struct dev_whitelist_item *wh) 313 { 314 struct cgroup *pcg = childcg->css.cgroup->parent; 315 struct dev_cgroup *parent; 316 317 if (!pcg) 318 return 1; 319 parent = cgroup_to_devcgroup(pcg); 320 return may_access_whitelist(parent, wh); 321 } 322 323 /* 324 * Modify the whitelist using allow/deny rules. 325 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 326 * so we can give a container CAP_MKNOD to let it create devices but not 327 * modify the whitelist. 328 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 329 * us to also grant CAP_SYS_ADMIN to containers without giving away the 330 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 331 * 332 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 333 * new access is only allowed if you're in the top-level cgroup, or your 334 * parent cgroup has the access you're asking for. 335 */ 336 static int devcgroup_update_access(struct dev_cgroup *devcgroup, 337 int filetype, const char *buffer) 338 { 339 const char *b; 340 char *endp; 341 int count; 342 struct dev_whitelist_item wh; 343 344 if (!capable(CAP_SYS_ADMIN)) 345 return -EPERM; 346 347 memset(&wh, 0, sizeof(wh)); 348 b = buffer; 349 350 switch (*b) { 351 case 'a': 352 wh.type = DEV_ALL; 353 wh.access = ACC_MASK; 354 wh.major = ~0; 355 wh.minor = ~0; 356 goto handle; 357 case 'b': 358 wh.type = DEV_BLOCK; 359 break; 360 case 'c': 361 wh.type = DEV_CHAR; 362 break; 363 default: 364 return -EINVAL; 365 } 366 b++; 367 if (!isspace(*b)) 368 return -EINVAL; 369 b++; 370 if (*b == '*') { 371 wh.major = ~0; 372 b++; 373 } else if (isdigit(*b)) { 374 wh.major = simple_strtoul(b, &endp, 10); 375 b = endp; 376 } else { 377 return -EINVAL; 378 } 379 if (*b != ':') 380 return -EINVAL; 381 b++; 382 383 /* read minor */ 384 if (*b == '*') { 385 wh.minor = ~0; 386 b++; 387 } else if (isdigit(*b)) { 388 wh.minor = simple_strtoul(b, &endp, 10); 389 b = endp; 390 } else { 391 return -EINVAL; 392 } 393 if (!isspace(*b)) 394 return -EINVAL; 395 for (b++, count = 0; count < 3; count++, b++) { 396 switch (*b) { 397 case 'r': 398 wh.access |= ACC_READ; 399 break; 400 case 'w': 401 wh.access |= ACC_WRITE; 402 break; 403 case 'm': 404 wh.access |= ACC_MKNOD; 405 break; 406 case '\n': 407 case '\0': 408 count = 3; 409 break; 410 default: 411 return -EINVAL; 412 } 413 } 414 415 handle: 416 switch (filetype) { 417 case DEVCG_ALLOW: 418 if (!parent_has_perm(devcgroup, &wh)) 419 return -EPERM; 420 return dev_whitelist_add(devcgroup, &wh); 421 case DEVCG_DENY: 422 dev_whitelist_rm(devcgroup, &wh); 423 break; 424 default: 425 return -EINVAL; 426 } 427 return 0; 428 } 429 430 static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, 431 const char *buffer) 432 { 433 int retval; 434 435 mutex_lock(&devcgroup_mutex); 436 retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp), 437 cft->private, buffer); 438 mutex_unlock(&devcgroup_mutex); 439 return retval; 440 } 441 442 static struct cftype dev_cgroup_files[] = { 443 { 444 .name = "allow", 445 .write_string = devcgroup_access_write, 446 .private = DEVCG_ALLOW, 447 }, 448 { 449 .name = "deny", 450 .write_string = devcgroup_access_write, 451 .private = DEVCG_DENY, 452 }, 453 { 454 .name = "list", 455 .read_seq_string = devcgroup_seq_read, 456 .private = DEVCG_LIST, 457 }, 458 }; 459 460 static int devcgroup_populate(struct cgroup_subsys *ss, 461 struct cgroup *cgroup) 462 { 463 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 464 ARRAY_SIZE(dev_cgroup_files)); 465 } 466 467 struct cgroup_subsys devices_subsys = { 468 .name = "devices", 469 .can_attach = devcgroup_can_attach, 470 .create = devcgroup_create, 471 .destroy = devcgroup_destroy, 472 .populate = devcgroup_populate, 473 .subsys_id = devices_subsys_id, 474 }; 475 476 int devcgroup_inode_permission(struct inode *inode, int mask) 477 { 478 struct dev_cgroup *dev_cgroup; 479 struct dev_whitelist_item *wh; 480 481 dev_t device = inode->i_rdev; 482 if (!device) 483 return 0; 484 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) 485 return 0; 486 487 rcu_read_lock(); 488 489 dev_cgroup = task_devcgroup(current); 490 491 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 492 if (wh->type & DEV_ALL) 493 goto found; 494 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 495 continue; 496 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 497 continue; 498 if (wh->major != ~0 && wh->major != imajor(inode)) 499 continue; 500 if (wh->minor != ~0 && wh->minor != iminor(inode)) 501 continue; 502 503 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 504 continue; 505 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 506 continue; 507 found: 508 rcu_read_unlock(); 509 return 0; 510 } 511 512 rcu_read_unlock(); 513 514 return -EPERM; 515 } 516 517 int devcgroup_inode_mknod(int mode, dev_t dev) 518 { 519 struct dev_cgroup *dev_cgroup; 520 struct dev_whitelist_item *wh; 521 522 if (!S_ISBLK(mode) && !S_ISCHR(mode)) 523 return 0; 524 525 rcu_read_lock(); 526 527 dev_cgroup = task_devcgroup(current); 528 529 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 530 if (wh->type & DEV_ALL) 531 goto found; 532 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 533 continue; 534 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 535 continue; 536 if (wh->major != ~0 && wh->major != MAJOR(dev)) 537 continue; 538 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 539 continue; 540 541 if (!(wh->access & ACC_MKNOD)) 542 continue; 543 found: 544 rcu_read_unlock(); 545 return 0; 546 } 547 548 rcu_read_unlock(); 549 550 return -EPERM; 551 } 552