1 /* 2 * device_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7 #include <linux/device_cgroup.h> 8 #include <linux/cgroup.h> 9 #include <linux/ctype.h> 10 #include <linux/list.h> 11 #include <linux/uaccess.h> 12 #include <linux/seq_file.h> 13 #include <linux/slab.h> 14 #include <linux/rcupdate.h> 15 #include <linux/mutex.h> 16 17 #define ACC_MKNOD 1 18 #define ACC_READ 2 19 #define ACC_WRITE 4 20 #define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 21 22 #define DEV_BLOCK 1 23 #define DEV_CHAR 2 24 #define DEV_ALL 4 /* this represents all devices */ 25 26 static DEFINE_MUTEX(devcgroup_mutex); 27 28 /* 29 * whitelist locking rules: 30 * hold devcgroup_mutex for update/read. 31 * hold rcu_read_lock() for read. 32 */ 33 34 struct dev_whitelist_item { 35 u32 major, minor; 36 short type; 37 short access; 38 struct list_head list; 39 struct rcu_head rcu; 40 }; 41 42 struct dev_cgroup { 43 struct cgroup_subsys_state css; 44 struct list_head whitelist; 45 }; 46 47 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) 48 { 49 return container_of(s, struct dev_cgroup, css); 50 } 51 52 static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 53 { 54 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 55 } 56 57 static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) 58 { 59 return css_to_devcgroup(task_subsys_state(task, devices_subsys_id)); 60 } 61 62 struct cgroup_subsys devices_subsys; 63 64 static int devcgroup_can_attach(struct cgroup_subsys *ss, 65 struct cgroup *new_cgrp, struct cgroup_taskset *set) 66 { 67 struct task_struct *task = cgroup_taskset_first(set); 68 69 if (current != task && !capable(CAP_SYS_ADMIN)) 70 return -EPERM; 71 return 0; 72 } 73 74 /* 75 * called under devcgroup_mutex 76 */ 77 static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 78 { 79 struct dev_whitelist_item *wh, *tmp, *new; 80 81 list_for_each_entry(wh, orig, list) { 82 new = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 83 if (!new) 84 goto free_and_exit; 85 list_add_tail(&new->list, dest); 86 } 87 88 return 0; 89 90 free_and_exit: 91 list_for_each_entry_safe(wh, tmp, dest, list) { 92 list_del(&wh->list); 93 kfree(wh); 94 } 95 return -ENOMEM; 96 } 97 98 /* Stupid prototype - don't bother combining existing entries */ 99 /* 100 * called under devcgroup_mutex 101 */ 102 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 103 struct dev_whitelist_item *wh) 104 { 105 struct dev_whitelist_item *whcopy, *walk; 106 107 whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 108 if (!whcopy) 109 return -ENOMEM; 110 111 list_for_each_entry(walk, &dev_cgroup->whitelist, list) { 112 if (walk->type != wh->type) 113 continue; 114 if (walk->major != wh->major) 115 continue; 116 if (walk->minor != wh->minor) 117 continue; 118 119 walk->access |= wh->access; 120 kfree(whcopy); 121 whcopy = NULL; 122 } 123 124 if (whcopy != NULL) 125 list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist); 126 return 0; 127 } 128 129 /* 130 * called under devcgroup_mutex 131 */ 132 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 133 struct dev_whitelist_item *wh) 134 { 135 struct dev_whitelist_item *walk, *tmp; 136 137 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 138 if (walk->type == DEV_ALL) 139 goto remove; 140 if (walk->type != wh->type) 141 continue; 142 if (walk->major != ~0 && walk->major != wh->major) 143 continue; 144 if (walk->minor != ~0 && walk->minor != wh->minor) 145 continue; 146 147 remove: 148 walk->access &= ~wh->access; 149 if (!walk->access) { 150 list_del_rcu(&walk->list); 151 kfree_rcu(walk, rcu); 152 } 153 } 154 } 155 156 /* 157 * called from kernel/cgroup.c with cgroup_lock() held. 158 */ 159 static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 160 struct cgroup *cgroup) 161 { 162 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 163 struct cgroup *parent_cgroup; 164 int ret; 165 166 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 167 if (!dev_cgroup) 168 return ERR_PTR(-ENOMEM); 169 INIT_LIST_HEAD(&dev_cgroup->whitelist); 170 parent_cgroup = cgroup->parent; 171 172 if (parent_cgroup == NULL) { 173 struct dev_whitelist_item *wh; 174 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 175 if (!wh) { 176 kfree(dev_cgroup); 177 return ERR_PTR(-ENOMEM); 178 } 179 wh->minor = wh->major = ~0; 180 wh->type = DEV_ALL; 181 wh->access = ACC_MASK; 182 list_add(&wh->list, &dev_cgroup->whitelist); 183 } else { 184 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 185 mutex_lock(&devcgroup_mutex); 186 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 187 &parent_dev_cgroup->whitelist); 188 mutex_unlock(&devcgroup_mutex); 189 if (ret) { 190 kfree(dev_cgroup); 191 return ERR_PTR(ret); 192 } 193 } 194 195 return &dev_cgroup->css; 196 } 197 198 static void devcgroup_destroy(struct cgroup_subsys *ss, 199 struct cgroup *cgroup) 200 { 201 struct dev_cgroup *dev_cgroup; 202 struct dev_whitelist_item *wh, *tmp; 203 204 dev_cgroup = cgroup_to_devcgroup(cgroup); 205 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 206 list_del(&wh->list); 207 kfree(wh); 208 } 209 kfree(dev_cgroup); 210 } 211 212 #define DEVCG_ALLOW 1 213 #define DEVCG_DENY 2 214 #define DEVCG_LIST 3 215 216 #define MAJMINLEN 13 217 #define ACCLEN 4 218 219 static void set_access(char *acc, short access) 220 { 221 int idx = 0; 222 memset(acc, 0, ACCLEN); 223 if (access & ACC_READ) 224 acc[idx++] = 'r'; 225 if (access & ACC_WRITE) 226 acc[idx++] = 'w'; 227 if (access & ACC_MKNOD) 228 acc[idx++] = 'm'; 229 } 230 231 static char type_to_char(short type) 232 { 233 if (type == DEV_ALL) 234 return 'a'; 235 if (type == DEV_CHAR) 236 return 'c'; 237 if (type == DEV_BLOCK) 238 return 'b'; 239 return 'X'; 240 } 241 242 static void set_majmin(char *str, unsigned m) 243 { 244 if (m == ~0) 245 strcpy(str, "*"); 246 else 247 sprintf(str, "%u", m); 248 } 249 250 static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 251 struct seq_file *m) 252 { 253 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 254 struct dev_whitelist_item *wh; 255 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 256 257 rcu_read_lock(); 258 list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) { 259 set_access(acc, wh->access); 260 set_majmin(maj, wh->major); 261 set_majmin(min, wh->minor); 262 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 263 maj, min, acc); 264 } 265 rcu_read_unlock(); 266 267 return 0; 268 } 269 270 /* 271 * may_access_whitelist: 272 * does the access granted to dev_cgroup c contain the access 273 * requested in whitelist item refwh. 274 * return 1 if yes, 0 if no. 275 * call with devcgroup_mutex held 276 */ 277 static int may_access_whitelist(struct dev_cgroup *c, 278 struct dev_whitelist_item *refwh) 279 { 280 struct dev_whitelist_item *whitem; 281 282 list_for_each_entry(whitem, &c->whitelist, list) { 283 if (whitem->type & DEV_ALL) 284 return 1; 285 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 286 continue; 287 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 288 continue; 289 if (whitem->major != ~0 && whitem->major != refwh->major) 290 continue; 291 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 292 continue; 293 if (refwh->access & (~whitem->access)) 294 continue; 295 return 1; 296 } 297 return 0; 298 } 299 300 /* 301 * parent_has_perm: 302 * when adding a new allow rule to a device whitelist, the rule 303 * must be allowed in the parent device 304 */ 305 static int parent_has_perm(struct dev_cgroup *childcg, 306 struct dev_whitelist_item *wh) 307 { 308 struct cgroup *pcg = childcg->css.cgroup->parent; 309 struct dev_cgroup *parent; 310 311 if (!pcg) 312 return 1; 313 parent = cgroup_to_devcgroup(pcg); 314 return may_access_whitelist(parent, wh); 315 } 316 317 /* 318 * Modify the whitelist using allow/deny rules. 319 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 320 * so we can give a container CAP_MKNOD to let it create devices but not 321 * modify the whitelist. 322 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 323 * us to also grant CAP_SYS_ADMIN to containers without giving away the 324 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 325 * 326 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 327 * new access is only allowed if you're in the top-level cgroup, or your 328 * parent cgroup has the access you're asking for. 329 */ 330 static int devcgroup_update_access(struct dev_cgroup *devcgroup, 331 int filetype, const char *buffer) 332 { 333 const char *b; 334 char *endp; 335 int count; 336 struct dev_whitelist_item wh; 337 338 if (!capable(CAP_SYS_ADMIN)) 339 return -EPERM; 340 341 memset(&wh, 0, sizeof(wh)); 342 b = buffer; 343 344 switch (*b) { 345 case 'a': 346 wh.type = DEV_ALL; 347 wh.access = ACC_MASK; 348 wh.major = ~0; 349 wh.minor = ~0; 350 goto handle; 351 case 'b': 352 wh.type = DEV_BLOCK; 353 break; 354 case 'c': 355 wh.type = DEV_CHAR; 356 break; 357 default: 358 return -EINVAL; 359 } 360 b++; 361 if (!isspace(*b)) 362 return -EINVAL; 363 b++; 364 if (*b == '*') { 365 wh.major = ~0; 366 b++; 367 } else if (isdigit(*b)) { 368 wh.major = simple_strtoul(b, &endp, 10); 369 b = endp; 370 } else { 371 return -EINVAL; 372 } 373 if (*b != ':') 374 return -EINVAL; 375 b++; 376 377 /* read minor */ 378 if (*b == '*') { 379 wh.minor = ~0; 380 b++; 381 } else if (isdigit(*b)) { 382 wh.minor = simple_strtoul(b, &endp, 10); 383 b = endp; 384 } else { 385 return -EINVAL; 386 } 387 if (!isspace(*b)) 388 return -EINVAL; 389 for (b++, count = 0; count < 3; count++, b++) { 390 switch (*b) { 391 case 'r': 392 wh.access |= ACC_READ; 393 break; 394 case 'w': 395 wh.access |= ACC_WRITE; 396 break; 397 case 'm': 398 wh.access |= ACC_MKNOD; 399 break; 400 case '\n': 401 case '\0': 402 count = 3; 403 break; 404 default: 405 return -EINVAL; 406 } 407 } 408 409 handle: 410 switch (filetype) { 411 case DEVCG_ALLOW: 412 if (!parent_has_perm(devcgroup, &wh)) 413 return -EPERM; 414 return dev_whitelist_add(devcgroup, &wh); 415 case DEVCG_DENY: 416 dev_whitelist_rm(devcgroup, &wh); 417 break; 418 default: 419 return -EINVAL; 420 } 421 return 0; 422 } 423 424 static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, 425 const char *buffer) 426 { 427 int retval; 428 429 mutex_lock(&devcgroup_mutex); 430 retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp), 431 cft->private, buffer); 432 mutex_unlock(&devcgroup_mutex); 433 return retval; 434 } 435 436 static struct cftype dev_cgroup_files[] = { 437 { 438 .name = "allow", 439 .write_string = devcgroup_access_write, 440 .private = DEVCG_ALLOW, 441 }, 442 { 443 .name = "deny", 444 .write_string = devcgroup_access_write, 445 .private = DEVCG_DENY, 446 }, 447 { 448 .name = "list", 449 .read_seq_string = devcgroup_seq_read, 450 .private = DEVCG_LIST, 451 }, 452 }; 453 454 static int devcgroup_populate(struct cgroup_subsys *ss, 455 struct cgroup *cgroup) 456 { 457 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 458 ARRAY_SIZE(dev_cgroup_files)); 459 } 460 461 struct cgroup_subsys devices_subsys = { 462 .name = "devices", 463 .can_attach = devcgroup_can_attach, 464 .create = devcgroup_create, 465 .destroy = devcgroup_destroy, 466 .populate = devcgroup_populate, 467 .subsys_id = devices_subsys_id, 468 }; 469 470 int __devcgroup_inode_permission(struct inode *inode, int mask) 471 { 472 struct dev_cgroup *dev_cgroup; 473 struct dev_whitelist_item *wh; 474 475 rcu_read_lock(); 476 477 dev_cgroup = task_devcgroup(current); 478 479 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 480 if (wh->type & DEV_ALL) 481 goto found; 482 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 483 continue; 484 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 485 continue; 486 if (wh->major != ~0 && wh->major != imajor(inode)) 487 continue; 488 if (wh->minor != ~0 && wh->minor != iminor(inode)) 489 continue; 490 491 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 492 continue; 493 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 494 continue; 495 found: 496 rcu_read_unlock(); 497 return 0; 498 } 499 500 rcu_read_unlock(); 501 502 return -EPERM; 503 } 504 505 int devcgroup_inode_mknod(int mode, dev_t dev) 506 { 507 struct dev_cgroup *dev_cgroup; 508 struct dev_whitelist_item *wh; 509 510 if (!S_ISBLK(mode) && !S_ISCHR(mode)) 511 return 0; 512 513 rcu_read_lock(); 514 515 dev_cgroup = task_devcgroup(current); 516 517 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 518 if (wh->type & DEV_ALL) 519 goto found; 520 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 521 continue; 522 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 523 continue; 524 if (wh->major != ~0 && wh->major != MAJOR(dev)) 525 continue; 526 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 527 continue; 528 529 if (!(wh->access & ACC_MKNOD)) 530 continue; 531 found: 532 rcu_read_unlock(); 533 return 0; 534 } 535 536 rcu_read_unlock(); 537 538 return -EPERM; 539 } 540