/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		free_percpu(array->pptrs[i]);
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
	}

	return 0;
}

/* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	struct bpf_array *array;
	u64 array_size;
	u32 elem_size;

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 || attr->map_flags)
		return ERR_PTR(-EINVAL);

	if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return ERR_PTR(-E2BIG);

	elem_size = round_up(attr->value_size, 8);

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) attr->max_entries * sizeof(void *);
	else
		array_size += (u64) attr->max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	if (array_size >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-ENOMEM);

	/* allocate all map elements and zero-initialize them */
	array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
	if (!array) {
		array = vzalloc(array_size);
		if (!array)
			return ERR_PTR(-ENOMEM);
	}

	/* copy mandatory map attributes */
	array->map.map_type = attr->map_type;
	array->map.key_size = attr->key_size;
	array->map.value_size = attr->value_size;
	array->map.max_entries = attr->max_entries;
	array->elem_size = elem_size;

	if (!percpu)
		goto out;

	array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();

	if (array_size >= U32_MAX - PAGE_SIZE ||
	    elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
		kvfree(array);
		return ERR_PTR(-ENOMEM);
	}
out:
	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;

	return &array->map;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * index;
}

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		memcpy(this_cpu_ptr(array->pptrs[index]),
		       value, map->value_size);
	else
		memcpy(array->value + array->elem_size * index,
		       value, map->value_size);
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into the per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros, which were zero-filled by percpu_alloc,
	 * so no kernel data leak is possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (there can be more than one using this map) were
	 * disconnected from events.
	 * Wait for outstanding programs to complete
	 * and free the array
	 */
	synchronize_rcu();

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	kvfree(array);
}

static const struct bpf_map_ops array_ops = {
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
};

static struct bpf_map_type_list array_type __read_mostly = {
	.ops = &array_ops,
	.type = BPF_MAP_TYPE_ARRAY,
};

static const struct bpf_map_ops percpu_array_ops = {
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
};

static struct bpf_map_type_list percpu_array_type __read_mostly = {
	.ops = &percpu_array_ops,
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
};

static int __init register_array_map(void)
{
	bpf_register_map_type(&array_type);
	bpf_register_map_type(&percpu_array_type);
	return 0;
}
late_initcall(register_array_map);

static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return ERR_PTR(-EINVAL);
	return array_map_alloc(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);
	kvfree(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* only called from syscall */
static int fd_array_map_update_elem(struct bpf_map *map, void *key,
				    void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);

	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}
	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	struct bpf_prog *prog = ptr;

	bpf_prog_put_rcu(prog);
}

/* decrement refcnt of all bpf_progs that are stored in this map */
void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static const struct bpf_map_ops prog_array_ops = {
	.map_alloc = fd_array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_update_elem = fd_array_map_update_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
};

static struct bpf_map_type_list prog_array_type __read_mostly = {
	.ops = &prog_array_ops,
	.type = BPF_MAP_TYPE_PROG_ARRAY,
};

static int __init register_prog_array_map(void)
{
	bpf_register_map_type(&prog_array_type);
	return 0;
}
late_initcall(register_prog_array_map);

static void perf_event_array_map_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
{
	struct perf_event *event;
	const struct perf_event_attr *attr;
	struct file *file;

	file = perf_event_get(fd);
	if (IS_ERR(file))
		return file;

	event = file->private_data;

	attr = perf_event_attrs(event);
	if (IS_ERR(attr))
		goto err;

	if (attr->inherit)
		goto err;

	if (attr->type == PERF_TYPE_RAW)
		return file;

	if (attr->type == PERF_TYPE_HARDWARE)
		return file;

	if (attr->type == PERF_TYPE_SOFTWARE &&
	    attr->config == PERF_COUNT_SW_BPF_OUTPUT)
		return file;
err:
	fput(file);
	return ERR_PTR(-EINVAL);
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	fput((struct file *)ptr);
}

static const struct bpf_map_ops perf_event_array_ops = {
	.map_alloc = fd_array_map_alloc,
	.map_free = perf_event_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_update_elem = fd_array_map_update_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
};

static struct bpf_map_type_list perf_event_array_type __read_mostly = {
	.ops = &perf_event_array_ops,
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
};

static int __init register_perf_event_array_map(void)
{
	bpf_register_map_type(&perf_event_array_type);
	return 0;
}
late_initcall(register_perf_event_array_map);