/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/bpf.h>
#include <signal.h>
#include <assert.h>
#include <errno.h>
#include <sys/resource.h>
#include "libbpf.h"
#include "bpf_load.h"
#include "perf-sys.h"
#include "trace_helpers.h"

#define SAMPLE_FREQ 50

static bool sys_read_seen, sys_write_seen;

static void print_ksym(__u64 addr)
{
	struct ksym *sym;

	if (!addr)
		return;
	sym = ksym_search(addr);
	/* defensive: ksym_search() may fail to resolve the address,
	 * e.g. if kallsyms was not loaded
	 */
	if (!sym) {
		printf("ksym not found. Is kallsyms loaded?\n");
		return;
	}
	printf("%s;", sym->name);
	if (!strcmp(sym->name, "sys_read"))
		sys_read_seen = true;
	else if (!strcmp(sym->name, "sys_write"))
		sys_write_seen = true;
}

static void print_addr(__u64 addr)
{
	if (!addr)
		return;
	printf("%llx;", addr);
}

#define TASK_COMM_LEN 16

struct key_t {
	char comm[TASK_COMM_LEN];
	__u32 kernstack;
	__u32 userstack;
};

/* print one collapsed stack line: "count comm;kernel syms;-;user addrs" */
static void print_stack(struct key_t *key, __u64 count)
{
	__u64 ip[PERF_MAX_STACK_DEPTH] = {};
	static bool warned;
	int i;

	printf("%3lld %s;", count, key->comm);
	if (bpf_map_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) {
		printf("---;");
	} else {
		for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
			print_ksym(ip[i]);
	}
	printf("-;");
	if (bpf_map_lookup_elem(map_fd[1], &key->userstack, ip) != 0) {
		printf("---;");
	} else {
		for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
			print_addr(ip[i]);
	}
	if (count < 6)
		printf("\r");
	else
		printf("\n");

	if (key->kernstack == -EEXIST && !warned) {
		printf("stackmap collisions seen. Consider increasing size\n");
		warned = true;
	} else if ((int)key->kernstack < 0 && (int)key->userstack < 0) {
		printf("err stackid %d %d\n", key->kernstack, key->userstack);
	}
}
/* kill the whole process group, including the forked trace_pipe reader */
static void int_exit(int sig)
{
	kill(0, SIGKILL);
	exit(0);
}

static void print_stacks(void)
{
	struct key_t key = {}, next_key;
	__u64 value;
	__u32 stackid = 0, next_id;
	int fd = map_fd[0], stack_map = map_fd[1];

	sys_read_seen = sys_write_seen = false;
	/* drain the counts map: print each entry, then delete it */
	while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
		bpf_map_lookup_elem(fd, &next_key, &value);
		print_stack(&next_key, value);
		bpf_map_delete_elem(fd, &next_key);
		key = next_key;
	}
	printf("\n");
	if (!sys_read_seen || !sys_write_seen) {
		printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n");
		int_exit(0);
	}

	/* clear stack map */
	while (bpf_map_get_next_key(stack_map, &stackid, &next_id) == 0) {
		bpf_map_delete_elem(stack_map, &next_id);
		stackid = next_id;
	}
}

static inline int generate_load(void)
{
	if (system("dd if=/dev/zero of=/dev/null count=5000k status=none") < 0) {
		printf("failed to generate some load with dd: %s\n", strerror(errno));
		return -1;
	}

	return 0;
}

static void test_perf_event_all_cpu(struct perf_event_attr *attr)
{
	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	int *pmu_fd = malloc(nr_cpus * sizeof(int));
	int i, error = 0;

	/* defensive: bail out if the fd array cannot be allocated */
	if (!pmu_fd) {
		printf("malloc of pmu_fd array failed\n");
		int_exit(0);
	}

	/* system wide perf event, no need to inherit */
	attr->inherit = 0;

	/* open perf_event on all cpus */
	for (i = 0; i < nr_cpus; i++) {
		pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0);
		if (pmu_fd[i] < 0) {
			printf("sys_perf_event_open failed\n");
			error = 1;
			goto all_cpu_err;
		}
		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0);
	}

	if (generate_load() < 0) {
		error = 1;
		goto all_cpu_err;
	}
	print_stacks();
all_cpu_err:
	for (i--; i >= 0; i--) {
		ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
		close(pmu_fd[i]);
	}
	free(pmu_fd);
	if (error)
		int_exit(0);
}

static void test_perf_event_task(struct perf_event_attr *attr)
{
	int pmu_fd, error = 0;

	/* per task perf event, enable inherit so the "dd ..." command can be
	 * traced properly. Enabling inherit will cause
	 * bpf_perf_prog_read_time helper failure.
	 */
	attr->inherit = 1;

	/* open task bound event */
	pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
	if (pmu_fd < 0) {
		printf("sys_perf_event_open failed\n");
		int_exit(0);
	}
	assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
	assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0);

	if (generate_load() < 0) {
		error = 1;
		goto err;
	}
	print_stacks();
err:
	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
	close(pmu_fd);
	if (error)
		int_exit(0);
}

static void test_bpf_perf_event(void)
{
	struct perf_event_attr attr_type_hw = {
		.sample_freq = SAMPLE_FREQ,
		.freq = 1,
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
	};
	struct perf_event_attr attr_type_sw = {
		.sample_freq = SAMPLE_FREQ,
		.freq = 1,
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_CPU_CLOCK,
	};
	struct perf_event_attr attr_hw_cache_l1d = {
		.sample_freq = SAMPLE_FREQ,
		.freq = 1,
		.type = PERF_TYPE_HW_CACHE,
		.config =
			PERF_COUNT_HW_CACHE_L1D |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
	};
	struct perf_event_attr attr_hw_cache_branch_miss = {
		.sample_freq = SAMPLE_FREQ,
		.freq = 1,
		.type = PERF_TYPE_HW_CACHE,
		.config =
			PERF_COUNT_HW_CACHE_BPU |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
	};
	struct perf_event_attr attr_type_raw = {
		.sample_freq = SAMPLE_FREQ,
		.freq = 1,
		.type = PERF_TYPE_RAW,
		/* Intel Instruction Retired */
		.config = 0xc0,
	};
	struct perf_event_attr attr_type_raw_lock_load = {
		.sample_freq = SAMPLE_FREQ,
		.freq = 1,
		.type = PERF_TYPE_RAW,
		/* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
		.config = 0x21d0,
		/* Request to record lock address from PEBS */
		.sample_type = PERF_SAMPLE_ADDR,
		/* Record address value requires precise event */
		.precise_ip = 2,
	};

	printf("Test HW_CPU_CYCLES\n");
	test_perf_event_all_cpu(&attr_type_hw);
	test_perf_event_task(&attr_type_hw);

	printf("Test SW_CPU_CLOCK\n");
	test_perf_event_all_cpu(&attr_type_sw);
	test_perf_event_task(&attr_type_sw);

	printf("Test HW_CACHE_L1D\n");
	test_perf_event_all_cpu(&attr_hw_cache_l1d);
	test_perf_event_task(&attr_hw_cache_l1d);

	printf("Test HW_CACHE_BPU\n");
	test_perf_event_all_cpu(&attr_hw_cache_branch_miss);
	test_perf_event_task(&attr_hw_cache_branch_miss);

	printf("Test Instruction Retired\n");
	test_perf_event_all_cpu(&attr_type_raw);
	test_perf_event_task(&attr_type_raw);

	printf("Test Lock Load\n");
	test_perf_event_all_cpu(&attr_type_raw_lock_load);
	test_perf_event_task(&attr_type_raw_lock_load);

	printf("*** PASS ***\n");
}

int main(int argc, char **argv)
{
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	char filename[256];

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
	/* BPF maps are charged against RLIMIT_MEMLOCK; fail loudly here
	 * rather than with confusing map-creation errors later
	 */
	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		perror("setrlimit(RLIMIT_MEMLOCK)");
		return 1;
	}

	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);

	if (load_kallsyms()) {
		printf("failed to process /proc/kallsyms\n");
		return 1;
	}

	if (load_bpf_file(filename)) {
		printf("%s", bpf_log_buf);
		return 2;
	}

	/* child dumps the kernel trace pipe while the parent runs the tests */
	if (fork() == 0) {
		read_trace_pipe();
		return 0;
	}
	test_bpf_perf_event();
	int_exit(0);
	return 0;
}
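
/*
 * For reference only: a minimal sketch (an assumption, not the verbatim
 * upstream trace_event_kern.c) of the BPF program this loader expects in
 * "<argv[0]>_kern.o". It is reconstructed from what the code above relies
 * on: prog_fd[0] must be a "perf_event" program, map_fd[0] a hash map from
 * struct key_t to a u64 sample count, and map_fd[1] a
 * BPF_MAP_TYPE_STACK_TRACE map whose ids come from bpf_get_stackid().
 * Map sizes and stackid flags below are illustrative guesses.
 *
 *	#include <uapi/linux/bpf.h>
 *	#include <uapi/linux/bpf_perf_event.h>
 *	#include "bpf_helpers.h"
 *
 *	#define TASK_COMM_LEN 16
 *
 *	struct key_t {
 *		char comm[TASK_COMM_LEN];
 *		u32 kernstack;
 *		u32 userstack;
 *	};
 *
 *	struct bpf_map_def SEC("maps") counts = {
 *		.type		= BPF_MAP_TYPE_HASH,
 *		.key_size	= sizeof(struct key_t),
 *		.value_size	= sizeof(u64),
 *		.max_entries	= 10000,
 *	};
 *
 *	struct bpf_map_def SEC("maps") stackmap = {
 *		.type		= BPF_MAP_TYPE_STACK_TRACE,
 *		.key_size	= sizeof(u32),
 *		.value_size	= PERF_MAX_STACK_DEPTH * sizeof(u64),
 *		.max_entries	= 10000,
 *	};
 *
 *	SEC("perf_event")
 *	int bpf_prog1(struct bpf_perf_event_data *ctx)
 *	{
 *		u64 one = 1, *val;
 *		struct key_t key;
 *
 *		bpf_get_current_comm(&key.comm, sizeof(key.comm));
 *		key.kernstack = bpf_get_stackid(ctx, &stackmap, 0);
 *		key.userstack = bpf_get_stackid(ctx, &stackmap,
 *						BPF_F_USER_STACK);
 *
 *		val = bpf_map_lookup_elem(&counts, &key);
 *		if (val)
 *			(*val)++;
 *		else
 *			bpf_map_update_elem(&counts, &key, &one, BPF_NOEXIST);
 *		return 0;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */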