1 /* Copyright (c) 2016 Facebook 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 */ 7 #include <stdio.h> 8 #include <unistd.h> 9 #include <stdlib.h> 10 #include <stdbool.h> 11 #include <string.h> 12 #include <fcntl.h> 13 #include <poll.h> 14 #include <sys/ioctl.h> 15 #include <linux/perf_event.h> 16 #include <linux/bpf.h> 17 #include <signal.h> 18 #include <assert.h> 19 #include <errno.h> 20 #include <sys/resource.h> 21 #include "libbpf.h" 22 #include "bpf_load.h" 23 #include "perf-sys.h" 24 25 #define SAMPLE_FREQ 50 26 27 static bool sys_read_seen, sys_write_seen; 28 29 static void print_ksym(__u64 addr) 30 { 31 struct ksym *sym; 32 33 if (!addr) 34 return; 35 sym = ksym_search(addr); 36 printf("%s;", sym->name); 37 if (!strcmp(sym->name, "sys_read")) 38 sys_read_seen = true; 39 else if (!strcmp(sym->name, "sys_write")) 40 sys_write_seen = true; 41 } 42 43 static void print_addr(__u64 addr) 44 { 45 if (!addr) 46 return; 47 printf("%llx;", addr); 48 } 49 50 #define TASK_COMM_LEN 16 51 52 struct key_t { 53 char comm[TASK_COMM_LEN]; 54 __u32 kernstack; 55 __u32 userstack; 56 }; 57 58 static void print_stack(struct key_t *key, __u64 count) 59 { 60 __u64 ip[PERF_MAX_STACK_DEPTH] = {}; 61 static bool warned; 62 int i; 63 64 printf("%3lld %s;", count, key->comm); 65 if (bpf_map_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { 66 printf("---;"); 67 } else { 68 for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) 69 print_ksym(ip[i]); 70 } 71 printf("-;"); 72 if (bpf_map_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { 73 printf("---;"); 74 } else { 75 for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) 76 print_addr(ip[i]); 77 } 78 if (count < 6) 79 printf("\r"); 80 else 81 printf("\n"); 82 83 if (key->kernstack == -EEXIST && !warned) { 84 printf("stackmap collisions seen. Consider increasing size\n"); 85 warned = true; 86 } else if ((int)key->kernstack < 0 && (int)key->userstack < 0) { 87 printf("err stackid %d %d\n", key->kernstack, key->userstack); 88 } 89 } 90 91 static void int_exit(int sig) 92 { 93 kill(0, SIGKILL); 94 exit(0); 95 } 96 97 static void print_stacks(void) 98 { 99 struct key_t key = {}, next_key; 100 __u64 value; 101 __u32 stackid = 0, next_id; 102 int fd = map_fd[0], stack_map = map_fd[1]; 103 104 sys_read_seen = sys_write_seen = false; 105 while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { 106 bpf_map_lookup_elem(fd, &next_key, &value); 107 print_stack(&next_key, value); 108 bpf_map_delete_elem(fd, &next_key); 109 key = next_key; 110 } 111 printf("\n"); 112 if (!sys_read_seen || !sys_write_seen) { 113 printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n"); 114 int_exit(0); 115 } 116 117 /* clear stack map */ 118 while (bpf_map_get_next_key(stack_map, &stackid, &next_id) == 0) { 119 bpf_map_delete_elem(stack_map, &next_id); 120 stackid = next_id; 121 } 122 } 123 124 static void test_perf_event_all_cpu(struct perf_event_attr *attr) 125 { 126 int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 127 int *pmu_fd = malloc(nr_cpus * sizeof(int)); 128 int i, error = 0; 129 130 /* system wide perf event, no need to inherit */ 131 attr->inherit = 0; 132 133 /* open perf_event on all cpus */ 134 for (i = 0; i < nr_cpus; i++) { 135 pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0); 136 if (pmu_fd[i] < 0) { 137 printf("sys_perf_event_open failed\n"); 138 error = 1; 139 goto all_cpu_err; 140 } 141 assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); 142 assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0); 143 } 144 system("dd if=/dev/zero of=/dev/null count=5000k status=none"); 145 print_stacks(); 146 all_cpu_err: 147 for (i--; i >= 0; i--) { 148 ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); 149 close(pmu_fd[i]); 150 } 151 free(pmu_fd); 152 if (error) 153 int_exit(0); 154 } 155 156 static void test_perf_event_task(struct perf_event_attr *attr) 157 { 158 int pmu_fd; 159 160 /* per task perf event, enable inherit so the "dd ..." command can be traced properly. 161 * Enabling inherit will cause bpf_perf_prog_read_time helper failure. 162 */ 163 attr->inherit = 1; 164 165 /* open task bound event */ 166 pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); 167 if (pmu_fd < 0) { 168 printf("sys_perf_event_open failed\n"); 169 int_exit(0); 170 } 171 assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); 172 assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0); 173 system("dd if=/dev/zero of=/dev/null count=5000k status=none"); 174 print_stacks(); 175 ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); 176 close(pmu_fd); 177 } 178 179 static void test_bpf_perf_event(void) 180 { 181 struct perf_event_attr attr_type_hw = { 182 .sample_freq = SAMPLE_FREQ, 183 .freq = 1, 184 .type = PERF_TYPE_HARDWARE, 185 .config = PERF_COUNT_HW_CPU_CYCLES, 186 }; 187 struct perf_event_attr attr_type_sw = { 188 .sample_freq = SAMPLE_FREQ, 189 .freq = 1, 190 .type = PERF_TYPE_SOFTWARE, 191 .config = PERF_COUNT_SW_CPU_CLOCK, 192 }; 193 struct perf_event_attr attr_hw_cache_l1d = { 194 .sample_freq = SAMPLE_FREQ, 195 .freq = 1, 196 .type = PERF_TYPE_HW_CACHE, 197 .config = 198 PERF_COUNT_HW_CACHE_L1D | 199 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 200 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), 201 }; 202 struct perf_event_attr attr_hw_cache_branch_miss = { 203 .sample_freq = SAMPLE_FREQ, 204 .freq = 1, 205 .type = PERF_TYPE_HW_CACHE, 206 .config = 207 PERF_COUNT_HW_CACHE_BPU | 208 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 209 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), 210 }; 211 struct perf_event_attr attr_type_raw = { 212 .sample_freq = SAMPLE_FREQ, 213 .freq = 1, 214 .type = PERF_TYPE_RAW, 215 /* Intel Instruction Retired */ 216 .config = 0xc0, 217 }; 218 struct perf_event_attr attr_type_raw_lock_load = { 219 .sample_freq = SAMPLE_FREQ, 220 .freq = 1, 221 .type = PERF_TYPE_RAW, 222 /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */ 223 .config = 0x21d0, 224 /* Request to record lock address from PEBS */ 225 .sample_type = PERF_SAMPLE_ADDR, 226 /* Record address value requires precise event */ 227 .precise_ip = 2, 228 }; 229 230 printf("Test HW_CPU_CYCLES\n"); 231 test_perf_event_all_cpu(&attr_type_hw); 232 test_perf_event_task(&attr_type_hw); 233 234 printf("Test SW_CPU_CLOCK\n"); 235 test_perf_event_all_cpu(&attr_type_sw); 236 test_perf_event_task(&attr_type_sw); 237 238 printf("Test HW_CACHE_L1D\n"); 239 test_perf_event_all_cpu(&attr_hw_cache_l1d); 240 test_perf_event_task(&attr_hw_cache_l1d); 241 242 printf("Test HW_CACHE_BPU\n"); 243 test_perf_event_all_cpu(&attr_hw_cache_branch_miss); 244 test_perf_event_task(&attr_hw_cache_branch_miss); 245 246 printf("Test Instruction Retired\n"); 247 test_perf_event_all_cpu(&attr_type_raw); 248 test_perf_event_task(&attr_type_raw); 249 250 printf("Test Lock Load\n"); 251 test_perf_event_all_cpu(&attr_type_raw_lock_load); 252 test_perf_event_task(&attr_type_raw_lock_load); 253 254 printf("*** PASS ***\n"); 255 } 256 257 258 int main(int argc, char **argv) 259 { 260 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 261 char filename[256]; 262 263 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 264 setrlimit(RLIMIT_MEMLOCK, &r); 265 266 signal(SIGINT, int_exit); 267 signal(SIGTERM, int_exit); 268 269 if (load_kallsyms()) { 270 printf("failed to process /proc/kallsyms\n"); 271 return 1; 272 } 273 274 if (load_bpf_file(filename)) { 275 printf("%s", bpf_log_buf); 276 return 2; 277 } 278 279 if (fork() == 0) { 280 read_trace_pipe(); 281 return 0; 282 } 283 test_bpf_perf_event(); 284 int_exit(0); 285 return 0; 286 } 287