1 /* Copyright (c) 2016 Facebook 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 */ 7 #include <stdio.h> 8 #include <unistd.h> 9 #include <stdlib.h> 10 #include <stdbool.h> 11 #include <string.h> 12 #include <fcntl.h> 13 #include <poll.h> 14 #include <sys/ioctl.h> 15 #include <linux/perf_event.h> 16 #include <linux/bpf.h> 17 #include <signal.h> 18 #include <assert.h> 19 #include <errno.h> 20 #include <sys/resource.h> 21 #include "libbpf.h" 22 #include "bpf_load.h" 23 #include "perf-sys.h" 24 #include "trace_helpers.h" 25 26 #define SAMPLE_FREQ 50 27 28 static bool sys_read_seen, sys_write_seen; 29 30 static void print_ksym(__u64 addr) 31 { 32 struct ksym *sym; 33 34 if (!addr) 35 return; 36 sym = ksym_search(addr); 37 printf("%s;", sym->name); 38 if (!strcmp(sym->name, "sys_read")) 39 sys_read_seen = true; 40 else if (!strcmp(sym->name, "sys_write")) 41 sys_write_seen = true; 42 } 43 44 static void print_addr(__u64 addr) 45 { 46 if (!addr) 47 return; 48 printf("%llx;", addr); 49 } 50 51 #define TASK_COMM_LEN 16 52 53 struct key_t { 54 char comm[TASK_COMM_LEN]; 55 __u32 kernstack; 56 __u32 userstack; 57 }; 58 59 static void print_stack(struct key_t *key, __u64 count) 60 { 61 __u64 ip[PERF_MAX_STACK_DEPTH] = {}; 62 static bool warned; 63 int i; 64 65 printf("%3lld %s;", count, key->comm); 66 if (bpf_map_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { 67 printf("---;"); 68 } else { 69 for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) 70 print_ksym(ip[i]); 71 } 72 printf("-;"); 73 if (bpf_map_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { 74 printf("---;"); 75 } else { 76 for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) 77 print_addr(ip[i]); 78 } 79 if (count < 6) 80 printf("\r"); 81 else 82 printf("\n"); 83 84 if (key->kernstack == -EEXIST && !warned) { 85 printf("stackmap collisions seen. Consider increasing size\n"); 86 warned = true; 87 } else if ((int)key->kernstack < 0 && (int)key->userstack < 0) { 88 printf("err stackid %d %d\n", key->kernstack, key->userstack); 89 } 90 } 91 92 static void int_exit(int sig) 93 { 94 kill(0, SIGKILL); 95 exit(0); 96 } 97 98 static void print_stacks(void) 99 { 100 struct key_t key = {}, next_key; 101 __u64 value; 102 __u32 stackid = 0, next_id; 103 int fd = map_fd[0], stack_map = map_fd[1]; 104 105 sys_read_seen = sys_write_seen = false; 106 while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { 107 bpf_map_lookup_elem(fd, &next_key, &value); 108 print_stack(&next_key, value); 109 bpf_map_delete_elem(fd, &next_key); 110 key = next_key; 111 } 112 printf("\n"); 113 if (!sys_read_seen || !sys_write_seen) { 114 printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n"); 115 int_exit(0); 116 } 117 118 /* clear stack map */ 119 while (bpf_map_get_next_key(stack_map, &stackid, &next_id) == 0) { 120 bpf_map_delete_elem(stack_map, &next_id); 121 stackid = next_id; 122 } 123 } 124 125 static void test_perf_event_all_cpu(struct perf_event_attr *attr) 126 { 127 int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 128 int *pmu_fd = malloc(nr_cpus * sizeof(int)); 129 int i, error = 0; 130 131 /* system wide perf event, no need to inherit */ 132 attr->inherit = 0; 133 134 /* open perf_event on all cpus */ 135 for (i = 0; i < nr_cpus; i++) { 136 pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0); 137 if (pmu_fd[i] < 0) { 138 printf("sys_perf_event_open failed\n"); 139 error = 1; 140 goto all_cpu_err; 141 } 142 assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); 143 assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0); 144 } 145 system("dd if=/dev/zero of=/dev/null count=5000k status=none"); 146 print_stacks(); 147 all_cpu_err: 148 for (i--; i >= 0; i--) { 149 ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); 150 close(pmu_fd[i]); 151 } 152 free(pmu_fd); 153 if (error) 154 int_exit(0); 155 } 156 157 static void test_perf_event_task(struct perf_event_attr *attr) 158 { 159 int pmu_fd; 160 161 /* per task perf event, enable inherit so the "dd ..." command can be traced properly. 162 * Enabling inherit will cause bpf_perf_prog_read_time helper failure. 163 */ 164 attr->inherit = 1; 165 166 /* open task bound event */ 167 pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); 168 if (pmu_fd < 0) { 169 printf("sys_perf_event_open failed\n"); 170 int_exit(0); 171 } 172 assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); 173 assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0); 174 system("dd if=/dev/zero of=/dev/null count=5000k status=none"); 175 print_stacks(); 176 ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); 177 close(pmu_fd); 178 } 179 180 static void test_bpf_perf_event(void) 181 { 182 struct perf_event_attr attr_type_hw = { 183 .sample_freq = SAMPLE_FREQ, 184 .freq = 1, 185 .type = PERF_TYPE_HARDWARE, 186 .config = PERF_COUNT_HW_CPU_CYCLES, 187 }; 188 struct perf_event_attr attr_type_sw = { 189 .sample_freq = SAMPLE_FREQ, 190 .freq = 1, 191 .type = PERF_TYPE_SOFTWARE, 192 .config = PERF_COUNT_SW_CPU_CLOCK, 193 }; 194 struct perf_event_attr attr_hw_cache_l1d = { 195 .sample_freq = SAMPLE_FREQ, 196 .freq = 1, 197 .type = PERF_TYPE_HW_CACHE, 198 .config = 199 PERF_COUNT_HW_CACHE_L1D | 200 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 201 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), 202 }; 203 struct perf_event_attr attr_hw_cache_branch_miss = { 204 .sample_freq = SAMPLE_FREQ, 205 .freq = 1, 206 .type = PERF_TYPE_HW_CACHE, 207 .config = 208 PERF_COUNT_HW_CACHE_BPU | 209 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 210 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), 211 }; 212 struct perf_event_attr attr_type_raw = { 213 .sample_freq = SAMPLE_FREQ, 214 .freq = 1, 215 .type = PERF_TYPE_RAW, 216 /* Intel Instruction Retired */ 217 .config = 0xc0, 218 }; 219 struct perf_event_attr attr_type_raw_lock_load = { 220 .sample_freq = SAMPLE_FREQ, 221 .freq = 1, 222 .type = PERF_TYPE_RAW, 223 /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */ 224 .config = 0x21d0, 225 /* Request to record lock address from PEBS */ 226 .sample_type = PERF_SAMPLE_ADDR, 227 /* Record address value requires precise event */ 228 .precise_ip = 2, 229 }; 230 231 printf("Test HW_CPU_CYCLES\n"); 232 test_perf_event_all_cpu(&attr_type_hw); 233 test_perf_event_task(&attr_type_hw); 234 235 printf("Test SW_CPU_CLOCK\n"); 236 test_perf_event_all_cpu(&attr_type_sw); 237 test_perf_event_task(&attr_type_sw); 238 239 printf("Test HW_CACHE_L1D\n"); 240 test_perf_event_all_cpu(&attr_hw_cache_l1d); 241 test_perf_event_task(&attr_hw_cache_l1d); 242 243 printf("Test HW_CACHE_BPU\n"); 244 test_perf_event_all_cpu(&attr_hw_cache_branch_miss); 245 test_perf_event_task(&attr_hw_cache_branch_miss); 246 247 printf("Test Instruction Retired\n"); 248 test_perf_event_all_cpu(&attr_type_raw); 249 test_perf_event_task(&attr_type_raw); 250 251 printf("Test Lock Load\n"); 252 test_perf_event_all_cpu(&attr_type_raw_lock_load); 253 test_perf_event_task(&attr_type_raw_lock_load); 254 255 printf("*** PASS ***\n"); 256 } 257 258 259 int main(int argc, char **argv) 260 { 261 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 262 char filename[256]; 263 264 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 265 setrlimit(RLIMIT_MEMLOCK, &r); 266 267 signal(SIGINT, int_exit); 268 signal(SIGTERM, int_exit); 269 270 if (load_kallsyms()) { 271 printf("failed to process /proc/kallsyms\n"); 272 return 1; 273 } 274 275 if (load_bpf_file(filename)) { 276 printf("%s", bpf_log_buf); 277 return 2; 278 } 279 280 if (fork() == 0) { 281 read_trace_pipe(); 282 return 0; 283 } 284 test_bpf_perf_event(); 285 int_exit(0); 286 return 0; 287 } 288