// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */

#include <argp.h>
#include <linux/btf.h>

#include "local_storage_bench.skel.h"
#include "bench.h"

#include <test_btf.h>

static struct {
	__u32 nr_maps;
	__u32 hashmap_nr_keys_used;
} args = {
	.nr_maps = 1000,
	.hashmap_nr_keys_used = 1000,
};

enum {
	ARG_NR_MAPS = 6000,
	ARG_HASHMAP_NR_KEYS_USED = 6001,
};

static const struct argp_option opts[] = {
	{ "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0,
		"Set number of local_storage maps"},
	{ "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS",
		0, "When doing hashmap test, set number of hashmap keys test uses"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	long ret;

	switch (key) {
	case ARG_NR_MAPS:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > UINT_MAX) {
			fprintf(stderr, "invalid nr_maps");
			argp_usage(state);
		}
		args.nr_maps = ret;
		break;
	case ARG_HASHMAP_NR_KEYS_USED:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > UINT_MAX) {
			fprintf(stderr, "invalid hashmap_nr_keys_used");
			argp_usage(state);
		}
		args.hashmap_nr_keys_used = ret;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}

	return 0;
}

const struct argp bench_local_storage_argp = {
	.options = opts,
	.parser = parse_arg,
};

/* Keep in sync w/ array of maps in bpf */
#define MAX_NR_MAPS 1000
/* keep in sync w/ same define in bpf */
#define HASHMAP_SZ 4194304

static void validate(void)
{
	if (env.producer_cnt != 1) {
		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
		exit(1);
	}
	if (env.consumer_cnt != 1) {
		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
		exit(1);
	}

	if (args.nr_maps > MAX_NR_MAPS) {
		fprintf(stderr, "nr_maps must be <= 1000\n");
		exit(1);
	}

	if (args.hashmap_nr_keys_used > HASHMAP_SZ) {
		fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ);
		exit(1);
	}
}

static struct {
	struct local_storage_bench *skel;
	void *bpf_obj;
	struct bpf_map *array_of_maps;
} ctx;

static void prepopulate_hashmap(int fd)
{
	int i, key, val;

	/* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so
	 * populate the hashmap for a similar comparison
	 */
	for (i = 0; i < HASHMAP_SZ; i++) {
		key = val = i;
		if (bpf_map_update_elem(fd, &key, &val, 0)) {
			fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key);
			exit(1);
		}
	}
}

static void __setup(struct bpf_program *prog, bool hashmap)
{
	struct bpf_map *inner_map;
	int i, fd, mim_fd, err;

	LIBBPF_OPTS(bpf_map_create_opts, create_opts);

	if (!hashmap)
		create_opts.map_flags = BPF_F_NO_PREALLOC;

	ctx.skel->rodata->num_maps = args.nr_maps;
	ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used;
	inner_map = bpf_map__inner_map(ctx.array_of_maps);
	create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map);
	create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map);

	err = local_storage_bench__load(ctx.skel);
	if (err) {
		fprintf(stderr, "Error loading skeleton\n");
		goto err_out;
	}

	create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj);

	mim_fd = bpf_map__fd(ctx.array_of_maps);
	if (mim_fd < 0) {
		fprintf(stderr, "Error getting map_in_map fd\n");
		goto err_out;
	}

	for (i = 0; i < args.nr_maps; i++) {
		if (hashmap)
			fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
					    sizeof(int), HASHMAP_SZ, &create_opts);
		else
			fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int),
					    sizeof(int), 0, &create_opts);
		if (fd < 0) {
			fprintf(stderr, "Error creating map %d: %d\n", i, fd);
			goto err_out;
		}

		if (hashmap)
			prepopulate_hashmap(fd);

		err = bpf_map_update_elem(mim_fd, &i, &fd, 0);
		if (err) {
			fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i);
			goto err_out;
		}
	}

	if (!bpf_program__attach(prog)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	return;
err_out:
	exit(1);
}

static void hashmap_setup(void)
{
	struct local_storage_bench *skel;

	setup_libbpf();

	skel = local_storage_bench__open();
	ctx.skel = skel;
	ctx.array_of_maps = skel->maps.array_of_hash_maps;
	skel->rodata->use_hashmap = 1;
	skel->rodata->interleave = 0;

	__setup(skel->progs.get_local, true);
}

static void local_storage_cache_get_setup(void)
{
	struct local_storage_bench *skel;

	setup_libbpf();

	skel = local_storage_bench__open();
	ctx.skel = skel;
	ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
	skel->rodata->use_hashmap = 0;
	skel->rodata->interleave = 0;

	__setup(skel->progs.get_local, false);
}

static void local_storage_cache_get_interleaved_setup(void)
{
	struct local_storage_bench *skel;

	setup_libbpf();

	skel = local_storage_bench__open();
	ctx.skel = skel;
	ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
	skel->rodata->use_hashmap = 0;
	skel->rodata->interleave = 1;

	__setup(skel->progs.get_local, false);
}

static void measure(struct bench_res *res)
{
	res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
	res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0);
}

static inline void trigger_bpf_program(void)
{
	syscall(__NR_getpgid);
}

static void *consumer(void *input)
{
	return NULL;
}

static void *producer(void *input)
{
	while (true)
		trigger_bpf_program();

	return NULL;
}

/* cache sequential and interleaved get benchs test local_storage get
 * performance, specifically they demonstrate performance cliff of
 * current list-plus-cache local_storage model.
 *
 * cache sequential get: call bpf_task_storage_get on n maps in order
 * cache interleaved get: like "sequential get", but interleave 4 calls to the
 * 'important' map (idx 0 in array_of_maps) for every 10 calls. Goal
 * is to mimic environment where many progs are accessing their local_storage
 * maps, with 'our' prog needing to access its map more often than others
 */
const struct bench bench_local_storage_cache_seq_get = {
	.name = "local-storage-cache-seq-get",
	.argp = &bench_local_storage_argp,
	.validate = validate,
	.setup = local_storage_cache_get_setup,
	.producer_thread = producer,
	.consumer_thread = consumer,
	.measure = measure,
	.report_progress = local_storage_report_progress,
	.report_final = local_storage_report_final,
};

const struct bench bench_local_storage_cache_interleaved_get = {
	.name = "local-storage-cache-int-get",
	.argp = &bench_local_storage_argp,
	.validate = validate,
	.setup = local_storage_cache_get_interleaved_setup,
	.producer_thread = producer,
	.consumer_thread = consumer,
	.measure = measure,
	.report_progress = local_storage_report_progress,
	.report_final = local_storage_report_final,
};

const struct bench bench_local_storage_cache_hashmap_control = {
	.name = "local-storage-cache-hashmap-control",
	.argp = &bench_local_storage_argp,
	.validate = validate,
	.setup = hashmap_setup,
	.producer_thread = producer,
	.consumer_thread = consumer,
	.measure = measure,
	.report_progress = local_storage_report_progress,
	.report_final = local_storage_report_final,
};