xref: /openbmc/qemu/contrib/plugins/howvec.c (revision 6f9ff551)
1 /*
2  * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
3  *
4  * How vectorised is this code?
5  *
6  * Attempt to measure the amount of vectorisation that has been done
7  * on some code by counting classes of instruction.
8  *
9  * License: GNU GPL, version 2 or later.
10  *   See the COPYING file in the top-level directory.
11  */
12 #include <inttypes.h>
13 #include <assert.h>
14 #include <stdlib.h>
15 #include <inttypes.h>
16 #include <string.h>
17 #include <unistd.h>
18 #include <stdio.h>
19 #include <glib.h>
20 
21 #include <qemu-plugin.h>
22 
/*
 * Exported plugin API version, initialised from the QEMU_PLUGIN_VERSION
 * macro provided by the qemu-plugin.h this file is compiled against.
 * NOTE(review): presumably inspected by QEMU's plugin loader - confirm.
 */
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
24 
/* Element count of a real array object (must not be used on a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
26 
/* How executions of the instructions matching a class are accounted for. */
typedef enum CountType {
    COUNT_CLASS = 0,        /* one shared counter for the whole class */
    COUNT_INDIVIDUAL = 1,   /* a separate counter per distinct opcode */
    COUNT_NONE = 2          /* no counter is attached at all */
} CountType;
32 
/* Upper bound on the number of individual instructions listed at exit. */
static int limit = 50;
/* Set by the "inline" argument: count with inline adds instead of callbacks. */
static bool do_inline = false;
/* Set by the "verbose" argument: also report classes that had no hits. */
static bool verbose = false;
36 
37 static GMutex lock;
38 static GHashTable *insns;
39 
40 typedef struct {
41     const char *class;
42     const char *opt;
43     uint32_t mask;
44     uint32_t pattern;
45     CountType what;
46     uint64_t count;
47 } InsnClassExecCount;
48 
49 typedef struct {
50     char *insn;
51     uint32_t opcode;
52     uint64_t count;
53     InsnClassExecCount *class;
54 } InsnExecCount;
55 
56 /*
57  * Matchers for classes of instructions, order is important.
58  *
59  * Your most precise match must be before looser matches. If no match
60  * is found in the table we can create an individual entry.
61  *
62  * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
63  */
64 static InsnClassExecCount aarch64_insn_classes[] = {
65     /* "Reserved"" */
66     { "  UDEF",              "udef",   0xffff0000, 0x00000000, COUNT_NONE},
67     { "  SVE",               "sve",    0x1e000000, 0x04000000, COUNT_CLASS},
68     { "Reserved",            "res",    0x1e000000, 0x00000000, COUNT_CLASS},
69     /* Data Processing Immediate */
70     { "  PCrel addr",        "pcrel",  0x1f000000, 0x10000000, COUNT_CLASS},
71     { "  Add/Sub (imm,tags)","asit",   0x1f800000, 0x11800000, COUNT_CLASS},
72     { "  Add/Sub (imm)",     "asi",    0x1f000000, 0x11000000, COUNT_CLASS},
73     { "  Logical (imm)",     "logi",   0x1f800000, 0x12000000, COUNT_CLASS},
74     { "  Move Wide (imm)",   "movwi",  0x1f800000, 0x12800000, COUNT_CLASS},
75     { "  Bitfield",          "bitf",   0x1f800000, 0x13000000, COUNT_CLASS},
76     { "  Extract",           "extr",   0x1f800000, 0x13800000, COUNT_CLASS},
77     { "Data Proc Imm",       "dpri",   0x1c000000, 0x10000000, COUNT_CLASS},
78     /* Branches */
79     { "  Cond Branch (imm)", "cndb",   0xfe000000, 0x54000000, COUNT_CLASS},
80     { "  Exception Gen",     "excp",   0xff000000, 0xd4000000, COUNT_CLASS},
81     { "    NOP",             "nop",    0xffffffff, 0xd503201f, COUNT_NONE},
82     { "  Hints",             "hint",   0xfffff000, 0xd5032000, COUNT_CLASS},
83     { "  Barriers",          "barr",   0xfffff000, 0xd5033000, COUNT_CLASS},
84     { "  PSTATE",            "psta",   0xfff8f000, 0xd5004000, COUNT_CLASS},
85     { "  System Insn",       "sins",   0xffd80000, 0xd5080000, COUNT_CLASS},
86     { "  System Reg",        "sreg",   0xffd00000, 0xd5100000, COUNT_CLASS},
87     { "  Branch (reg)",      "breg",   0xfe000000, 0xd6000000, COUNT_CLASS},
88     { "  Branch (imm)",      "bimm",   0x7c000000, 0x14000000, COUNT_CLASS},
89     { "  Cmp & Branch",      "cmpb",   0x7e000000, 0x34000000, COUNT_CLASS},
90     { "  Tst & Branch",      "tstb",   0x7e000000, 0x36000000, COUNT_CLASS},
91     { "Branches",            "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
92     /* Loads and Stores */
93     { "  AdvSimd ldstmult",  "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
94     { "  AdvSimd ldstmult++","advlsmp",0xbfb00000, 0x0c800000, COUNT_CLASS},
95     { "  AdvSimd ldst",      "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
96     { "  AdvSimd ldst++",    "advlssp",0xbf800000, 0x0d800000, COUNT_CLASS},
97     { "  ldst excl",         "ldstx",  0x3f000000, 0x08000000, COUNT_CLASS},
98     { "    Prefetch",        "prfm",   0xff000000, 0xd8000000, COUNT_CLASS},
99     { "  Load Reg (lit)",    "ldlit",  0x1b000000, 0x18000000, COUNT_CLASS},
100     { "  ldst noalloc pair", "ldstnap",0x3b800000, 0x28000000, COUNT_CLASS},
101     { "  ldst pair",         "ldstp",  0x38000000, 0x28000000, COUNT_CLASS},
102     { "  ldst reg",          "ldstr",  0x3b200000, 0x38000000, COUNT_CLASS},
103     { "  Atomic ldst",       "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
104     { "  ldst reg (reg off)","ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
105     { "  ldst reg (pac)",    "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
106     { "  ldst reg (imm)",    "ldsti",  0x3b000000, 0x39000000, COUNT_CLASS},
107     { "Loads & Stores",      "ldst",   0x0a000000, 0x08000000, COUNT_CLASS},
108     /* Data Processing Register */
109     { "Data Proc Reg",       "dprr",   0x0e000000, 0x0a000000, COUNT_CLASS},
110     /* Scalar FP */
111     { "Scalar FP ",          "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS},
112     /* Unclassified */
113     { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_CLASS},
114 };
115 
116 static InsnClassExecCount sparc32_insn_classes[] = {
117     { "Call",                "call",   0xc0000000, 0x40000000, COUNT_CLASS},
118     { "Branch ICond",        "bcc",    0xc1c00000, 0x00800000, COUNT_CLASS},
119     { "Branch Fcond",        "fbcc",   0xc1c00000, 0x01800000, COUNT_CLASS},
120     { "SetHi",               "sethi",  0xc1c00000, 0x01000000, COUNT_CLASS},
121     { "FPU ALU",             "fpu",    0xc1f00000, 0x81a00000, COUNT_CLASS},
122     { "ALU",                 "alu",    0xc0000000, 0x80000000, COUNT_CLASS},
123     { "Load/Store",          "ldst",   0xc0000000, 0xc0000000, COUNT_CLASS},
124     /* Unclassified */
125     { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
126 };
127 
128 static InsnClassExecCount sparc64_insn_classes[] = {
129     { "SetHi & Branches",     "op0",   0xc0000000, 0x00000000, COUNT_CLASS},
130     { "Call",                 "op1",   0xc0000000, 0x40000000, COUNT_CLASS},
131     { "Arith/Logical/Move",   "op2",   0xc0000000, 0x80000000, COUNT_CLASS},
132     { "Arith/Logical/Move",   "op3",   0xc0000000, 0xc0000000, COUNT_CLASS},
133     /* Unclassified */
134     { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
135 };
136 
137 /* Default matcher for currently unclassified architectures */
138 static InsnClassExecCount default_insn_classes[] = {
139     { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
140 };
141 
142 typedef struct {
143     const char *qemu_target;
144     InsnClassExecCount *table;
145     int table_sz;
146 } ClassSelector;
147 
148 static ClassSelector class_tables[] =
149 {
150     { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
151     { "sparc",   sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
152     { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
153     { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) },
154 };
155 
156 static InsnClassExecCount *class_table;
157 static int class_table_sz;
158 
159 static gint cmp_exec_count(gconstpointer a, gconstpointer b)
160 {
161     InsnExecCount *ea = (InsnExecCount *) a;
162     InsnExecCount *eb = (InsnExecCount *) b;
163     return ea->count > eb->count ? -1 : 1;
164 }
165 
166 static void free_record(gpointer data)
167 {
168     InsnExecCount *rec = (InsnExecCount *) data;
169     g_free(rec->insn);
170     g_free(rec);
171 }
172 
173 static void plugin_exit(qemu_plugin_id_t id, void *p)
174 {
175     g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
176     int i;
177     GList *counts;
178     InsnClassExecCount *class = NULL;
179 
180     for (i = 0; i < class_table_sz; i++) {
181         class = &class_table[i];
182         switch (class->what) {
183         case COUNT_CLASS:
184             if (class->count || verbose) {
185                 g_string_append_printf(report, "Class: %-24s\t(%ld hits)\n",
186                                        class->class,
187                                        class->count);
188             }
189             break;
190         case COUNT_INDIVIDUAL:
191             g_string_append_printf(report, "Class: %-24s\tcounted individually\n",
192                                    class->class);
193             break;
194         case COUNT_NONE:
195             g_string_append_printf(report, "Class: %-24s\tnot counted\n",
196                                    class->class);
197             break;
198         default:
199             break;
200         }
201     }
202 
203     counts = g_hash_table_get_values(insns);
204     if (counts && g_list_next(counts)) {
205         g_string_append_printf(report,"Individual Instructions:\n");
206         counts = g_list_sort(counts, cmp_exec_count);
207 
208         for (i = 0; i < limit && g_list_next(counts);
209              i++, counts = g_list_next(counts)) {
210             InsnExecCount *rec = (InsnExecCount *) counts->data;
211             g_string_append_printf(report,
212                                    "Instr: %-24s\t(%ld hits)\t(op=%#08x/%s)\n",
213                                    rec->insn,
214                                    rec->count,
215                                    rec->opcode,
216                                    rec->class ?
217                                    rec->class->class : "un-categorised");
218         }
219         g_list_free(counts);
220     }
221 
222     g_hash_table_destroy(insns);
223 
224     qemu_plugin_outs(report->str);
225 }
226 
227 static void plugin_init(void)
228 {
229     insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record);
230 }
231 
/*
 * Per-instruction execution callback.
 *
 * @cpu_index: index of the executing vCPU (unused)
 * @udata:     address of the 64-bit counter to increment by one
 */
static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
{
    uint64_t *counter = udata;

    *counter += 1;
}
237 
238 static uint64_t * find_counter(struct qemu_plugin_insn *insn)
239 {
240     int i;
241     uint64_t *cnt = NULL;
242     uint32_t opcode;
243     InsnClassExecCount *class = NULL;
244 
245     /*
246      * We only match the first 32 bits of the instruction which is
247      * fine for most RISCs but a bit limiting for CISC architectures.
248      * They would probably benefit from a more tailored plugin.
249      * However we can fall back to individual instruction counting.
250      */
251     opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
252 
253     for (i = 0; !cnt && i < class_table_sz; i++) {
254         class = &class_table[i];
255         uint32_t masked_bits = opcode & class->mask;
256         if (masked_bits == class->pattern) {
257             break;
258         }
259     }
260 
261     g_assert(class);
262 
263     switch (class->what) {
264     case COUNT_NONE:
265         return NULL;
266     case COUNT_CLASS:
267         return &class->count;
268     case COUNT_INDIVIDUAL:
269     {
270         InsnExecCount *icount;
271 
272         g_mutex_lock(&lock);
273         icount = (InsnExecCount *) g_hash_table_lookup(insns,
274                                                        GUINT_TO_POINTER(opcode));
275 
276         if (!icount) {
277             icount = g_new0(InsnExecCount, 1);
278             icount->opcode = opcode;
279             icount->insn = qemu_plugin_insn_disas(insn);
280             icount->class = class;
281 
282             g_hash_table_insert(insns, GUINT_TO_POINTER(opcode),
283                                 (gpointer) icount);
284         }
285         g_mutex_unlock(&lock);
286 
287         return &icount->count;
288     }
289     default:
290         g_assert_not_reached();
291     }
292 
293     return NULL;
294 }
295 
296 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
297 {
298     size_t n = qemu_plugin_tb_n_insns(tb);
299     size_t i;
300 
301     for (i = 0; i < n; i++) {
302         uint64_t *cnt;
303         struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
304         cnt = find_counter(insn);
305 
306         if (cnt) {
307             if (do_inline) {
308                 qemu_plugin_register_vcpu_insn_exec_inline(
309                     insn, QEMU_PLUGIN_INLINE_ADD_U64, cnt, 1);
310             } else {
311                 qemu_plugin_register_vcpu_insn_exec_cb(
312                     insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt);
313             }
314         }
315     }
316 }
317 
318 QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
319                                            const qemu_info_t *info,
320                                            int argc, char **argv)
321 {
322     int i;
323 
324     /* Select a class table appropriate to the guest architecture */
325     for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
326         ClassSelector *entry = &class_tables[i];
327         if (!entry->qemu_target ||
328             strcmp(entry->qemu_target, info->target_name) == 0) {
329             class_table = entry->table;
330             class_table_sz = entry->table_sz;
331             break;
332         }
333     }
334 
335     for (i = 0; i < argc; i++) {
336         char *p = argv[i];
337         if (strcmp(p, "inline") == 0) {
338             do_inline = true;
339         } else if (strcmp(p, "verbose") == 0) {
340             verbose = true;
341         } else {
342             int j;
343             CountType type = COUNT_INDIVIDUAL;
344             if (*p == '!') {
345                 type = COUNT_NONE;
346                 p++;
347             }
348             for (j = 0; j < class_table_sz; j++) {
349                 if (strcmp(p, class_table[j].opt) == 0) {
350                     class_table[j].what = type;
351                     break;
352                 }
353             }
354         }
355     }
356 
357     plugin_init();
358 
359     qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
360     qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
361     return 0;
362 }
363