/* * Copyright (C) 2019, Alex Bennée * * How vectorised is this code? * * Attempt to measure the amount of vectorisation that has been done * on some code by counting classes of instruction. * * License: GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ #include #include #include #include #include #include #include #include #include QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) typedef enum { COUNT_CLASS, COUNT_INDIVIDUAL, COUNT_NONE } CountType; static int limit = 50; static bool do_inline; static bool verbose; static GMutex lock; static GHashTable *insns; typedef struct { const char *class; const char *opt; uint32_t mask; uint32_t pattern; CountType what; qemu_plugin_u64 count; } InsnClassExecCount; typedef struct { char *insn; uint32_t opcode; qemu_plugin_u64 count; InsnClassExecCount *class; } InsnExecCount; /* * Matchers for classes of instructions, order is important. * * Your most precise match must be before looser matches. If no match * is found in the table we can create an individual entry. * * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0 */ static InsnClassExecCount aarch64_insn_classes[] = { /* "Reserved"" */ { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE}, { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS}, { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS}, /* Data Processing Immediate */ { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS}, { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS}, { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS}, { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS}, { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS}, { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS}, { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS}, { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS}, /* Branches */ { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS}, { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS}, { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE}, { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS}, { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS}, { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS}, { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS}, { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS}, { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS}, { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS}, { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS}, { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS}, { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS}, /* Loads and Stores */ { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS}, { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS}, { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS}, { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS}, { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS}, { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS}, { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS}, { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS}, { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS}, { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS}, { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS}, { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS}, { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS}, { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS}, { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS}, /* Data Processing Register */ { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS}, /* Scalar FP */ { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS}, /* Unclassified */ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS}, }; static InsnClassExecCount sparc32_insn_classes[] = { { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS}, { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS}, { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS}, { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS}, { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS}, { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS}, { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS}, /* Unclassified */ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, }; static InsnClassExecCount sparc64_insn_classes[] = { { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS}, { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS}, { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS}, { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS}, /* Unclassified */ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, }; /* Default matcher for currently unclassified architectures */ static InsnClassExecCount default_insn_classes[] = { { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, }; typedef struct { const char *qemu_target; InsnClassExecCount *table; int table_sz; } ClassSelector; static ClassSelector class_tables[] = { { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) }, { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) }, { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) }, { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) }, }; static InsnClassExecCount *class_table; static int class_table_sz; static gint cmp_exec_count(gconstpointer a, gconstpointer b) { InsnExecCount *ea = (InsnExecCount *) a; InsnExecCount *eb = (InsnExecCount *) b; uint64_t count_a = qemu_plugin_u64_sum(ea->count); uint64_t count_b = qemu_plugin_u64_sum(eb->count); return count_a > count_b ? -1 : 1; } static void free_record(gpointer data) { InsnExecCount *rec = (InsnExecCount *) data; qemu_plugin_scoreboard_free(rec->count.score); g_free(rec->insn); g_free(rec); } static void plugin_exit(qemu_plugin_id_t id, void *p) { g_autoptr(GString) report = g_string_new("Instruction Classes:\n"); int i; uint64_t total_count; GList *counts; InsnClassExecCount *class = NULL; for (i = 0; i < class_table_sz; i++) { class = &class_table[i]; switch (class->what) { case COUNT_CLASS: total_count = qemu_plugin_u64_sum(class->count); if (total_count || verbose) { g_string_append_printf(report, "Class: %-24s\t(%" PRId64 " hits)\n", class->class, total_count); } break; case COUNT_INDIVIDUAL: g_string_append_printf(report, "Class: %-24s\tcounted individually\n", class->class); break; case COUNT_NONE: g_string_append_printf(report, "Class: %-24s\tnot counted\n", class->class); break; default: break; } } counts = g_hash_table_get_values(insns); if (counts && g_list_next(counts)) { g_string_append_printf(report, "Individual Instructions:\n"); counts = g_list_sort(counts, cmp_exec_count); for (i = 0; i < limit && g_list_next(counts); i++, counts = g_list_next(counts)) { InsnExecCount *rec = (InsnExecCount *) counts->data; g_string_append_printf(report, "Instr: %-24s\t(%" PRId64 " hits)" "\t(op=0x%08x/%s)\n", rec->insn, qemu_plugin_u64_sum(rec->count), rec->opcode, rec->class ? rec->class->class : "un-categorised"); } g_list_free(counts); } g_hash_table_destroy(insns); for (i = 0; i < ARRAY_SIZE(class_tables); i++) { for (int j = 0; j < class_tables[i].table_sz; ++j) { qemu_plugin_scoreboard_free(class_tables[i].table[j].count.score); } } qemu_plugin_outs(report->str); } static void plugin_init(void) { insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record); } static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata) { struct qemu_plugin_scoreboard *score = udata; qemu_plugin_u64_add(qemu_plugin_scoreboard_u64(score), cpu_index, 1); } static struct qemu_plugin_scoreboard *find_counter( struct qemu_plugin_insn *insn) { int i; uint64_t *cnt = NULL; uint32_t opcode = 0; InsnClassExecCount *class = NULL; /* * We only match the first 32 bits of the instruction which is * fine for most RISCs but a bit limiting for CISC architectures. * They would probably benefit from a more tailored plugin. * However we can fall back to individual instruction counting. */ qemu_plugin_insn_data(insn, &opcode, sizeof(opcode)); for (i = 0; !cnt && i < class_table_sz; i++) { class = &class_table[i]; uint32_t masked_bits = opcode & class->mask; if (masked_bits == class->pattern) { break; } } g_assert(class); switch (class->what) { case COUNT_NONE: return NULL; case COUNT_CLASS: return class->count.score; case COUNT_INDIVIDUAL: { InsnExecCount *icount; g_mutex_lock(&lock); icount = (InsnExecCount *) g_hash_table_lookup(insns, GUINT_TO_POINTER(opcode)); if (!icount) { icount = g_new0(InsnExecCount, 1); icount->opcode = opcode; icount->insn = qemu_plugin_insn_disas(insn); icount->class = class; struct qemu_plugin_scoreboard *score = qemu_plugin_scoreboard_new(sizeof(uint64_t)); icount->count = qemu_plugin_scoreboard_u64(score); g_hash_table_insert(insns, GUINT_TO_POINTER(opcode), (gpointer) icount); } g_mutex_unlock(&lock); return icount->count.score; } default: g_assert_not_reached(); } return NULL; } static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) { size_t n = qemu_plugin_tb_n_insns(tb); size_t i; for (i = 0; i < n; i++) { struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); struct qemu_plugin_scoreboard *cnt = find_counter(insn); if (cnt) { if (do_inline) { qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu( insn, QEMU_PLUGIN_INLINE_ADD_U64, qemu_plugin_scoreboard_u64(cnt), 1); } else { qemu_plugin_register_vcpu_insn_exec_cb( insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt); } } } } QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc, char **argv) { int i; for (i = 0; i < ARRAY_SIZE(class_tables); i++) { for (int j = 0; j < class_tables[i].table_sz; ++j) { struct qemu_plugin_scoreboard *score = qemu_plugin_scoreboard_new(sizeof(uint64_t)); class_tables[i].table[j].count = qemu_plugin_scoreboard_u64(score); } } /* Select a class table appropriate to the guest architecture */ for (i = 0; i < ARRAY_SIZE(class_tables); i++) { ClassSelector *entry = &class_tables[i]; if (!entry->qemu_target || strcmp(entry->qemu_target, info->target_name) == 0) { class_table = entry->table; class_table_sz = entry->table_sz; break; } } for (i = 0; i < argc; i++) { char *p = argv[i]; g_auto(GStrv) tokens = g_strsplit(p, "=", -1); if (g_strcmp0(tokens[0], "inline") == 0) { if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) { fprintf(stderr, "boolean argument parsing failed: %s\n", p); return -1; } } else if (g_strcmp0(tokens[0], "verbose") == 0) { if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) { fprintf(stderr, "boolean argument parsing failed: %s\n", p); return -1; } } else if (g_strcmp0(tokens[0], "count") == 0) { char *value = tokens[1]; int j; CountType type = COUNT_INDIVIDUAL; if (*value == '!') { type = COUNT_NONE; value++; } for (j = 0; j < class_table_sz; j++) { if (strcmp(value, class_table[j].opt) == 0) { class_table[j].what = type; break; } } } else { fprintf(stderr, "option parsing failed: %s\n", p); return -1; } } plugin_init(); qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); return 0; }