xref: /openbmc/qemu/target/riscv/kvm/kvm-cpu.c (revision 15f7a80c49cb3637f62fa37fa4a17da913bd91ff)
1 /*
2  * RISC-V implementation of KVM hooks
3  *
4  * Copyright (c) 2020 Huawei Technologies Co., Ltd
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include <sys/ioctl.h>
21 #include <sys/prctl.h>
22 
23 #include <linux/kvm.h>
24 
25 #include "qemu/timer.h"
26 #include "qapi/error.h"
27 #include "qemu/error-report.h"
28 #include "qemu/main-loop.h"
29 #include "qapi/visitor.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/kvm_int.h"
33 #include "cpu.h"
34 #include "trace.h"
35 #include "hw/core/accel-cpu.h"
36 #include "hw/pci/pci.h"
37 #include "exec/memattrs.h"
38 #include "exec/address-spaces.h"
39 #include "hw/boards.h"
40 #include "hw/irq.h"
41 #include "hw/intc/riscv_imsic.h"
42 #include "qemu/log.h"
43 #include "hw/loader.h"
44 #include "kvm_riscv.h"
45 #include "sbi_ecall_interface.h"
46 #include "chardev/char-fe.h"
47 #include "migration/misc.h"
48 #include "sysemu/runstate.h"
49 #include "hw/riscv/numa.h"
50 
/*
 * prctl(2) option and argument controlling the RISC-V vector state of
 * the calling thread; the values match the Linux UAPI definitions.
 * NOTE(review): presumably defined locally for hosts whose headers
 * predate these constants; their users are outside this part of the
 * file - confirm.
 */
#define PR_RISCV_V_SET_CONTROL            69
#define PR_RISCV_V_VSTATE_CTRL_ON          2
53 
54 void riscv_kvm_aplic_request(void *opaque, int irq, int level)
55 {
56     kvm_set_irq(kvm_state, irq, !!level);
57 }
58 
/*
 * NOTE(review): presumably records whether the host KVM offers the
 * MP_STATE capability; it is neither written nor read in this part of
 * the file - confirm against its users.
 */
static bool cap_has_mp_state;
60 
61 static uint64_t kvm_riscv_reg_id_ulong(CPURISCVState *env, uint64_t type,
62                                  uint64_t idx)
63 {
64     uint64_t id = KVM_REG_RISCV | type | idx;
65 
66     switch (riscv_cpu_mxl(env)) {
67     case MXL_RV32:
68         id |= KVM_REG_SIZE_U32;
69         break;
70     case MXL_RV64:
71         id |= KVM_REG_SIZE_U64;
72         break;
73     default:
74         g_assert_not_reached();
75     }
76     return id;
77 }
78 
79 static uint64_t kvm_riscv_reg_id_u32(uint64_t type, uint64_t idx)
80 {
81     return KVM_REG_RISCV | KVM_REG_SIZE_U32 | type | idx;
82 }
83 
84 static uint64_t kvm_riscv_reg_id_u64(uint64_t type, uint64_t idx)
85 {
86     return KVM_REG_RISCV | KVM_REG_SIZE_U64 | type | idx;
87 }
88 
89 static uint64_t kvm_encode_reg_size_id(uint64_t id, size_t size_b)
90 {
91     uint64_t size_ctz = __builtin_ctz(size_b);
92 
93     return id | (size_ctz << KVM_REG_SIZE_SHIFT);
94 }
95 
96 static uint64_t kvm_riscv_vector_reg_id(RISCVCPU *cpu,
97                                         uint64_t idx)
98 {
99     uint64_t id;
100     size_t size_b;
101 
102     g_assert(idx < 32);
103 
104     id = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(idx);
105     size_b = cpu->cfg.vlenb;
106 
107     return kvm_encode_reg_size_id(id, size_b);
108 }
109 
/*
 * Shorthands that turn a register name (or index) into a complete KVM
 * register ID. The *_ulong based ones size the register after the
 * CPU's MXL; timer and FP-D registers are always 64 bits wide and
 * FP-F registers always 32 bits wide.
 */

/* Core register group. */
#define RISCV_CORE_REG(env, name)                         \
    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE,       \
                           KVM_REG_RISCV_CORE_REG(name))

/* General CSR group. */
#define RISCV_CSR_REG(env, name)                          \
    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CSR,        \
                           KVM_REG_RISCV_CSR_REG(name))

/* Config register group. */
#define RISCV_CONFIG_REG(env, name)                       \
    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,     \
                           KVM_REG_RISCV_CONFIG_REG(name))

/* Timer register group. */
#define RISCV_TIMER_REG(name)                             \
    kvm_riscv_reg_id_u64(KVM_REG_RISCV_TIMER,             \
                         KVM_REG_RISCV_TIMER_REG(name))

/* Single-precision FP register @idx. */
#define RISCV_FP_F_REG(idx) \
    kvm_riscv_reg_id_u32(KVM_REG_RISCV_FP_F, idx)

/* Double-precision FP register @idx. */
#define RISCV_FP_D_REG(idx) \
    kvm_riscv_reg_id_u64(KVM_REG_RISCV_FP_D, idx)

/* CSRs that live in the vector register group. */
#define RISCV_VECTOR_CSR_REG(env, name)                   \
    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_VECTOR,     \
                           KVM_REG_RISCV_VECTOR_CSR_REG(name))
132 
/*
 * Read CSR @csr from KVM into @reg. On failure the *enclosing function*
 * returns the error code, so this may only be used in functions that
 * return int.
 */
#define KVM_RISCV_GET_CSR(cs, env, csr, reg)                            \
    do {                                                                \
        int _err = kvm_get_one_reg(cs, RISCV_CSR_REG(env, csr), &reg);  \
        if (_err) {                                                     \
            return _err;                                                \
        }                                                               \
    } while (0)

/*
 * Write @reg to CSR @csr in KVM. On failure the *enclosing function*
 * returns the error code, so this may only be used in functions that
 * return int.
 */
#define KVM_RISCV_SET_CSR(cs, env, csr, reg)                            \
    do {                                                                \
        int _err = kvm_set_one_reg(cs, RISCV_CSR_REG(env, csr), &reg);  \
        if (_err) {                                                     \
            return _err;                                                \
        }                                                               \
    } while (0)
148 
/* Read timer register @name from KVM into @reg; abort() on any failure. */
#define KVM_RISCV_GET_TIMER(cs, name, reg)                              \
    do {                                                                \
        int _ret = kvm_get_one_reg(cs, RISCV_TIMER_REG(name), &reg);    \
        if (_ret) {                                                     \
            abort();                                                    \
        }                                                               \
    } while (0)

/* Write @reg to timer register @name in KVM; abort() on any failure. */
#define KVM_RISCV_SET_TIMER(cs, name, reg)                              \
    do {                                                                \
        int _ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(name), &reg);    \
        if (_ret) {                                                     \
            abort();                                                    \
        }                                                               \
    } while (0)
164 
/*
 * One CPU configuration item that is backed by a KVM register.
 */
typedef struct KVMCPUConfig {
    /* Name used for the QOM property and in error messages. */
    const char *name;
    /* Property description; only filled in for the MISA entries. */
    const char *description;
    /*
     * For MISA entries: the MISA bit itself (see KVM_MISA_CFG).
     * For all other entries: byte offset of the backing field inside
     * cpu->cfg (see KVM_EXT_CFG and kvmconfig_get_cfg_addr()).
     */
    target_ulong offset;
    /* KVM register index, or a complete register ID for kvm_v_vlenb. */
    uint64_t kvm_reg_id;
    /* Set by the property setters when the user overrides the host value. */
    bool user_set;
    /* Set while probing the host: true if KVM knows the register. */
    bool supported;
} KVMCPUConfig;
173 
174 #define KVM_MISA_CFG(_bit, _reg_id) \
175     {.offset = _bit, .kvm_reg_id = _reg_id}
176 
177 /* KVM ISA extensions */
178 static KVMCPUConfig kvm_misa_ext_cfgs[] = {
179     KVM_MISA_CFG(RVA, KVM_RISCV_ISA_EXT_A),
180     KVM_MISA_CFG(RVC, KVM_RISCV_ISA_EXT_C),
181     KVM_MISA_CFG(RVD, KVM_RISCV_ISA_EXT_D),
182     KVM_MISA_CFG(RVF, KVM_RISCV_ISA_EXT_F),
183     KVM_MISA_CFG(RVH, KVM_RISCV_ISA_EXT_H),
184     KVM_MISA_CFG(RVI, KVM_RISCV_ISA_EXT_I),
185     KVM_MISA_CFG(RVM, KVM_RISCV_ISA_EXT_M),
186     KVM_MISA_CFG(RVV, KVM_RISCV_ISA_EXT_V),
187 };
188 
189 static void kvm_cpu_get_misa_ext_cfg(Object *obj, Visitor *v,
190                                      const char *name,
191                                      void *opaque, Error **errp)
192 {
193     KVMCPUConfig *misa_ext_cfg = opaque;
194     target_ulong misa_bit = misa_ext_cfg->offset;
195     RISCVCPU *cpu = RISCV_CPU(obj);
196     CPURISCVState *env = &cpu->env;
197     bool value = env->misa_ext_mask & misa_bit;
198 
199     visit_type_bool(v, name, &value, errp);
200 }
201 
202 static void kvm_cpu_set_misa_ext_cfg(Object *obj, Visitor *v,
203                                      const char *name,
204                                      void *opaque, Error **errp)
205 {
206     KVMCPUConfig *misa_ext_cfg = opaque;
207     target_ulong misa_bit = misa_ext_cfg->offset;
208     RISCVCPU *cpu = RISCV_CPU(obj);
209     CPURISCVState *env = &cpu->env;
210     bool value, host_bit;
211 
212     if (!visit_type_bool(v, name, &value, errp)) {
213         return;
214     }
215 
216     host_bit = env->misa_ext_mask & misa_bit;
217 
218     if (value == host_bit) {
219         return;
220     }
221 
222     if (!value) {
223         misa_ext_cfg->user_set = true;
224         return;
225     }
226 
227     /*
228      * Forbid users to enable extensions that aren't
229      * available in the hart.
230      */
231     error_setg(errp, "Enabling MISA bit '%s' is not allowed: it's not "
232                "enabled in the host", misa_ext_cfg->name);
233 }
234 
235 static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs)
236 {
237     CPURISCVState *env = &cpu->env;
238     uint64_t id, reg;
239     int i, ret;
240 
241     for (i = 0; i < ARRAY_SIZE(kvm_misa_ext_cfgs); i++) {
242         KVMCPUConfig *misa_cfg = &kvm_misa_ext_cfgs[i];
243         target_ulong misa_bit = misa_cfg->offset;
244 
245         if (!misa_cfg->user_set) {
246             continue;
247         }
248 
249         /* If we're here we're going to disable the MISA bit */
250         reg = 0;
251         id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT,
252                                     misa_cfg->kvm_reg_id);
253         ret = kvm_set_one_reg(cs, id, &reg);
254         if (ret != 0) {
255             /*
256              * We're not checking for -EINVAL because if the bit is about
257              * to be disabled, it means that it was already enabled by
258              * KVM. We determined that by fetching the 'isa' register
259              * during init() time. Any error at this point is worth
260              * aborting.
261              */
262             error_report("Unable to set KVM reg %s, error %d",
263                          misa_cfg->name, ret);
264             exit(EXIT_FAILURE);
265         }
266         env->misa_ext &= ~misa_bit;
267     }
268 }
269 
270 #define KVM_EXT_CFG(_name, _prop, _reg_id) \
271     {.name = _name, .offset = CPU_CFG_OFFSET(_prop), \
272      .kvm_reg_id = _reg_id}
273 
274 static KVMCPUConfig kvm_multi_ext_cfgs[] = {
275     KVM_EXT_CFG("zicbom", ext_zicbom, KVM_RISCV_ISA_EXT_ZICBOM),
276     KVM_EXT_CFG("zicboz", ext_zicboz, KVM_RISCV_ISA_EXT_ZICBOZ),
277     KVM_EXT_CFG("zicntr", ext_zicntr, KVM_RISCV_ISA_EXT_ZICNTR),
278     KVM_EXT_CFG("zicond", ext_zicond, KVM_RISCV_ISA_EXT_ZICOND),
279     KVM_EXT_CFG("zicsr", ext_zicsr, KVM_RISCV_ISA_EXT_ZICSR),
280     KVM_EXT_CFG("zifencei", ext_zifencei, KVM_RISCV_ISA_EXT_ZIFENCEI),
281     KVM_EXT_CFG("zihintntl", ext_zihintntl, KVM_RISCV_ISA_EXT_ZIHINTNTL),
282     KVM_EXT_CFG("zihintpause", ext_zihintpause, KVM_RISCV_ISA_EXT_ZIHINTPAUSE),
283     KVM_EXT_CFG("zihpm", ext_zihpm, KVM_RISCV_ISA_EXT_ZIHPM),
284     KVM_EXT_CFG("zfa", ext_zfa, KVM_RISCV_ISA_EXT_ZFA),
285     KVM_EXT_CFG("zfh", ext_zfh, KVM_RISCV_ISA_EXT_ZFH),
286     KVM_EXT_CFG("zfhmin", ext_zfhmin, KVM_RISCV_ISA_EXT_ZFHMIN),
287     KVM_EXT_CFG("zba", ext_zba, KVM_RISCV_ISA_EXT_ZBA),
288     KVM_EXT_CFG("zbb", ext_zbb, KVM_RISCV_ISA_EXT_ZBB),
289     KVM_EXT_CFG("zbc", ext_zbc, KVM_RISCV_ISA_EXT_ZBC),
290     KVM_EXT_CFG("zbkb", ext_zbkb, KVM_RISCV_ISA_EXT_ZBKB),
291     KVM_EXT_CFG("zbkc", ext_zbkc, KVM_RISCV_ISA_EXT_ZBKC),
292     KVM_EXT_CFG("zbkx", ext_zbkx, KVM_RISCV_ISA_EXT_ZBKX),
293     KVM_EXT_CFG("zbs", ext_zbs, KVM_RISCV_ISA_EXT_ZBS),
294     KVM_EXT_CFG("zknd", ext_zknd, KVM_RISCV_ISA_EXT_ZKND),
295     KVM_EXT_CFG("zkne", ext_zkne, KVM_RISCV_ISA_EXT_ZKNE),
296     KVM_EXT_CFG("zknh", ext_zknh, KVM_RISCV_ISA_EXT_ZKNH),
297     KVM_EXT_CFG("zkr", ext_zkr, KVM_RISCV_ISA_EXT_ZKR),
298     KVM_EXT_CFG("zksed", ext_zksed, KVM_RISCV_ISA_EXT_ZKSED),
299     KVM_EXT_CFG("zksh", ext_zksh, KVM_RISCV_ISA_EXT_ZKSH),
300     KVM_EXT_CFG("zkt", ext_zkt, KVM_RISCV_ISA_EXT_ZKT),
301     KVM_EXT_CFG("zvbb", ext_zvbb, KVM_RISCV_ISA_EXT_ZVBB),
302     KVM_EXT_CFG("zvbc", ext_zvbc, KVM_RISCV_ISA_EXT_ZVBC),
303     KVM_EXT_CFG("zvfh", ext_zvfh, KVM_RISCV_ISA_EXT_ZVFH),
304     KVM_EXT_CFG("zvfhmin", ext_zvfhmin, KVM_RISCV_ISA_EXT_ZVFHMIN),
305     KVM_EXT_CFG("zvkb", ext_zvkb, KVM_RISCV_ISA_EXT_ZVKB),
306     KVM_EXT_CFG("zvkg", ext_zvkg, KVM_RISCV_ISA_EXT_ZVKG),
307     KVM_EXT_CFG("zvkned", ext_zvkned, KVM_RISCV_ISA_EXT_ZVKNED),
308     KVM_EXT_CFG("zvknha", ext_zvknha, KVM_RISCV_ISA_EXT_ZVKNHA),
309     KVM_EXT_CFG("zvknhb", ext_zvknhb, KVM_RISCV_ISA_EXT_ZVKNHB),
310     KVM_EXT_CFG("zvksed", ext_zvksed, KVM_RISCV_ISA_EXT_ZVKSED),
311     KVM_EXT_CFG("zvksh", ext_zvksh, KVM_RISCV_ISA_EXT_ZVKSH),
312     KVM_EXT_CFG("zvkt", ext_zvkt, KVM_RISCV_ISA_EXT_ZVKT),
313     KVM_EXT_CFG("smstateen", ext_smstateen, KVM_RISCV_ISA_EXT_SMSTATEEN),
314     KVM_EXT_CFG("ssaia", ext_ssaia, KVM_RISCV_ISA_EXT_SSAIA),
315     KVM_EXT_CFG("sstc", ext_sstc, KVM_RISCV_ISA_EXT_SSTC),
316     KVM_EXT_CFG("svinval", ext_svinval, KVM_RISCV_ISA_EXT_SVINVAL),
317     KVM_EXT_CFG("svnapot", ext_svnapot, KVM_RISCV_ISA_EXT_SVNAPOT),
318     KVM_EXT_CFG("svpbmt", ext_svpbmt, KVM_RISCV_ISA_EXT_SVPBMT),
319 };
320 
321 static void *kvmconfig_get_cfg_addr(RISCVCPU *cpu, KVMCPUConfig *kvmcfg)
322 {
323     return (void *)&cpu->cfg + kvmcfg->offset;
324 }
325 
326 static void kvm_cpu_cfg_set(RISCVCPU *cpu, KVMCPUConfig *multi_ext,
327                             uint32_t val)
328 {
329     bool *ext_enabled = kvmconfig_get_cfg_addr(cpu, multi_ext);
330 
331     *ext_enabled = val;
332 }
333 
334 static uint32_t kvm_cpu_cfg_get(RISCVCPU *cpu,
335                                 KVMCPUConfig *multi_ext)
336 {
337     bool *ext_enabled = kvmconfig_get_cfg_addr(cpu, multi_ext);
338 
339     return *ext_enabled;
340 }
341 
342 static void kvm_cpu_get_multi_ext_cfg(Object *obj, Visitor *v,
343                                       const char *name,
344                                       void *opaque, Error **errp)
345 {
346     KVMCPUConfig *multi_ext_cfg = opaque;
347     RISCVCPU *cpu = RISCV_CPU(obj);
348     bool value = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
349 
350     visit_type_bool(v, name, &value, errp);
351 }
352 
353 static void kvm_cpu_set_multi_ext_cfg(Object *obj, Visitor *v,
354                                       const char *name,
355                                       void *opaque, Error **errp)
356 {
357     KVMCPUConfig *multi_ext_cfg = opaque;
358     RISCVCPU *cpu = RISCV_CPU(obj);
359     bool value, host_val;
360 
361     if (!visit_type_bool(v, name, &value, errp)) {
362         return;
363     }
364 
365     host_val = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
366 
367     /*
368      * Ignore if the user is setting the same value
369      * as the host.
370      */
371     if (value == host_val) {
372         return;
373     }
374 
375     if (!multi_ext_cfg->supported) {
376         /*
377          * Error out if the user is trying to enable an
378          * extension that KVM doesn't support. Ignore
379          * option otherwise.
380          */
381         if (value) {
382             error_setg(errp, "KVM does not support disabling extension %s",
383                        multi_ext_cfg->name);
384         }
385 
386         return;
387     }
388 
389     multi_ext_cfg->user_set = true;
390     kvm_cpu_cfg_set(cpu, multi_ext_cfg, value);
391 }
392 
393 static KVMCPUConfig kvm_cbom_blocksize = {
394     .name = "cbom_blocksize",
395     .offset = CPU_CFG_OFFSET(cbom_blocksize),
396     .kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)
397 };
398 
399 static KVMCPUConfig kvm_cboz_blocksize = {
400     .name = "cboz_blocksize",
401     .offset = CPU_CFG_OFFSET(cboz_blocksize),
402     .kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)
403 };
404 
405 static KVMCPUConfig kvm_v_vlenb = {
406     .name = "vlenb",
407     .offset = CPU_CFG_OFFSET(vlenb),
408     .kvm_reg_id =  KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_VECTOR |
409                    KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)
410 };
411 
412 static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs)
413 {
414     CPURISCVState *env = &cpu->env;
415     uint64_t id, reg;
416     int i, ret;
417 
418     for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
419         KVMCPUConfig *multi_ext_cfg = &kvm_multi_ext_cfgs[i];
420 
421         if (!multi_ext_cfg->user_set) {
422             continue;
423         }
424 
425         id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT,
426                                     multi_ext_cfg->kvm_reg_id);
427         reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
428         ret = kvm_set_one_reg(cs, id, &reg);
429         if (ret != 0) {
430             error_report("Unable to %s extension %s in KVM, error %d",
431                          reg ? "enable" : "disable",
432                          multi_ext_cfg->name, ret);
433             exit(EXIT_FAILURE);
434         }
435     }
436 }
437 
438 static void cpu_get_cfg_unavailable(Object *obj, Visitor *v,
439                                     const char *name,
440                                     void *opaque, Error **errp)
441 {
442     bool value = false;
443 
444     visit_type_bool(v, name, &value, errp);
445 }
446 
447 static void cpu_set_cfg_unavailable(Object *obj, Visitor *v,
448                                     const char *name,
449                                     void *opaque, Error **errp)
450 {
451     const char *propname = opaque;
452     bool value;
453 
454     if (!visit_type_bool(v, name, &value, errp)) {
455         return;
456     }
457 
458     if (value) {
459         error_setg(errp, "'%s' is not available with KVM",
460                    propname);
461     }
462 }
463 
464 static void riscv_cpu_add_kvm_unavail_prop(Object *obj, const char *prop_name)
465 {
466     /* Check if KVM created the property already */
467     if (object_property_find(obj, prop_name)) {
468         return;
469     }
470 
471     /*
472      * Set the default to disabled for every extension
473      * unknown to KVM and error out if the user attempts
474      * to enable any of them.
475      */
476     object_property_add(obj, prop_name, "bool",
477                         cpu_get_cfg_unavailable,
478                         cpu_set_cfg_unavailable,
479                         NULL, (void *)prop_name);
480 }
481 
482 static void riscv_cpu_add_kvm_unavail_prop_array(Object *obj,
483                                         const RISCVCPUMultiExtConfig *array)
484 {
485     const RISCVCPUMultiExtConfig *prop;
486 
487     g_assert(array);
488 
489     for (prop = array; prop && prop->name; prop++) {
490         riscv_cpu_add_kvm_unavail_prop(obj, prop->name);
491     }
492 }
493 
494 static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj)
495 {
496     int i;
497 
498     riscv_add_satp_mode_properties(cpu_obj);
499 
500     for (i = 0; i < ARRAY_SIZE(kvm_misa_ext_cfgs); i++) {
501         KVMCPUConfig *misa_cfg = &kvm_misa_ext_cfgs[i];
502         int bit = misa_cfg->offset;
503 
504         misa_cfg->name = riscv_get_misa_ext_name(bit);
505         misa_cfg->description = riscv_get_misa_ext_description(bit);
506 
507         object_property_add(cpu_obj, misa_cfg->name, "bool",
508                             kvm_cpu_get_misa_ext_cfg,
509                             kvm_cpu_set_misa_ext_cfg,
510                             NULL, misa_cfg);
511         object_property_set_description(cpu_obj, misa_cfg->name,
512                                         misa_cfg->description);
513     }
514 
515     for (i = 0; misa_bits[i] != 0; i++) {
516         const char *ext_name = riscv_get_misa_ext_name(misa_bits[i]);
517         riscv_cpu_add_kvm_unavail_prop(cpu_obj, ext_name);
518     }
519 
520     for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
521         KVMCPUConfig *multi_cfg = &kvm_multi_ext_cfgs[i];
522 
523         object_property_add(cpu_obj, multi_cfg->name, "bool",
524                             kvm_cpu_get_multi_ext_cfg,
525                             kvm_cpu_set_multi_ext_cfg,
526                             NULL, multi_cfg);
527     }
528 
529     riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_extensions);
530     riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_vendor_exts);
531     riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_experimental_exts);
532 
533    /* We don't have the needed KVM support for profiles */
534     for (i = 0; riscv_profiles[i] != NULL; i++) {
535         riscv_cpu_add_kvm_unavail_prop(cpu_obj, riscv_profiles[i]->name);
536     }
537 }
538 
539 static int kvm_riscv_get_regs_core(CPUState *cs)
540 {
541     int ret = 0;
542     int i;
543     target_ulong reg;
544     CPURISCVState *env = &RISCV_CPU(cs)->env;
545 
546     ret = kvm_get_one_reg(cs, RISCV_CORE_REG(env, regs.pc), &reg);
547     if (ret) {
548         return ret;
549     }
550     env->pc = reg;
551 
552     for (i = 1; i < 32; i++) {
553         uint64_t id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, i);
554         ret = kvm_get_one_reg(cs, id, &reg);
555         if (ret) {
556             return ret;
557         }
558         env->gpr[i] = reg;
559     }
560 
561     return ret;
562 }
563 
564 static int kvm_riscv_put_regs_core(CPUState *cs)
565 {
566     int ret = 0;
567     int i;
568     target_ulong reg;
569     CPURISCVState *env = &RISCV_CPU(cs)->env;
570 
571     reg = env->pc;
572     ret = kvm_set_one_reg(cs, RISCV_CORE_REG(env, regs.pc), &reg);
573     if (ret) {
574         return ret;
575     }
576 
577     for (i = 1; i < 32; i++) {
578         uint64_t id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, i);
579         reg = env->gpr[i];
580         ret = kvm_set_one_reg(cs, id, &reg);
581         if (ret) {
582             return ret;
583         }
584     }
585 
586     return ret;
587 }
588 
589 static int kvm_riscv_get_regs_csr(CPUState *cs)
590 {
591     CPURISCVState *env = &RISCV_CPU(cs)->env;
592 
593     KVM_RISCV_GET_CSR(cs, env, sstatus, env->mstatus);
594     KVM_RISCV_GET_CSR(cs, env, sie, env->mie);
595     KVM_RISCV_GET_CSR(cs, env, stvec, env->stvec);
596     KVM_RISCV_GET_CSR(cs, env, sscratch, env->sscratch);
597     KVM_RISCV_GET_CSR(cs, env, sepc, env->sepc);
598     KVM_RISCV_GET_CSR(cs, env, scause, env->scause);
599     KVM_RISCV_GET_CSR(cs, env, stval, env->stval);
600     KVM_RISCV_GET_CSR(cs, env, sip, env->mip);
601     KVM_RISCV_GET_CSR(cs, env, satp, env->satp);
602 
603     return 0;
604 }
605 
606 static int kvm_riscv_put_regs_csr(CPUState *cs)
607 {
608     CPURISCVState *env = &RISCV_CPU(cs)->env;
609 
610     KVM_RISCV_SET_CSR(cs, env, sstatus, env->mstatus);
611     KVM_RISCV_SET_CSR(cs, env, sie, env->mie);
612     KVM_RISCV_SET_CSR(cs, env, stvec, env->stvec);
613     KVM_RISCV_SET_CSR(cs, env, sscratch, env->sscratch);
614     KVM_RISCV_SET_CSR(cs, env, sepc, env->sepc);
615     KVM_RISCV_SET_CSR(cs, env, scause, env->scause);
616     KVM_RISCV_SET_CSR(cs, env, stval, env->stval);
617     KVM_RISCV_SET_CSR(cs, env, sip, env->mip);
618     KVM_RISCV_SET_CSR(cs, env, satp, env->satp);
619 
620     return 0;
621 }
622 
623 static int kvm_riscv_get_regs_fp(CPUState *cs)
624 {
625     int ret = 0;
626     int i;
627     CPURISCVState *env = &RISCV_CPU(cs)->env;
628 
629     if (riscv_has_ext(env, RVD)) {
630         uint64_t reg;
631         for (i = 0; i < 32; i++) {
632             ret = kvm_get_one_reg(cs, RISCV_FP_D_REG(i), &reg);
633             if (ret) {
634                 return ret;
635             }
636             env->fpr[i] = reg;
637         }
638         return ret;
639     }
640 
641     if (riscv_has_ext(env, RVF)) {
642         uint32_t reg;
643         for (i = 0; i < 32; i++) {
644             ret = kvm_get_one_reg(cs, RISCV_FP_F_REG(i), &reg);
645             if (ret) {
646                 return ret;
647             }
648             env->fpr[i] = reg;
649         }
650         return ret;
651     }
652 
653     return ret;
654 }
655 
656 static int kvm_riscv_put_regs_fp(CPUState *cs)
657 {
658     int ret = 0;
659     int i;
660     CPURISCVState *env = &RISCV_CPU(cs)->env;
661 
662     if (riscv_has_ext(env, RVD)) {
663         uint64_t reg;
664         for (i = 0; i < 32; i++) {
665             reg = env->fpr[i];
666             ret = kvm_set_one_reg(cs, RISCV_FP_D_REG(i), &reg);
667             if (ret) {
668                 return ret;
669             }
670         }
671         return ret;
672     }
673 
674     if (riscv_has_ext(env, RVF)) {
675         uint32_t reg;
676         for (i = 0; i < 32; i++) {
677             reg = env->fpr[i];
678             ret = kvm_set_one_reg(cs, RISCV_FP_F_REG(i), &reg);
679             if (ret) {
680                 return ret;
681             }
682         }
683         return ret;
684     }
685 
686     return ret;
687 }
688 
689 static void kvm_riscv_get_regs_timer(CPUState *cs)
690 {
691     CPURISCVState *env = &RISCV_CPU(cs)->env;
692 
693     if (env->kvm_timer_dirty) {
694         return;
695     }
696 
697     KVM_RISCV_GET_TIMER(cs, time, env->kvm_timer_time);
698     KVM_RISCV_GET_TIMER(cs, compare, env->kvm_timer_compare);
699     KVM_RISCV_GET_TIMER(cs, state, env->kvm_timer_state);
700     KVM_RISCV_GET_TIMER(cs, frequency, env->kvm_timer_frequency);
701 
702     env->kvm_timer_dirty = true;
703 }
704 
705 static void kvm_riscv_put_regs_timer(CPUState *cs)
706 {
707     uint64_t reg;
708     CPURISCVState *env = &RISCV_CPU(cs)->env;
709 
710     if (!env->kvm_timer_dirty) {
711         return;
712     }
713 
714     KVM_RISCV_SET_TIMER(cs, time, env->kvm_timer_time);
715     KVM_RISCV_SET_TIMER(cs, compare, env->kvm_timer_compare);
716 
717     /*
718      * To set register of RISCV_TIMER_REG(state) will occur a error from KVM
719      * on env->kvm_timer_state == 0, It's better to adapt in KVM, but it
720      * doesn't matter that adaping in QEMU now.
721      * TODO If KVM changes, adapt here.
722      */
723     if (env->kvm_timer_state) {
724         KVM_RISCV_SET_TIMER(cs, state, env->kvm_timer_state);
725     }
726 
727     /*
728      * For now, migration will not work between Hosts with different timer
729      * frequency. Therefore, we should check whether they are the same here
730      * during the migration.
731      */
732     if (migration_is_running()) {
733         KVM_RISCV_GET_TIMER(cs, frequency, reg);
734         if (reg != env->kvm_timer_frequency) {
735             error_report("Dst Hosts timer frequency != Src Hosts");
736         }
737     }
738 
739     env->kvm_timer_dirty = false;
740 }
741 
742 uint64_t kvm_riscv_get_timebase_frequency(CPUState *cs)
743 {
744     uint64_t reg;
745 
746     KVM_RISCV_GET_TIMER(cs, frequency, reg);
747 
748     return reg;
749 }
750 
751 static int kvm_riscv_get_regs_vector(CPUState *cs)
752 {
753     RISCVCPU *cpu = RISCV_CPU(cs);
754     CPURISCVState *env = &cpu->env;
755     target_ulong reg;
756     uint64_t vreg_id;
757     int vreg_idx, ret = 0;
758 
759     if (!riscv_has_ext(env, RVV)) {
760         return 0;
761     }
762 
763     ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vstart), &reg);
764     if (ret) {
765         return ret;
766     }
767     env->vstart = reg;
768 
769     ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vl), &reg);
770     if (ret) {
771         return ret;
772     }
773     env->vl = reg;
774 
775     ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vtype), &reg);
776     if (ret) {
777         return ret;
778     }
779     env->vtype = reg;
780 
781     if (kvm_v_vlenb.supported) {
782         ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vlenb), &reg);
783         if (ret) {
784             return ret;
785         }
786         cpu->cfg.vlenb = reg;
787 
788         for (int i = 0; i < 32; i++) {
789             /*
790              * vreg[] is statically allocated using RV_VLEN_MAX.
791              * Use it instead of vlenb to calculate vreg_idx for
792              * simplicity.
793              */
794             vreg_idx = i * RV_VLEN_MAX / 64;
795             vreg_id = kvm_riscv_vector_reg_id(cpu, i);
796 
797             ret = kvm_get_one_reg(cs, vreg_id, &env->vreg[vreg_idx]);
798             if (ret) {
799                 return ret;
800             }
801         }
802     }
803 
804     return 0;
805 }
806 
807 static int kvm_riscv_put_regs_vector(CPUState *cs)
808 {
809     RISCVCPU *cpu = RISCV_CPU(cs);
810     CPURISCVState *env = &cpu->env;
811     target_ulong reg;
812     uint64_t vreg_id;
813     int vreg_idx, ret = 0;
814 
815     if (!riscv_has_ext(env, RVV)) {
816         return 0;
817     }
818 
819     reg = env->vstart;
820     ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vstart), &reg);
821     if (ret) {
822         return ret;
823     }
824 
825     reg = env->vl;
826     ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vl), &reg);
827     if (ret) {
828         return ret;
829     }
830 
831     reg = env->vtype;
832     ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vtype), &reg);
833     if (ret) {
834         return ret;
835     }
836 
837     if (kvm_v_vlenb.supported) {
838         reg = cpu->cfg.vlenb;
839         ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vlenb), &reg);
840 
841         for (int i = 0; i < 32; i++) {
842             /*
843              * vreg[] is statically allocated using RV_VLEN_MAX.
844              * Use it instead of vlenb to calculate vreg_idx for
845              * simplicity.
846              */
847             vreg_idx = i * RV_VLEN_MAX / 64;
848             vreg_id = kvm_riscv_vector_reg_id(cpu, i);
849 
850             ret = kvm_set_one_reg(cs, vreg_id, &env->vreg[vreg_idx]);
851             if (ret) {
852                 return ret;
853             }
854         }
855     }
856 
857     return ret;
858 }
859 
/*
 * File descriptors of a throw-away VM/vCPU pair, filled in by
 * kvm_riscv_create_scratch_vcpu() and closed by
 * kvm_riscv_destroy_scratch_vcpu().
 */
typedef struct KVMScratchCPU {
    int kvmfd;   /* open("/dev/kvm") */
    int vmfd;    /* result of KVM_CREATE_VM on kvmfd */
    int cpufd;   /* result of KVM_CREATE_VCPU on vmfd */
} KVMScratchCPU;
865 
866 /*
867  * Heavily inspired by kvm_arm_create_scratch_host_vcpu()
868  * from target/arm/kvm.c.
869  */
870 static bool kvm_riscv_create_scratch_vcpu(KVMScratchCPU *scratch)
871 {
872     int kvmfd = -1, vmfd = -1, cpufd = -1;
873 
874     kvmfd = qemu_open_old("/dev/kvm", O_RDWR);
875     if (kvmfd < 0) {
876         goto err;
877     }
878     do {
879         vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
880     } while (vmfd == -1 && errno == EINTR);
881     if (vmfd < 0) {
882         goto err;
883     }
884     cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
885     if (cpufd < 0) {
886         goto err;
887     }
888 
889     scratch->kvmfd =  kvmfd;
890     scratch->vmfd = vmfd;
891     scratch->cpufd = cpufd;
892 
893     return true;
894 
895  err:
896     if (cpufd >= 0) {
897         close(cpufd);
898     }
899     if (vmfd >= 0) {
900         close(vmfd);
901     }
902     if (kvmfd >= 0) {
903         close(kvmfd);
904     }
905 
906     return false;
907 }
908 
909 static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch)
910 {
911     close(scratch->cpufd);
912     close(scratch->vmfd);
913     close(scratch->kvmfd);
914 }
915 
916 static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
917 {
918     CPURISCVState *env = &cpu->env;
919     struct kvm_one_reg reg;
920     int ret;
921 
922     reg.id = RISCV_CONFIG_REG(env, mvendorid);
923     reg.addr = (uint64_t)&cpu->cfg.mvendorid;
924     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
925     if (ret != 0) {
926         error_report("Unable to retrieve mvendorid from host, error %d", ret);
927     }
928 
929     reg.id = RISCV_CONFIG_REG(env, marchid);
930     reg.addr = (uint64_t)&cpu->cfg.marchid;
931     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
932     if (ret != 0) {
933         error_report("Unable to retrieve marchid from host, error %d", ret);
934     }
935 
936     reg.id = RISCV_CONFIG_REG(env, mimpid);
937     reg.addr = (uint64_t)&cpu->cfg.mimpid;
938     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
939     if (ret != 0) {
940         error_report("Unable to retrieve mimpid from host, error %d", ret);
941     }
942 }
943 
944 static void kvm_riscv_init_misa_ext_mask(RISCVCPU *cpu,
945                                          KVMScratchCPU *kvmcpu)
946 {
947     CPURISCVState *env = &cpu->env;
948     struct kvm_one_reg reg;
949     int ret;
950 
951     reg.id = RISCV_CONFIG_REG(env, isa);
952     reg.addr = (uint64_t)&env->misa_ext_mask;
953     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
954 
955     if (ret) {
956         error_report("Unable to fetch ISA register from KVM, "
957                      "error %d", ret);
958         kvm_riscv_destroy_scratch_vcpu(kvmcpu);
959         exit(EXIT_FAILURE);
960     }
961 
962     env->misa_ext = env->misa_ext_mask;
963 }
964 
965 static void kvm_riscv_read_cbomz_blksize(RISCVCPU *cpu, KVMScratchCPU *kvmcpu,
966                                          KVMCPUConfig *cbomz_cfg)
967 {
968     CPURISCVState *env = &cpu->env;
969     struct kvm_one_reg reg;
970     int ret;
971 
972     reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,
973                                     cbomz_cfg->kvm_reg_id);
974     reg.addr = (uint64_t)kvmconfig_get_cfg_addr(cpu, cbomz_cfg);
975     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
976     if (ret != 0) {
977         error_report("Unable to read KVM reg %s, error %d",
978                      cbomz_cfg->name, ret);
979         exit(EXIT_FAILURE);
980     }
981 }
982 
983 static void kvm_riscv_read_multiext_legacy(RISCVCPU *cpu,
984                                            KVMScratchCPU *kvmcpu)
985 {
986     CPURISCVState *env = &cpu->env;
987     uint64_t val;
988     int i, ret;
989 
990     for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
991         KVMCPUConfig *multi_ext_cfg = &kvm_multi_ext_cfgs[i];
992         struct kvm_one_reg reg;
993 
994         reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT,
995                                         multi_ext_cfg->kvm_reg_id);
996         reg.addr = (uint64_t)&val;
997         ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
998         if (ret != 0) {
999             if (errno == EINVAL) {
1000                 /* Silently default to 'false' if KVM does not support it. */
1001                 multi_ext_cfg->supported = false;
1002                 val = false;
1003             } else {
1004                 error_report("Unable to read ISA_EXT KVM register %s: %s",
1005                              multi_ext_cfg->name, strerror(errno));
1006                 exit(EXIT_FAILURE);
1007             }
1008         } else {
1009             multi_ext_cfg->supported = true;
1010         }
1011 
1012         kvm_cpu_cfg_set(cpu, multi_ext_cfg, val);
1013     }
1014 
1015     if (cpu->cfg.ext_zicbom) {
1016         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cbom_blocksize);
1017     }
1018 
1019     if (cpu->cfg.ext_zicboz) {
1020         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cboz_blocksize);
1021     }
1022 }
1023 
/*
 * Three-way comparison of two uint64_t values, usable as a
 * qsort()/bsearch() callback.
 *
 * Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
 */
static int uint64_cmp(const void *a, const void *b)
{
    const uint64_t lhs = *(const uint64_t *)a;
    const uint64_t rhs = *(const uint64_t *)b;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (lhs > rhs) - (lhs < rhs);
}
1039 
1040 static void kvm_riscv_read_vlenb(RISCVCPU *cpu, KVMScratchCPU *kvmcpu,
1041                                  struct kvm_reg_list *reglist)
1042 {
1043     struct kvm_one_reg reg;
1044     struct kvm_reg_list *reg_search;
1045     uint64_t val;
1046     int ret;
1047 
1048     reg_search = bsearch(&kvm_v_vlenb.kvm_reg_id, reglist->reg, reglist->n,
1049                          sizeof(uint64_t), uint64_cmp);
1050 
1051     if (reg_search) {
1052         reg.id = kvm_v_vlenb.kvm_reg_id;
1053         reg.addr = (uint64_t)&val;
1054 
1055         ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
1056         if (ret != 0) {
1057             error_report("Unable to read vlenb register, error code: %s",
1058                          strerrorname_np(errno));
1059             exit(EXIT_FAILURE);
1060         }
1061 
1062         kvm_v_vlenb.supported = true;
1063         cpu->cfg.vlenb = val;
1064     }
1065 }
1066 
1067 static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
1068 {
1069     KVMCPUConfig *multi_ext_cfg;
1070     struct kvm_one_reg reg;
1071     struct kvm_reg_list rl_struct;
1072     struct kvm_reg_list *reglist;
1073     uint64_t val, reg_id, *reg_search;
1074     int i, ret;
1075 
1076     rl_struct.n = 0;
1077     ret = ioctl(kvmcpu->cpufd, KVM_GET_REG_LIST, &rl_struct);
1078 
1079     /*
1080      * If KVM_GET_REG_LIST isn't supported we'll get errno 22
1081      * (EINVAL). Use read_legacy() in this case.
1082      */
1083     if (errno == EINVAL) {
1084         return kvm_riscv_read_multiext_legacy(cpu, kvmcpu);
1085     } else if (errno != E2BIG) {
1086         /*
1087          * E2BIG is an expected error message for the API since we
1088          * don't know the number of registers. The right amount will
1089          * be written in rl_struct.n.
1090          *
1091          * Error out if we get any other errno.
1092          */
1093         error_report("Error when accessing get-reg-list: %s",
1094                      strerror(errno));
1095         exit(EXIT_FAILURE);
1096     }
1097 
1098     reglist = g_malloc(sizeof(struct kvm_reg_list) +
1099                        rl_struct.n * sizeof(uint64_t));
1100     reglist->n = rl_struct.n;
1101     ret = ioctl(kvmcpu->cpufd, KVM_GET_REG_LIST, reglist);
1102     if (ret) {
1103         error_report("Error when reading KVM_GET_REG_LIST: %s",
1104                      strerror(errno));
1105         exit(EXIT_FAILURE);
1106     }
1107 
1108     /* sort reglist to use bsearch() */
1109     qsort(&reglist->reg, reglist->n, sizeof(uint64_t), uint64_cmp);
1110 
1111     for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
1112         multi_ext_cfg = &kvm_multi_ext_cfgs[i];
1113         reg_id = kvm_riscv_reg_id_ulong(&cpu->env, KVM_REG_RISCV_ISA_EXT,
1114                                         multi_ext_cfg->kvm_reg_id);
1115         reg_search = bsearch(&reg_id, reglist->reg, reglist->n,
1116                              sizeof(uint64_t), uint64_cmp);
1117         if (!reg_search) {
1118             continue;
1119         }
1120 
1121         reg.id = reg_id;
1122         reg.addr = (uint64_t)&val;
1123         ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
1124         if (ret != 0) {
1125             error_report("Unable to read ISA_EXT KVM register %s: %s",
1126                          multi_ext_cfg->name, strerror(errno));
1127             exit(EXIT_FAILURE);
1128         }
1129 
1130         multi_ext_cfg->supported = true;
1131         kvm_cpu_cfg_set(cpu, multi_ext_cfg, val);
1132     }
1133 
1134     if (cpu->cfg.ext_zicbom) {
1135         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cbom_blocksize);
1136     }
1137 
1138     if (cpu->cfg.ext_zicboz) {
1139         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cboz_blocksize);
1140     }
1141 
1142     if (riscv_has_ext(&cpu->env, RVV)) {
1143         kvm_riscv_read_vlenb(cpu, kvmcpu, reglist);
1144     }
1145 }
1146 
1147 static void riscv_init_kvm_registers(Object *cpu_obj)
1148 {
1149     RISCVCPU *cpu = RISCV_CPU(cpu_obj);
1150     KVMScratchCPU kvmcpu;
1151 
1152     if (!kvm_riscv_create_scratch_vcpu(&kvmcpu)) {
1153         return;
1154     }
1155 
1156     kvm_riscv_init_machine_ids(cpu, &kvmcpu);
1157     kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu);
1158     kvm_riscv_init_multiext_cfg(cpu, &kvmcpu);
1159 
1160     kvm_riscv_destroy_scratch_vcpu(&kvmcpu);
1161 }
1162 
1163 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
1164     KVM_CAP_LAST_INFO
1165 };
1166 
1167 int kvm_arch_get_registers(CPUState *cs)
1168 {
1169     int ret = 0;
1170 
1171     ret = kvm_riscv_get_regs_core(cs);
1172     if (ret) {
1173         return ret;
1174     }
1175 
1176     ret = kvm_riscv_get_regs_csr(cs);
1177     if (ret) {
1178         return ret;
1179     }
1180 
1181     ret = kvm_riscv_get_regs_fp(cs);
1182     if (ret) {
1183         return ret;
1184     }
1185 
1186     ret = kvm_riscv_get_regs_vector(cs);
1187     if (ret) {
1188         return ret;
1189     }
1190 
1191     return ret;
1192 }
1193 
1194 int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state)
1195 {
1196     if (cap_has_mp_state) {
1197         struct kvm_mp_state mp_state = {
1198             .mp_state = state
1199         };
1200 
1201         int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
1202         if (ret) {
1203             fprintf(stderr, "%s: failed to sync MP_STATE %d/%s\n",
1204                     __func__, ret, strerror(-ret));
1205             return -1;
1206         }
1207     }
1208 
1209     return 0;
1210 }
1211 
1212 int kvm_arch_put_registers(CPUState *cs, int level)
1213 {
1214     int ret = 0;
1215 
1216     ret = kvm_riscv_put_regs_core(cs);
1217     if (ret) {
1218         return ret;
1219     }
1220 
1221     ret = kvm_riscv_put_regs_csr(cs);
1222     if (ret) {
1223         return ret;
1224     }
1225 
1226     ret = kvm_riscv_put_regs_fp(cs);
1227     if (ret) {
1228         return ret;
1229     }
1230 
1231     ret = kvm_riscv_put_regs_vector(cs);
1232     if (ret) {
1233         return ret;
1234     }
1235 
1236     if (KVM_PUT_RESET_STATE == level) {
1237         RISCVCPU *cpu = RISCV_CPU(cs);
1238         if (cs->cpu_index == 0) {
1239             ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE);
1240         } else {
1241             ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED);
1242         }
1243         if (ret) {
1244             return ret;
1245         }
1246     }
1247 
1248     return ret;
1249 }
1250 
/* Nothing to do after a virq is released; always reports success (0). */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
1255 
1256 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
1257                              uint64_t address, uint32_t data, PCIDevice *dev)
1258 {
1259     return 0;
1260 }
1261 
1262 int kvm_arch_destroy_vcpu(CPUState *cs)
1263 {
1264     return 0;
1265 }
1266 
1267 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
1268 {
1269     return cpu->cpu_index;
1270 }
1271 
1272 static void kvm_riscv_vm_state_change(void *opaque, bool running,
1273                                       RunState state)
1274 {
1275     CPUState *cs = opaque;
1276 
1277     if (running) {
1278         kvm_riscv_put_regs_timer(cs);
1279     } else {
1280         kvm_riscv_get_regs_timer(cs);
1281     }
1282 }
1283 
1284 void kvm_arch_init_irq_routing(KVMState *s)
1285 {
1286 }
1287 
1288 static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs)
1289 {
1290     CPURISCVState *env = &cpu->env;
1291     target_ulong reg;
1292     uint64_t id;
1293     int ret;
1294 
1295     id = RISCV_CONFIG_REG(env, mvendorid);
1296     /*
1297      * cfg.mvendorid is an uint32 but a target_ulong will
1298      * be written. Assign it to a target_ulong var to avoid
1299      * writing pieces of other cpu->cfg fields in the reg.
1300      */
1301     reg = cpu->cfg.mvendorid;
1302     ret = kvm_set_one_reg(cs, id, &reg);
1303     if (ret != 0) {
1304         return ret;
1305     }
1306 
1307     id = RISCV_CONFIG_REG(env, marchid);
1308     ret = kvm_set_one_reg(cs, id, &cpu->cfg.marchid);
1309     if (ret != 0) {
1310         return ret;
1311     }
1312 
1313     id = RISCV_CONFIG_REG(env, mimpid);
1314     ret = kvm_set_one_reg(cs, id, &cpu->cfg.mimpid);
1315 
1316     return ret;
1317 }
1318 
1319 int kvm_arch_init_vcpu(CPUState *cs)
1320 {
1321     int ret = 0;
1322     RISCVCPU *cpu = RISCV_CPU(cs);
1323 
1324     qemu_add_vm_change_state_handler(kvm_riscv_vm_state_change, cs);
1325 
1326     if (!object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST)) {
1327         ret = kvm_vcpu_set_machine_ids(cpu, cs);
1328         if (ret != 0) {
1329             return ret;
1330         }
1331     }
1332 
1333     kvm_riscv_update_cpu_misa_ext(cpu, cs);
1334     kvm_riscv_update_cpu_cfg_isa_ext(cpu, cs);
1335 
1336     return ret;
1337 }
1338 
/* Not implemented: calling this hook aborts QEMU. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    abort();
}
1343 
1344 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
1345                                 int vector, PCIDevice *dev)
1346 {
1347     return 0;
1348 }
1349 
1350 int kvm_arch_get_default_type(MachineState *ms)
1351 {
1352     return 0;
1353 }
1354 
1355 int kvm_arch_init(MachineState *ms, KVMState *s)
1356 {
1357     cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
1358     return 0;
1359 }
1360 
1361 int kvm_arch_irqchip_create(KVMState *s)
1362 {
1363     if (kvm_kernel_irqchip_split()) {
1364         error_report("-machine kernel_irqchip=split is not supported on RISC-V.");
1365         exit(1);
1366     }
1367 
1368     /*
1369      * We can create the VAIA using the newer device control API.
1370      */
1371     return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
1372 }
1373 
1374 int kvm_arch_process_async_events(CPUState *cs)
1375 {
1376     return 0;
1377 }
1378 
1379 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1380 {
1381 }
1382 
1383 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1384 {
1385     return MEMTXATTRS_UNSPECIFIED;
1386 }
1387 
1388 bool kvm_arch_stop_on_emulation_error(CPUState *cs)
1389 {
1390     return true;
1391 }
1392 
1393 static int kvm_riscv_handle_sbi(CPUState *cs, struct kvm_run *run)
1394 {
1395     int ret = 0;
1396     unsigned char ch;
1397     switch (run->riscv_sbi.extension_id) {
1398     case SBI_EXT_0_1_CONSOLE_PUTCHAR:
1399         ch = run->riscv_sbi.args[0];
1400         qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch));
1401         break;
1402     case SBI_EXT_0_1_CONSOLE_GETCHAR:
1403         ret = qemu_chr_fe_read_all(serial_hd(0)->be, &ch, sizeof(ch));
1404         if (ret == sizeof(ch)) {
1405             run->riscv_sbi.ret[0] = ch;
1406         } else {
1407             run->riscv_sbi.ret[0] = -1;
1408         }
1409         ret = 0;
1410         break;
1411     default:
1412         qemu_log_mask(LOG_UNIMP,
1413                       "%s: un-handled SBI EXIT, specific reasons is %lu\n",
1414                       __func__, run->riscv_sbi.extension_id);
1415         ret = -1;
1416         break;
1417     }
1418     return ret;
1419 }
1420 
1421 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1422 {
1423     int ret = 0;
1424     switch (run->exit_reason) {
1425     case KVM_EXIT_RISCV_SBI:
1426         ret = kvm_riscv_handle_sbi(cs, run);
1427         break;
1428     default:
1429         qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
1430                       __func__, run->exit_reason);
1431         ret = -1;
1432         break;
1433     }
1434     return ret;
1435 }
1436 
1437 void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
1438 {
1439     CPURISCVState *env = &cpu->env;
1440     int i;
1441 
1442     if (!kvm_enabled()) {
1443         return;
1444     }
1445     for (i = 0; i < 32; i++) {
1446         env->gpr[i] = 0;
1447     }
1448     env->pc = cpu->env.kernel_addr;
1449     env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
1450     env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
1451     env->satp = 0;
1452     env->mie = 0;
1453     env->stvec = 0;
1454     env->sscratch = 0;
1455     env->sepc = 0;
1456     env->scause = 0;
1457     env->stval = 0;
1458     env->mip = 0;
1459 }
1460 
1461 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
1462 {
1463     int ret;
1464     unsigned virq = level ? KVM_INTERRUPT_SET : KVM_INTERRUPT_UNSET;
1465 
1466     if (irq != IRQ_S_EXT) {
1467         perror("kvm riscv set irq != IRQ_S_EXT\n");
1468         abort();
1469     }
1470 
1471     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1472     if (ret < 0) {
1473         perror("Set irq failed");
1474         abort();
1475     }
1476 }
1477 
/*
 * KVM AIA mode selected through the "riscv-aia" accelerator property
 * and applied by kvm_riscv_aia_create().
 */
static int aia_mode;
1479 
1480 static const char *kvm_aia_mode_str(uint64_t mode)
1481 {
1482     switch (mode) {
1483     case KVM_DEV_RISCV_AIA_MODE_EMUL:
1484         return "emul";
1485     case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
1486         return "hwaccel";
1487     case KVM_DEV_RISCV_AIA_MODE_AUTO:
1488     default:
1489         return "auto";
1490     };
1491 }
1492 
1493 static char *riscv_get_kvm_aia(Object *obj, Error **errp)
1494 {
1495     return g_strdup(kvm_aia_mode_str(aia_mode));
1496 }
1497 
1498 static void riscv_set_kvm_aia(Object *obj, const char *val, Error **errp)
1499 {
1500     if (!strcmp(val, "emul")) {
1501         aia_mode = KVM_DEV_RISCV_AIA_MODE_EMUL;
1502     } else if (!strcmp(val, "hwaccel")) {
1503         aia_mode = KVM_DEV_RISCV_AIA_MODE_HWACCEL;
1504     } else if (!strcmp(val, "auto")) {
1505         aia_mode = KVM_DEV_RISCV_AIA_MODE_AUTO;
1506     } else {
1507         error_setg(errp, "Invalid KVM AIA mode");
1508         error_append_hint(errp, "Valid values are emul, hwaccel, and auto.\n");
1509     }
1510 }
1511 
1512 void kvm_arch_accel_class_init(ObjectClass *oc)
1513 {
1514     object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia,
1515                                   riscv_set_kvm_aia);
1516     object_class_property_set_description(oc, "riscv-aia",
1517                                           "Set KVM AIA mode. Valid values are "
1518                                           "emul, hwaccel, and auto. Default "
1519                                           "is auto.");
1520     object_property_set_default_str(object_class_property_find(oc, "riscv-aia"),
1521                                     "auto");
1522 }
1523 
1524 void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
1525                           uint64_t aia_irq_num, uint64_t aia_msi_num,
1526                           uint64_t aplic_base, uint64_t imsic_base,
1527                           uint64_t guest_num)
1528 {
1529     int ret, i;
1530     int aia_fd = -1;
1531     uint64_t default_aia_mode;
1532     uint64_t socket_count = riscv_socket_count(machine);
1533     uint64_t max_hart_per_socket = 0;
1534     uint64_t socket, base_hart, hart_count, socket_imsic_base, imsic_addr;
1535     uint64_t socket_bits, hart_bits, guest_bits;
1536 
1537     aia_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_RISCV_AIA, false);
1538 
1539     if (aia_fd < 0) {
1540         error_report("Unable to create in-kernel irqchip");
1541         exit(1);
1542     }
1543 
1544     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1545                             KVM_DEV_RISCV_AIA_CONFIG_MODE,
1546                             &default_aia_mode, false, NULL);
1547     if (ret < 0) {
1548         error_report("KVM AIA: failed to get current KVM AIA mode");
1549         exit(1);
1550     }
1551     qemu_log("KVM AIA: default mode is %s\n",
1552              kvm_aia_mode_str(default_aia_mode));
1553 
1554     if (default_aia_mode != aia_mode) {
1555         ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1556                                 KVM_DEV_RISCV_AIA_CONFIG_MODE,
1557                                 &aia_mode, true, NULL);
1558         if (ret < 0)
1559             warn_report("KVM AIA: failed to set KVM AIA mode");
1560         else
1561             qemu_log("KVM AIA: set current mode to %s\n",
1562                      kvm_aia_mode_str(aia_mode));
1563     }
1564 
1565     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1566                             KVM_DEV_RISCV_AIA_CONFIG_SRCS,
1567                             &aia_irq_num, true, NULL);
1568     if (ret < 0) {
1569         error_report("KVM AIA: failed to set number of input irq lines");
1570         exit(1);
1571     }
1572 
1573     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1574                             KVM_DEV_RISCV_AIA_CONFIG_IDS,
1575                             &aia_msi_num, true, NULL);
1576     if (ret < 0) {
1577         error_report("KVM AIA: failed to set number of msi");
1578         exit(1);
1579     }
1580 
1581 
1582     if (socket_count > 1) {
1583         socket_bits = find_last_bit(&socket_count, BITS_PER_LONG) + 1;
1584         ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1585                                 KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS,
1586                                 &socket_bits, true, NULL);
1587         if (ret < 0) {
1588             error_report("KVM AIA: failed to set group_bits");
1589             exit(1);
1590         }
1591 
1592         ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1593                                 KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT,
1594                                 &group_shift, true, NULL);
1595         if (ret < 0) {
1596             error_report("KVM AIA: failed to set group_shift");
1597             exit(1);
1598         }
1599     }
1600 
1601     guest_bits = guest_num == 0 ? 0 :
1602                  find_last_bit(&guest_num, BITS_PER_LONG) + 1;
1603     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1604                             KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS,
1605                             &guest_bits, true, NULL);
1606     if (ret < 0) {
1607         error_report("KVM AIA: failed to set guest_bits");
1608         exit(1);
1609     }
1610 
1611     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
1612                             KVM_DEV_RISCV_AIA_ADDR_APLIC,
1613                             &aplic_base, true, NULL);
1614     if (ret < 0) {
1615         error_report("KVM AIA: failed to set the base address of APLIC");
1616         exit(1);
1617     }
1618 
1619     for (socket = 0; socket < socket_count; socket++) {
1620         socket_imsic_base = imsic_base + socket * (1U << group_shift);
1621         hart_count = riscv_socket_hart_count(machine, socket);
1622         base_hart = riscv_socket_first_hartid(machine, socket);
1623 
1624         if (max_hart_per_socket < hart_count) {
1625             max_hart_per_socket = hart_count;
1626         }
1627 
1628         for (i = 0; i < hart_count; i++) {
1629             imsic_addr = socket_imsic_base + i * IMSIC_HART_SIZE(guest_bits);
1630             ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
1631                                     KVM_DEV_RISCV_AIA_ADDR_IMSIC(i + base_hart),
1632                                     &imsic_addr, true, NULL);
1633             if (ret < 0) {
1634                 error_report("KVM AIA: failed to set the IMSIC address for hart %d", i);
1635                 exit(1);
1636             }
1637         }
1638     }
1639 
1640     hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1;
1641     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1642                             KVM_DEV_RISCV_AIA_CONFIG_HART_BITS,
1643                             &hart_bits, true, NULL);
1644     if (ret < 0) {
1645         error_report("KVM AIA: failed to set hart_bits");
1646         exit(1);
1647     }
1648 
1649     if (kvm_has_gsi_routing()) {
1650         for (uint64_t idx = 0; idx < aia_irq_num + 1; ++idx) {
1651             /* KVM AIA only has one APLIC instance */
1652             kvm_irqchip_add_irq_route(kvm_state, idx, 0, idx);
1653         }
1654         kvm_gsi_routing_allowed = true;
1655         kvm_irqchip_commit_routes(kvm_state);
1656     }
1657 
1658     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CTRL,
1659                             KVM_DEV_RISCV_AIA_CTRL_INIT,
1660                             NULL, true, NULL);
1661     if (ret < 0) {
1662         error_report("KVM AIA: initialized fail");
1663         exit(1);
1664     }
1665 
1666     kvm_msi_via_irqfd_allowed = true;
1667 }
1668 
1669 static void kvm_cpu_instance_init(CPUState *cs)
1670 {
1671     Object *obj = OBJECT(RISCV_CPU(cs));
1672 
1673     riscv_init_kvm_registers(obj);
1674 
1675     kvm_riscv_add_cpu_user_properties(obj);
1676 }
1677 
1678 /*
1679  * We'll get here via the following path:
1680  *
1681  * riscv_cpu_realize()
1682  *   -> cpu_exec_realizefn()
1683  *      -> kvm_cpu_realize() (via accel_cpu_common_realize())
1684  */
1685 static bool kvm_cpu_realize(CPUState *cs, Error **errp)
1686 {
1687     RISCVCPU *cpu = RISCV_CPU(cs);
1688     int ret;
1689 
1690     if (riscv_has_ext(&cpu->env, RVV)) {
1691         ret = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
1692         if (ret) {
1693             error_setg(errp, "Error in prctl PR_RISCV_V_SET_CONTROL, code: %s",
1694                        strerrorname_np(errno));
1695             return false;
1696         }
1697     }
1698 
1699    return true;
1700 }
1701 
1702 void riscv_kvm_cpu_finalize_features(RISCVCPU *cpu, Error **errp)
1703 {
1704     CPURISCVState *env = &cpu->env;
1705     KVMScratchCPU kvmcpu;
1706     struct kvm_one_reg reg;
1707     uint64_t val;
1708     int ret;
1709 
1710     /* short-circuit without spinning the scratch CPU */
1711     if (!cpu->cfg.ext_zicbom && !cpu->cfg.ext_zicboz &&
1712         !riscv_has_ext(env, RVV)) {
1713         return;
1714     }
1715 
1716     if (!kvm_riscv_create_scratch_vcpu(&kvmcpu)) {
1717         error_setg(errp, "Unable to create scratch KVM cpu");
1718         return;
1719     }
1720 
1721     if (cpu->cfg.ext_zicbom &&
1722         riscv_cpu_option_set(kvm_cbom_blocksize.name)) {
1723 
1724         reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,
1725                                         kvm_cbom_blocksize.kvm_reg_id);
1726         reg.addr = (uint64_t)&val;
1727         ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, &reg);
1728         if (ret != 0) {
1729             error_setg(errp, "Unable to read cbom_blocksize, error %d", errno);
1730             return;
1731         }
1732 
1733         if (cpu->cfg.cbom_blocksize != val) {
1734             error_setg(errp, "Unable to set cbom_blocksize to a different "
1735                        "value than the host (%lu)", val);
1736             return;
1737         }
1738     }
1739 
1740     if (cpu->cfg.ext_zicboz &&
1741         riscv_cpu_option_set(kvm_cboz_blocksize.name)) {
1742 
1743         reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,
1744                                         kvm_cboz_blocksize.kvm_reg_id);
1745         reg.addr = (uint64_t)&val;
1746         ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, &reg);
1747         if (ret != 0) {
1748             error_setg(errp, "Unable to read cboz_blocksize, error %d", errno);
1749             return;
1750         }
1751 
1752         if (cpu->cfg.cboz_blocksize != val) {
1753             error_setg(errp, "Unable to set cboz_blocksize to a different "
1754                        "value than the host (%lu)", val);
1755             return;
1756         }
1757     }
1758 
1759     /* Users are setting vlen, not vlenb */
1760     if (riscv_has_ext(env, RVV) && riscv_cpu_option_set("vlen")) {
1761         if (!kvm_v_vlenb.supported) {
1762             error_setg(errp, "Unable to set 'vlenb': register not supported");
1763             return;
1764         }
1765 
1766         reg.id = kvm_v_vlenb.kvm_reg_id;
1767         reg.addr = (uint64_t)&val;
1768         ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, &reg);
1769         if (ret != 0) {
1770             error_setg(errp, "Unable to read vlenb register, error %d", errno);
1771             return;
1772         }
1773 
1774         if (cpu->cfg.vlenb != val) {
1775             error_setg(errp, "Unable to set 'vlen' to a different "
1776                        "value than the host (%lu)", val * 8);
1777             return;
1778         }
1779     }
1780 
1781     kvm_riscv_destroy_scratch_vcpu(&kvmcpu);
1782 }
1783 
1784 static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data)
1785 {
1786     AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
1787 
1788     acc->cpu_instance_init = kvm_cpu_instance_init;
1789     acc->cpu_target_realize = kvm_cpu_realize;
1790 }
1791 
1792 static const TypeInfo kvm_cpu_accel_type_info = {
1793     .name = ACCEL_CPU_NAME("kvm"),
1794 
1795     .parent = TYPE_ACCEL_CPU,
1796     .class_init = kvm_cpu_accel_class_init,
1797     .abstract = true,
1798 };
1799 static void kvm_cpu_accel_register_types(void)
1800 {
1801     type_register_static(&kvm_cpu_accel_type_info);
1802 }
1803 type_init(kvm_cpu_accel_register_types);
1804 
1805 static void riscv_host_cpu_class_init(ObjectClass *c, void *data)
1806 {
1807     RISCVCPUClass *mcc = RISCV_CPU_CLASS(c);
1808 
1809 #if defined(TARGET_RISCV32)
1810     mcc->misa_mxl_max = MXL_RV32;
1811 #elif defined(TARGET_RISCV64)
1812     mcc->misa_mxl_max = MXL_RV64;
1813 #endif
1814 }
1815 
1816 static const TypeInfo riscv_kvm_cpu_type_infos[] = {
1817     {
1818         .name = TYPE_RISCV_CPU_HOST,
1819         .parent = TYPE_RISCV_CPU,
1820         .class_init = riscv_host_cpu_class_init,
1821     }
1822 };
1823 
1824 DEFINE_TYPES(riscv_kvm_cpu_type_infos)
1825