xref: /openbmc/qemu/scripts/arm_processor_error.py (revision 92a0dcbd751d771512b9dedd97e00553181b7699)
1#!/usr/bin/env python3
2#
3# pylint: disable=C0301,C0114,R0903,R0912,R0913,R0914,R0915,W0511
4# SPDX-License-Identifier: GPL-2.0-or-later
5#
6# Copyright (C) 2024-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
7
8# TODO: current implementation has dummy defaults.
9#
10# For a better implementation, a QMP addition/call is needed to
11# retrieve some data for ARM Processor Error injection:
12#
13#   - ARM registers: power_state, mpidr.
14
15"""
16Generate an ARM processor error CPER, compatible with
17UEFI 2.9A Errata.
18
19Injecting such errors can be done using:
20
21    $ ./scripts/ghes_inject.py arm
22    Error injected.
23
24Produces a simple CPER register, as detected on a Linux guest:
25
26[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
27[Hardware Error]: event severity: recoverable
28[Hardware Error]:  Error 0, type: recoverable
29[Hardware Error]:   section_type: ARM processor error
30[Hardware Error]:   MIDR: 0x0000000000000000
31[Hardware Error]:   running state: 0x0
32[Hardware Error]:   Power State Coordination Interface state: 0
33[Hardware Error]:   Error info structure 0:
34[Hardware Error]:   num errors: 2
35[Hardware Error]:    error_type: 0x02: cache error
36[Hardware Error]:    error_info: 0x000000000091000f
37[Hardware Error]:     transaction type: Data Access
38[Hardware Error]:     cache error, operation type: Data write
39[Hardware Error]:     cache level: 2
40[Hardware Error]:     processor context not corrupted
41[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error
42
43The ARM Processor Error message can be customized via command line
44parameters. For instance:
45
46    $ ./scripts/ghes_inject.py arm --mpidr 0x444 --running --affinity 1 \
47        --error-info 12345678 --vendor 0x13,123,4,5,1 --ctx-array 0,1,2,3,4,5 \
48        -t cache tlb bus micro-arch tlb,micro-arch
49    Error injected.
50
51Injects this error, as detected on a Linux guest:
52
53[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
54[Hardware Error]: event severity: recoverable
55[Hardware Error]:  Error 0, type: recoverable
56[Hardware Error]:   section_type: ARM processor error
57[Hardware Error]:   MIDR: 0x0000000000000000
58[Hardware Error]:   Multiprocessor Affinity Register (MPIDR): 0x0000000000000000
59[Hardware Error]:   error affinity level: 0
60[Hardware Error]:   running state: 0x1
61[Hardware Error]:   Power State Coordination Interface state: 0
62[Hardware Error]:   Error info structure 0:
63[Hardware Error]:   num errors: 2
64[Hardware Error]:    error_type: 0x02: cache error
65[Hardware Error]:    error_info: 0x0000000000bc614e
66[Hardware Error]:     cache level: 2
67[Hardware Error]:     processor context not corrupted
68[Hardware Error]:   Error info structure 1:
69[Hardware Error]:   num errors: 2
70[Hardware Error]:    error_type: 0x04: TLB error
71[Hardware Error]:    error_info: 0x000000000054007f
72[Hardware Error]:     transaction type: Instruction
73[Hardware Error]:     TLB error, operation type: Instruction fetch
74[Hardware Error]:     TLB level: 1
75[Hardware Error]:     processor context not corrupted
76[Hardware Error]:     the error has not been corrected
77[Hardware Error]:     PC is imprecise
78[Hardware Error]:   Error info structure 2:
79[Hardware Error]:   num errors: 2
80[Hardware Error]:    error_type: 0x08: bus error
81[Hardware Error]:    error_info: 0x00000080d6460fff
82[Hardware Error]:     transaction type: Generic
83[Hardware Error]:     bus error, operation type: Generic read (type of instruction or data request cannot be determined)
84[Hardware Error]:     affinity level at which the bus error occurred: 1
85[Hardware Error]:     processor context corrupted
86[Hardware Error]:     the error has been corrected
87[Hardware Error]:     PC is imprecise
88[Hardware Error]:     Program execution can be restarted reliably at the PC associated with the error.
89[Hardware Error]:     participation type: Local processor observed
90[Hardware Error]:     request timed out
91[Hardware Error]:     address space: External Memory Access
92[Hardware Error]:     memory access attributes:0x20
93[Hardware Error]:     access mode: secure
94[Hardware Error]:   Error info structure 3:
95[Hardware Error]:   num errors: 2
96[Hardware Error]:    error_type: 0x10: micro-architectural error
97[Hardware Error]:    error_info: 0x0000000078da03ff
98[Hardware Error]:   Error info structure 4:
99[Hardware Error]:   num errors: 2
100[Hardware Error]:    error_type: 0x14: TLB error|micro-architectural error
101[Hardware Error]:   Context info structure 0:
102[Hardware Error]:    register context type: AArch64 EL1 context registers
103[Hardware Error]:    00000000: 00000000 00000000
104[Hardware Error]:   Vendor specific error info has 5 bytes:
105[Hardware Error]:    00000000: 13 7b 04 05 01                                   .{...
106[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error
107[Firmware Warn]: GHES: Unhandled processor error type 0x04: TLB error
108[Firmware Warn]: GHES: Unhandled processor error type 0x08: bus error
109[Firmware Warn]: GHES: Unhandled processor error type 0x10: micro-architectural error
110[Firmware Warn]: GHES: Unhandled processor error type 0x14: TLB error|micro-architectural error
111"""
112
113import argparse
114import re
115
116from qmp_helper import qmp, util, cper_guid
117
118
class ArmProcessorEinj:
    """
    Implements ARM Processor Error injection via GHES

    The constructor registers the "arm" subcommand and its options on an
    argparse subparsers object; send_cper() is installed as the subcommand
    handler. It encodes the ARM Processor Error section (fixed header,
    Processor Error Information entries, context entries and vendor
    bytes) and sends it through QMP.
    """

    DESC = """
    Generates an ARM processor error CPER, compatible with
    UEFI 2.9A Errata.
    """

    # Size in bytes of the fixed header emitted by send_cper():
    # 4 + 2 + 2 + 4 + 1 + 3 + 8 + 8 + 4 + 4
    ACPI_GHES_ARM_CPER_LENGTH = 40
    # Size in bytes of each Processor Error Information (PEI) entry:
    # 1 + 1 + 2 + 1 + 2 + 1 + 8 + 8 + 8
    ACPI_GHES_ARM_CPER_PEI_LENGTH = 32

    # Context types
    CONTEXT_AARCH32_EL1 = 1
    CONTEXT_AARCH64_EL1 = 5
    CONTEXT_MISC_REG = 8

    def __init__(self, subparsers):
        """Initialize the error injection class and add subparser"""

        # Valid choice values
        self.arm_valid_bits = {
            "mpidr":    util.bit(0),
            "affinity": util.bit(1),
            "running":  util.bit(2),
            "vendor":   util.bit(3),
        }

        self.pei_flags = {
            "first":        util.bit(0),
            "last":         util.bit(1),
            "propagated":   util.bit(2),
            "overflow":     util.bit(3),
        }

        self.pei_error_types = {
            "cache":        util.bit(1),
            "tlb":          util.bit(2),
            "bus":          util.bit(3),
            "micro-arch":   util.bit(4),
        }

        self.pei_valid_bits = {
            "multiple-error":   util.bit(0),
            "flags":            util.bit(1),
            "error-info":       util.bit(2),
            "virt-addr":        util.bit(3),
            "phy-addr":         util.bit(4),
        }

        # Last CPER payload built by send_cper()
        self.data = bytearray()

        parser = subparsers.add_parser("arm", description=self.DESC)

        # Comma-separated choice names, shown in the help messages
        arm_valid_bits = ",".join(self.arm_valid_bits)
        flags = ",".join(self.pei_flags)
        error_types = ",".join(self.pei_error_types)
        pei_valid_bits = ",".join(self.pei_valid_bits)

        # UEFI N.16 ARM Validation bits
        g_arm = parser.add_argument_group("ARM processor")
        g_arm.add_argument("--arm", "--arm-valid",
                           help=f"ARM valid bits: {arm_valid_bits}")
        g_arm.add_argument("-a", "--affinity", "--level", "--affinity-level",
                           type=lambda x: int(x, 0),
                           help="Affinity level (when multiple levels apply)")
        g_arm.add_argument("-l", "--mpidr", type=lambda x: int(x, 0),
                           help="Multiprocessor Affinity Register")
        g_arm.add_argument("-i", "--midr", type=lambda x: int(x, 0),
                           help="Main ID Register")
        g_arm.add_argument("-r", "--running",
                           action=argparse.BooleanOptionalAction,
                           default=None,
                           help="Indicates if the processor is running or not")
        g_arm.add_argument("--psci", "--psci-state",
                           type=lambda x: int(x, 0),
                           help="Power State Coordination Interface - PSCI state")

        # TODO: Add vendor-specific support

        # UEFI N.17 bitmaps (type and flags)
        g_pei = parser.add_argument_group("ARM Processor Error Info (PEI)")
        g_pei.add_argument("-t", "--type", nargs="+",
                           help=f"one or more error types: {error_types}")
        g_pei.add_argument("-f", "--flags", nargs="*",
                           help=f"zero or more error flags: {flags}")
        g_pei.add_argument("-V", "--pei-valid", "--error-valid", nargs="*",
                           help=f"zero or more PEI valid bits: {pei_valid_bits}")

        # UEFI N.17 Integer values
        g_pei.add_argument("-m", "--multiple-error", nargs="+",
                           help="Number of errors: 0: Single error, 1: Multiple errors, 2-65535: Error count if known")
        g_pei.add_argument("-e", "--error-info", nargs="+",
                           help="Error information (UEFI 2.10 tables N.18 to N.20)")
        g_pei.add_argument("-p", "--physical-address", nargs="+",
                           help="Physical address")
        g_pei.add_argument("-v", "--virtual-address", nargs="+",
                           help="Virtual address")

        # UEFI N.21 Context
        g_ctx = parser.add_argument_group("Processor Context")
        g_ctx.add_argument("--ctx-type", "--context-type", nargs="*",
                           help="Type of the context (0=ARM32 GPR, 5=ARM64 EL1, other values supported)")
        g_ctx.add_argument("--ctx-size", "--context-size", nargs="*",
                           help="Minimal size of the context")
        g_ctx.add_argument("--ctx-array", "--context-array", nargs="*",
                           help="Comma-separated arrays for each context")

        # Vendor-specific data
        g_vendor = parser.add_argument_group("Vendor-specific data")
        g_vendor.add_argument("--vendor", "--vendor-specific", nargs="+",
                              help="Vendor-specific byte arrays of data")

        # Add arguments for Generic Error Data
        qmp.argparse(parser)

        parser.set_defaults(func=self.send_cper)

    @staticmethod
    def _ctx_num_regs(entry):
        """
        Return the number of 8-byte register slots of a context entry.

        entry is a per-context dict which may carry a "register" list and
        a "minimal-size" integer. The result is the larger of the two
        sizes, plus one, modulo 0xFFFE.
        """

        reg_size = len(entry.get("register", []))
        size = max(entry.get("minimal-size", 0), reg_size)

        # NOTE(review): the extra slot and the modulo are kept exactly as
        # in the previous implementation; their rationale is not visible
        # here - confirm before changing.
        return (size + 1) % 0xFFFE

    def send_cper(self, args):
        """
        Parse subcommand arguments and send a CPER via QMP

        args is the argparse namespace produced by the "arm" subparser
        created at __init__(). The encoded payload is also kept at
        self.data.
        """

        qmp_cmd = qmp(args.host, args.port, args.debug)

        # Handle Generic Error Data arguments if any
        qmp_cmd.set_args(args)

        is_cpu_type = re.compile(r"^([\w+]+\-)?arm\-cpu$")
        cpus = qmp_cmd.search_qom("/machine/unattached/device",
                                  "type", is_cpu_type)

        cper = {}
        pei = {}
        ctx = {}
        vendor = {}

        # Handle global parameters
        if args.arm:
            arm_valid_init = False
            cper["valid"] = util.get_choice(name="valid",
                                            value=args.arm,
                                            choices=self.arm_valid_bits,
                                            suffixes=["-error", "-err"])
        else:
            cper["valid"] = 0
            arm_valid_init = True

        # --running is tri-state (None when not given); anything but an
        # explicit True is encoded as "not running"
        if vars(args).get("running"):
            cper["running-state"] = util.bit(0)
        else:
            cper["running-state"] = 0

        # When --arm is not used, derive the valid bits from the options.
        # Test against None, as 0 is a legitimate value for both fields.
        if arm_valid_init:
            if args.affinity is not None:
                cper["valid"] |= self.arm_valid_bits["affinity"]

            if args.mpidr is not None:
                cper["valid"] |= self.arm_valid_bits["mpidr"]

            # The running state word is always emitted, so it is always
            # flagged as valid; the PSCI state shares the same valid bit
            cper["valid"] |= self.arm_valid_bits["running"]

        # Handle PEI
        if not args.type:
            args.type = ["cache-error"]

        util.get_mult_choices(
            pei,
            name="valid",
            values=args.pei_valid,
            choices=self.pei_valid_bits,
            suffixes=["-valid", "--addr"],
        )
        util.get_mult_choices(
            pei,
            name="type",
            values=args.type,
            choices=self.pei_error_types,
            suffixes=["-error", "-err"],
        )
        util.get_mult_choices(
            pei,
            name="flags",
            values=args.flags,
            choices=self.pei_flags,
            suffixes=["-error", "-cap"],
        )
        util.get_mult_int(pei, "error-info", args.error_info)
        util.get_mult_int(pei, "multiple-error", args.multiple_error)
        util.get_mult_int(pei, "phy-addr", args.physical_address)
        util.get_mult_int(pei, "virt-addr", args.virtual_address)

        # Handle context
        util.get_mult_int(ctx, "type", args.ctx_type, allow_zero=True)
        util.get_mult_int(ctx, "minimal-size", args.ctx_size, allow_zero=True)
        util.get_mult_array(ctx, "register", args.ctx_array, allow_zero=True)

        util.get_mult_array(vendor, "bytes", args.vendor, max_val=255)

        # Store PEI
        pei_data = bytearray()
        default_flags = self.pei_flags["first"] | self.pei_flags["last"]

        # UEFI 2.10 doesn't define how to encode error information
        # when multiple types are raised. So, provide a default only
        # if a single type is there
        default_error_info = {
            self.pei_error_types["cache"]:      0x0091000F,
            self.pei_error_types["tlb"]:        0x0054007F,
            self.pei_error_types["bus"]:        0x80D6460FFF,
            self.pei_error_types["micro-arch"]: 0x78DA03FF,
        }

        # PEI fields which set a valid bit when present. Names match
        # the keys of self.pei_valid_bits
        pei_fields = ("multiple-error", "flags", "error-info",
                      "phy-addr", "virt-addr")

        error_info_num = len(pei)

        for p in pei.values():
            if "error-info" not in p:
                info = default_error_info.get(p["type"])
                if info is not None:
                    p["error-info"] = info

            if "valid" not in p:
                p["valid"] = 0
                for field in pei_fields:
                    if field in p:
                        p["valid"] |= self.pei_valid_bits[field]

            # Version
            util.data_add(pei_data, 0, 1)

            util.data_add(pei_data,
                          self.ACPI_GHES_ARM_CPER_PEI_LENGTH, 1)

            util.data_add(pei_data, p["valid"], 2)
            util.data_add(pei_data, p["type"], 1)
            util.data_add(pei_data, p.get("multiple-error", 1), 2)
            util.data_add(pei_data, p.get("flags", default_flags), 1)
            util.data_add(pei_data, p.get("error-info", 0), 8)
            util.data_add(pei_data, p.get("virt-addr", 0xDEADBEEF), 8)
            util.data_add(pei_data, p.get("phy-addr", 0xABBA0BAD), 8)

        # Store Context
        ctx_data = bytearray()
        context_info_num = len(ctx)

        if ctx:
            ret = qmp_cmd.send_cmd("query-target", may_open=True)

            # Pick the default context type from the target architecture,
            # falling back to MISC_REG if it can't be determined
            default_ctx = self.CONTEXT_MISC_REG

            if ret and "arch" in ret:
                if ret["arch"] == "aarch64":
                    default_ctx = self.CONTEXT_AARCH64_EL1
                elif ret["arch"] == "arm":
                    default_ctx = self.CONTEXT_AARCH32_EL1

            for k in sorted(ctx):
                entry = ctx[k]

                entry.setdefault("type", default_ctx)
                entry.setdefault("register", [])

                registers = entry["register"]

                # The minimal size is a per-context property, so it is
                # looked up at the context entry
                size = self._ctx_num_regs(entry)

                # Version
                util.data_add(ctx_data, 0, 2)

                util.data_add(ctx_data, entry["type"], 2)

                util.data_add(ctx_data, 8 * size, 4)

                for r in registers:
                    util.data_add(ctx_data, r, 8)

                # Zero-fill up to the announced size
                for _ in range(len(registers), size):
                    util.data_add(ctx_data, 0, 8)

        # Vendor-specific bytes are not grouped
        vendor_data = bytearray()
        for k in sorted(vendor):
            for b in vendor[k]["bytes"]:
                util.data_add(vendor_data, b, 1)

        # Encode ARM Processor Error
        data = bytearray()

        util.data_add(data, cper["valid"], 4)

        util.data_add(data, error_info_num, 2)
        util.data_add(data, context_info_num, 2)

        # Calculate the length of the CPER data
        cper_length = self.ACPI_GHES_ARM_CPER_LENGTH
        cper_length += len(pei_data)
        cper_length += len(vendor_data)
        cper_length += len(ctx_data)
        util.data_add(data, cper_length, 4)

        # Command line values are stored at the argparse destinations
        # (affinity, mpidr, midr, psci); they're None when not specified
        util.data_add(data, args.affinity or 0, 1)

        # Reserved
        util.data_add(data, 0, 3)

        # Without --midr, try to read MIDR from the first ARM CPU found
        midr = args.midr
        if midr is None and cpus:
            cmd_arg = {
                'path': cpus[0],
                'property': "midr"
            }
            ret = qmp_cmd.send_cmd("qom-get", cmd_arg, may_open=True)
            if isinstance(ret, int):
                midr = ret

        util.data_add(data, args.mpidr or 0, 8)
        util.data_add(data, midr or 0, 8)
        util.data_add(data, cper["running-state"], 4)
        util.data_add(data, args.psci or 0, 4)

        # Add PEI
        data.extend(pei_data)
        data.extend(ctx_data)
        data.extend(vendor_data)

        self.data = data

        qmp_cmd.send_cper(cper_guid.CPER_PROC_ARM, self.data)
477