#!/usr/bin/env python3
#
# pylint: disable=C0301,C0114,R0903,R0912,R0913,R0914,R0915,W0511
# SPDX-License-Identifier: GPL-2.0-or-later
#
# Copyright (C) 2024-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

# TODO: current implementation has dummy defaults.
#
# For a better implementation, a QMP addition/call is needed to
# retrieve some data for ARM Processor Error injection:
#
# - ARM registers: power_state, mpidr.

"""
Generate an ARM processor error CPER, compatible with
UEFI 2.9A Errata.

Injecting such errors can be done using:

    $ ./scripts/ghes_inject.py arm
    Error injected.

Produces a simple CPER record, as detected on a Linux guest:

[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
[Hardware Error]: event severity: recoverable
[Hardware Error]:  Error 0, type: recoverable
[Hardware Error]:   section_type: ARM processor error
[Hardware Error]:   MIDR: 0x0000000000000000
[Hardware Error]:   running state: 0x0
[Hardware Error]:   Power State Coordination Interface state: 0
[Hardware Error]:   Error info structure 0:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x02: cache error
[Hardware Error]:    error_info: 0x000000000091000f
[Hardware Error]:     transaction type: Data Access
[Hardware Error]:     cache error, operation type: Data write
[Hardware Error]:     cache level: 2
[Hardware Error]:     processor context not corrupted
[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error

The ARM Processor Error message can be customized via command line
parameters. For instance:

    $ ./scripts/ghes_inject.py arm --mpidr 0x444 --running --affinity 1 \
        --error-info 12345678 --vendor 0x13,123,4,5,1 --ctx-array 0,1,2,3,4,5 \
        -t cache tlb bus micro-arch tlb,micro-arch
    Error injected.

Injects this error, as detected on a Linux guest:

[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
[Hardware Error]: event severity: recoverable
[Hardware Error]:  Error 0, type: recoverable
[Hardware Error]:   section_type: ARM processor error
[Hardware Error]:   MIDR: 0x0000000000000000
[Hardware Error]:   Multiprocessor Affinity Register (MPIDR): 0x0000000000000444
[Hardware Error]:   error affinity level: 1
[Hardware Error]:   running state: 0x1
[Hardware Error]:   Power State Coordination Interface state: 0
[Hardware Error]:   Error info structure 0:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x02: cache error
[Hardware Error]:    error_info: 0x0000000000bc614e
[Hardware Error]:     cache level: 2
[Hardware Error]:     processor context not corrupted
[Hardware Error]:   Error info structure 1:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x04: TLB error
[Hardware Error]:    error_info: 0x000000000054007f
[Hardware Error]:     transaction type: Instruction
[Hardware Error]:     TLB error, operation type: Instruction fetch
[Hardware Error]:     TLB level: 1
[Hardware Error]:     processor context not corrupted
[Hardware Error]:     the error has not been corrected
[Hardware Error]:     PC is imprecise
[Hardware Error]:   Error info structure 2:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x08: bus error
[Hardware Error]:    error_info: 0x00000080d6460fff
[Hardware Error]:     transaction type: Generic
[Hardware Error]:     bus error, operation type: Generic read (type of instruction or data request cannot be determined)
[Hardware Error]:     affinity level at which the bus error occurred: 1
[Hardware Error]:     processor context corrupted
[Hardware Error]:     the error has been corrected
[Hardware Error]:     PC is imprecise
[Hardware Error]:     Program execution can be restarted reliably at the PC associated with the error.
[Hardware Error]:     participation type: Local processor observed
[Hardware Error]:     request timed out
[Hardware Error]:     address space: External Memory Access
[Hardware Error]:     memory access attributes:0x20
[Hardware Error]:     access mode: secure
[Hardware Error]:   Error info structure 3:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x10: micro-architectural error
[Hardware Error]:    error_info: 0x0000000078da03ff
[Hardware Error]:   Error info structure 4:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x14: TLB error|micro-architectural error
[Hardware Error]:   Context info structure 0:
[Hardware Error]:    register context type: AArch64 EL1 context registers
[Hardware Error]:    00000000: 00000000 00000000
[Hardware Error]:   Vendor specific error info has 5 bytes:
[Hardware Error]:    00000000: 13 7b 04 05 01                                   .{...
[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error
[Firmware Warn]: GHES: Unhandled processor error type 0x04: TLB error
[Firmware Warn]: GHES: Unhandled processor error type 0x08: bus error
[Firmware Warn]: GHES: Unhandled processor error type 0x10: micro-architectural error
[Firmware Warn]: GHES: Unhandled processor error type 0x14: TLB error|micro-architectural error
"""

import argparse
import re

from qmp_helper import qmp, util, cper_guid


class ArmProcessorEinj:
    """
    Implements ARM Processor Error injection via GHES
    """

    DESC = """
    Generates an ARM processor error CPER, compatible with
    UEFI 2.9A Errata.
    """

    # Size in bytes of the fixed ARM Processor Error header emitted by
    # send_cper(): 4 + 2 + 2 + 4 + 1 + 3 + 8 + 8 + 4 + 4
    ACPI_GHES_ARM_CPER_LENGTH = 40

    # Size in bytes of each Processor Error Information (PEI) entry emitted
    # by send_cper(): 1 + 1 + 2 + 1 + 2 + 1 + 8 + 8 + 8
    ACPI_GHES_ARM_CPER_PEI_LENGTH = 32

    # Context types
    CONTEXT_AARCH32_EL1 = 1
    CONTEXT_AARCH64_EL1 = 5
    CONTEXT_MISC_REG = 8

    def __init__(self, subparsers):
        """
        Initialize the error injection class and add the "arm" subparser.

        :param subparsers: object returned by
                           ``ArgumentParser.add_subparsers()``; the "arm"
                           subcommand is registered on it, with
                           ``send_cper`` as its ``func`` default.
        """

        # Valid choice values
        self.arm_valid_bits = {
            "mpidr":    util.bit(0),
            "affinity": util.bit(1),
            "running":  util.bit(2),
            "vendor":   util.bit(3),
        }

        self.pei_flags = {
            "first":      util.bit(0),
            "last":       util.bit(1),
            "propagated": util.bit(2),
            "overflow":   util.bit(3),
        }

        self.pei_error_types = {
            "cache":      util.bit(1),
            "tlb":        util.bit(2),
            "bus":        util.bit(3),
            "micro-arch": util.bit(4),
        }

        self.pei_valid_bits = {
            "multiple-error": util.bit(0),
            "flags":          util.bit(1),
            "error-info":     util.bit(2),
            "virt-addr":      util.bit(3),
            "phy-addr":       util.bit(4),
        }

        # Last CPER payload built by send_cper()
        self.data = bytearray()

        parser = subparsers.add_parser("arm", description=self.DESC)

        arm_valid_bits = ",".join(self.arm_valid_bits.keys())
        flags = ",".join(self.pei_flags.keys())
        error_types = ",".join(self.pei_error_types.keys())
        pei_valid_bits = ",".join(self.pei_valid_bits.keys())

        # UEFI N.16 ARM Validation bits
        g_arm = parser.add_argument_group("ARM processor")
        g_arm.add_argument("--arm", "--arm-valid",
                           help=f"ARM valid bits: {arm_valid_bits}")
        g_arm.add_argument("-a", "--affinity",  "--level", "--affinity-level",
                           type=lambda x: int(x, 0),
                           help="Affinity level (when multiple levels apply)")
        g_arm.add_argument("-l", "--mpidr", type=lambda x: int(x, 0),
                           help="Multiprocessor Affinity Register")
        g_arm.add_argument("-i", "--midr", type=lambda x: int(x, 0),
                           help="Main ID Register")
        g_arm.add_argument("-r", "--running",
                           action=argparse.BooleanOptionalAction,
                           default=None,
                           help="Indicates if the processor is running or not")
        g_arm.add_argument("--psci", "--psci-state",
                           type=lambda x: int(x, 0),
                           help="Power State Coordination Interface - PSCI state")

        # TODO: Add vendor-specific support

        # UEFI N.17 bitmaps (type and flags)
        g_pei = parser.add_argument_group("ARM Processor Error Info (PEI)")
        g_pei.add_argument("-t", "--type", nargs="+",
                           help=f"one or more error types: {error_types}")
        g_pei.add_argument("-f", "--flags", nargs="*",
                           help=f"zero or more error flags: {flags}")
        g_pei.add_argument("-V", "--pei-valid", "--error-valid", nargs="*",
                           help=f"zero or more PEI valid bits: {pei_valid_bits}")

        # UEFI N.17 Integer values
        g_pei.add_argument("-m", "--multiple-error", nargs="+",
                           help="Number of errors: 0: Single error, 1: Multiple errors, 2-65535: Error count if known")
        g_pei.add_argument("-e", "--error-info", nargs="+",
                           help="Error information (UEFI 2.10 tables N.18 to N.20)")
        g_pei.add_argument("-p", "--physical-address",  nargs="+",
                           help="Physical address")
        g_pei.add_argument("-v", "--virtual-address",  nargs="+",
                           help="Virtual address")

        # UEFI N.21 Context
        g_ctx = parser.add_argument_group("Processor Context")
        g_ctx.add_argument("--ctx-type", "--context-type", nargs="*",
                           help="Type of the context (0=ARM32 GPR, 5=ARM64 EL1, other values supported)")
        g_ctx.add_argument("--ctx-size", "--context-size", nargs="*",
                           help="Minimal size of the context")
        g_ctx.add_argument("--ctx-array", "--context-array", nargs="*",
                           help="Comma-separated arrays for each context")

        # Vendor-specific data
        g_vendor = parser.add_argument_group("Vendor-specific data")
        g_vendor.add_argument("--vendor", "--vendor-specific", nargs="+",
                              help="Vendor-specific byte arrays of data")

        # Add arguments for Generic Error Data
        qmp.argparse(parser)

        parser.set_defaults(func=self.send_cper)

    def send_cper(self, args):
        """
        Parse subcommand arguments and send a CPER via QMP.

        Builds, in this order, the ARM Processor Error header, the
        Processor Error Information (PEI) entries, the processor context
        entries and the vendor-specific bytes, then hands the resulting
        payload to ``qmp.send_cper()``. The payload is also kept in
        ``self.data``.

        :param args: ``argparse.Namespace`` produced by the "arm"
                     subparser registered in ``__init__``.
        """

        qmp_cmd = qmp(args.host, args.port, args.debug)

        # Handle Generic Error Data arguments if any
        qmp_cmd.set_args(args)

        is_cpu_type = re.compile(r"^([\w+]+\-)?arm\-cpu$")
        cpus = qmp_cmd.search_qom("/machine/unattached/device",
                                  "type", is_cpu_type)

        cper = {}
        pei = {}
        ctx = {}
        vendor = {}

        # Handle global parameters
        if args.arm:
            # Valid bits explicitly given: don't derive them from the
            # other parameters.
            arm_valid_init = False
            cper["valid"] = util.get_choice(name="valid",
                                            value=args.arm,
                                            choices=self.arm_valid_bits,
                                            suffixes=["-error", "-err"])
        else:
            cper["valid"] = 0
            arm_valid_init = True

        # --running is tri-state (None/True/False); only True sets bit 0
        cper["running-state"] = util.bit(0) if args.running else 0

        if arm_valid_init:
            if args.affinity:
                cper["valid"] |= self.arm_valid_bits["affinity"]

            if args.mpidr:
                cper["valid"] |= self.arm_valid_bits["mpidr"]

            # A running-state value is always emitted (and PSCI state is
            # flagged through the same bit), so it is always marked valid.
            cper["valid"] |= self.arm_valid_bits["running"]

        # Handle PEI
        if not args.type:
            args.type = ["cache-error"]

        util.get_mult_choices(
            pei,
            name="valid",
            values=args.pei_valid,
            choices=self.pei_valid_bits,
            suffixes=["-valid", "--addr"],
        )
        util.get_mult_choices(
            pei,
            name="type",
            values=args.type,
            choices=self.pei_error_types,
            suffixes=["-error", "-err"],
        )
        util.get_mult_choices(
            pei,
            name="flags",
            values=args.flags,
            choices=self.pei_flags,
            suffixes=["-error", "-cap"],
        )
        util.get_mult_int(pei, "error-info", args.error_info)
        util.get_mult_int(pei, "multiple-error", args.multiple_error)
        util.get_mult_int(pei, "phy-addr", args.physical_address)
        util.get_mult_int(pei, "virt-addr", args.virtual_address)

        # Handle context
        util.get_mult_int(ctx, "type", args.ctx_type, allow_zero=True)
        util.get_mult_int(ctx, "minimal-size", args.ctx_size, allow_zero=True)
        util.get_mult_array(ctx, "register", args.ctx_array, allow_zero=True)

        util.get_mult_array(vendor, "bytes", args.vendor, max_val=255)

        # Store PEI
        pei_data = bytearray()
        default_flags = self.pei_flags["first"] | self.pei_flags["last"]

        # UEFI 2.10 doesn't define how to encode error information
        # when multiple types are raised. So, provide a default only
        # if a single type is there
        default_error_info = {
            self.pei_error_types["cache"]:      0x0091000F,
            self.pei_error_types["tlb"]:        0x0054007F,
            self.pei_error_types["bus"]:        0x80D6460FFF,
            self.pei_error_types["micro-arch"]: 0x78DA03FF,
        }

        # Optional PEI fields whose presence sets the same-named valid bit
        # when the valid bits were not explicitly given.
        pei_optional = ("multiple-error", "flags", "error-info",
                        "phy-addr", "virt-addr")

        error_info_num = len(pei)

        for p in pei.values():
            if "error-info" not in p and p["type"] in default_error_info:
                p["error-info"] = default_error_info[p["type"]]

            if "valid" not in p:
                p["valid"] = 0
                for field in pei_optional:
                    if field in p:
                        p["valid"] |= self.pei_valid_bits[field]

            # Version
            util.data_add(pei_data, 0, 1)

            util.data_add(pei_data,
                          self.ACPI_GHES_ARM_CPER_PEI_LENGTH, 1)

            util.data_add(pei_data, p["valid"], 2)
            util.data_add(pei_data, p["type"], 1)
            util.data_add(pei_data, p.get("multiple-error", 1), 2)
            util.data_add(pei_data, p.get("flags", default_flags), 1)
            util.data_add(pei_data, p.get("error-info", 0), 8)
            util.data_add(pei_data, p.get("virt-addr", 0xDEADBEEF), 8)
            util.data_add(pei_data, p.get("phy-addr", 0xABBA0BAD), 8)

        # Store Context
        ctx_data = bytearray()
        context_info_num = 0

        if ctx:
            ret = qmp_cmd.send_cmd("query-target", may_open=True)

            default_ctx = self.CONTEXT_MISC_REG

            # Guard against a missing/non-dict reply before looking at it
            if isinstance(ret, dict) and "arch" in ret:
                if ret["arch"] == "aarch64":
                    default_ctx = self.CONTEXT_AARCH64_EL1
                elif ret["arch"] == "arm":
                    default_ctx = self.CONTEXT_AARCH32_EL1

            for k in sorted(ctx):
                entry = ctx[k]
                context_info_num += 1

                if "type" not in entry:
                    entry["type"] = default_ctx

                if "register" not in entry:
                    entry["register"] = []

                reg_size = len(entry["register"])

                # The minimal size is a per-context setting: look it up on
                # this entry, not on the outer dict (whose keys are the
                # context indexes), otherwise --ctx-size is never honoured.
                size = max(entry.get("minimal-size", 0), reg_size)

                # NOTE(review): this always grows the array by one register
                # (zero padded below) and wraps at 0xFFFE; kept as in the
                # original - confirm the intended rounding.
                size = (size + 1) % 0xFFFE

                # Version
                util.data_add(ctx_data, 0, 2)

                util.data_add(ctx_data, entry["type"], 2)

                # Register array size in bytes (8 bytes per register)
                util.data_add(ctx_data, 8 * size, 4)

                for r in entry["register"]:
                    util.data_add(ctx_data, r, 8)

                # Zero-pad up to the requested size
                for _ in range(reg_size, size):
                    util.data_add(ctx_data, 0, 8)

        # Vendor-specific bytes are not grouped
        vendor_data = bytearray()
        for k in sorted(vendor):
            for b in vendor[k]["bytes"]:
                util.data_add(vendor_data, b, 1)

        # Encode ARM Processor Error
        data = bytearray()

        util.data_add(data, cper["valid"], 4)

        util.data_add(data, error_info_num, 2)
        util.data_add(data, context_info_num, 2)

        # Calculate the length of the CPER data
        cper_length = self.ACPI_GHES_ARM_CPER_LENGTH
        cper_length += len(pei_data)
        cper_length += len(vendor_data)
        cper_length += len(ctx_data)
        util.data_add(data, cper_length, 4)

        # The values below are read from the argparse destinations
        # (affinity, mpidr, midr, psci). Looking them up under other names
        # silently discards whatever the user passed on the command line.
        util.data_add(data, args.affinity or 0, 1)

        # Reserved
        util.data_add(data, 0, 3)

        # MIDR: a command line value wins; otherwise try to read it from
        # the first ARM CPU found via QOM, falling back to zero.
        midr = args.midr
        if midr is None and cpus:
            cmd_arg = {
                'path': cpus[0],
                'property': "midr"
            }
            ret = qmp_cmd.send_cmd("qom-get", cmd_arg, may_open=True)
            if isinstance(ret, int):
                midr = ret

        util.data_add(data, args.mpidr or 0, 8)
        util.data_add(data, midr or 0, 8)
        util.data_add(data, cper["running-state"], 4)
        util.data_add(data, args.psci or 0, 4)

        # Add PEI
        data.extend(pei_data)
        data.extend(ctx_data)
        data.extend(vendor_data)

        self.data = data

        qmp_cmd.send_cper(cper_guid.CPER_PROC_ARM, self.data)