xref: /openbmc/qemu/hw/pci/pcie_aer.c (revision 7ef295ea)
1 /*
2  * pcie_aer.c
3  *
4  * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "sysemu/sysemu.h"
23 #include "qapi/qmp/types.h"
24 #include "monitor/monitor.h"
25 #include "hw/pci/pci_bridge.h"
26 #include "hw/pci/pcie.h"
27 #include "hw/pci/msix.h"
28 #include "hw/pci/msi.h"
29 #include "hw/pci/pci_bus.h"
30 #include "hw/pci/pcie_regs.h"
31 
/* Uncomment the next line to enable debug tracing to stderr. */
//#define DEBUG_PCIE
#ifdef DEBUG_PCIE
/* printf-style trace, prefixed with the calling function name and line. */
# define PCIE_DPRINTF(fmt, ...)                                         \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#else
/* Tracing disabled: expands to an empty statement. */
# define PCIE_DPRINTF(fmt, ...) do {} while (0)
#endif
/* Same as PCIE_DPRINTF, additionally prefixed with the device name and devfn. */
#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)

/*
 * Byte offsets, relative to PCI_ERR_ROOT_ERR_SRC, of the 16-bit source ID
 * recorded for correctable and for uncorrectable error messages.
 */
#define PCI_ERR_SRC_COR_OFFS    0
#define PCI_ERR_SRC_UNCOR_OFFS  2
44 
45 /* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
46 static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
47 {
48     switch (status) {
49     case PCI_ERR_UNC_INTN:
50     case PCI_ERR_UNC_DLP:
51     case PCI_ERR_UNC_SDN:
52     case PCI_ERR_UNC_RX_OVER:
53     case PCI_ERR_UNC_FCP:
54     case PCI_ERR_UNC_MALF_TLP:
55         return PCI_ERR_ROOT_CMD_FATAL_EN;
56     case PCI_ERR_UNC_POISON_TLP:
57     case PCI_ERR_UNC_ECRC:
58     case PCI_ERR_UNC_UNSUP:
59     case PCI_ERR_UNC_COMP_TIME:
60     case PCI_ERR_UNC_COMP_ABORT:
61     case PCI_ERR_UNC_UNX_COMP:
62     case PCI_ERR_UNC_ACSV:
63     case PCI_ERR_UNC_MCBTLP:
64     case PCI_ERR_UNC_ATOP_EBLOCKED:
65     case PCI_ERR_UNC_TLP_PRF_BLOCKED:
66         return PCI_ERR_ROOT_CMD_NONFATAL_EN;
67     default:
68         abort();
69         break;
70     }
71     return PCI_ERR_ROOT_CMD_FATAL_EN;
72 }
73 
74 static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
75 {
76     if (aer_log->log_num == aer_log->log_max) {
77         return -1;
78     }
79     memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
80     aer_log->log_num++;
81     return 0;
82 }
83 
84 static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
85 {
86     assert(aer_log->log_num);
87     *err = aer_log->log[0];
88     aer_log->log_num--;
89     memmove(&aer_log->log[0], &aer_log->log[1],
90             aer_log->log_num * sizeof *err);
91 }
92 
93 static void aer_log_clear_all_err(PCIEAERLog *aer_log)
94 {
95     aer_log->log_num = 0;
96 }
97 
98 int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
99 {
100     PCIExpressDevice *exp;
101 
102     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
103                         offset, size);
104     exp = &dev->exp;
105     exp->aer_cap = offset;
106 
107     /* log_max is property */
108     if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
109         dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
110     }
111     /* clip down the value to avoid unreasobale memory usage */
112     if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
113         return -EINVAL;
114     }
115     dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
116                                         dev->exp.aer_log.log_max);
117 
118     pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
119                  PCI_ERR_UNC_SUPPORTED);
120 
121     pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
122                  PCI_ERR_UNC_SEVERITY_DEFAULT);
123     pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
124                  PCI_ERR_UNC_SUPPORTED);
125 
126     pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
127                                PCI_ERR_COR_SUPPORTED);
128 
129     pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
130                  PCI_ERR_COR_MASK_DEFAULT);
131     pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
132                  PCI_ERR_COR_SUPPORTED);
133 
134     /* capabilities and control. multiple header logging is supported */
135     if (dev->exp.aer_log.log_max > 0) {
136         pci_set_long(dev->config + offset + PCI_ERR_CAP,
137                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
138                      PCI_ERR_CAP_MHRC);
139         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
140                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
141                      PCI_ERR_CAP_MHRE);
142     } else {
143         pci_set_long(dev->config + offset + PCI_ERR_CAP,
144                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
145         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
146                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
147     }
148 
149     switch (pcie_cap_get_type(dev)) {
150     case PCI_EXP_TYPE_ROOT_PORT:
151         /* this case will be set by pcie_aer_root_init() */
152         /* fallthrough */
153     case PCI_EXP_TYPE_DOWNSTREAM:
154     case PCI_EXP_TYPE_UPSTREAM:
155         pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
156                                    PCI_BRIDGE_CTL_SERR);
157         pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
158                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
159         break;
160     default:
161         /* nothing */
162         break;
163     }
164     return 0;
165 }
166 
167 void pcie_aer_exit(PCIDevice *dev)
168 {
169     g_free(dev->exp.aer_log.log);
170 }
171 
172 static void pcie_aer_update_uncor_status(PCIDevice *dev)
173 {
174     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
175     PCIEAERLog *aer_log = &dev->exp.aer_log;
176 
177     uint16_t i;
178     for (i = 0; i < aer_log->log_num; i++) {
179         pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
180                                    dev->exp.aer_log.log[i].status);
181     }
182 }
183 
184 /*
185  * return value:
186  * true: error message needs to be sent up
187  * false: error message is masked
188  *
189  * 6.2.6 Error Message Control
190  * Figure 6-3
191  * all pci express devices part
192  */
193 static bool
194 pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
195 {
196     if (!(pcie_aer_msg_is_uncor(msg) &&
197           (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
198         return false;
199     }
200 
201     /* Signaled System Error
202      *
203      * 7.5.1.1 Command register
204      * Bit 8 SERR# Enable
205      *
206      * When Set, this bit enables reporting of Non-fatal and Fatal
207      * errors detected by the Function to the Root Complex. Note that
208      * errors are reported if enabled either through this bit or through
209      * the PCI Express specific bits in the Device Control register (see
210      * Section 7.8.4).
211      */
212     pci_word_test_and_set_mask(dev->config + PCI_STATUS,
213                                PCI_STATUS_SIG_SYSTEM_ERROR);
214 
215     if (!(msg->severity &
216           pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
217         return false;
218     }
219 
220     /* send up error message */
221     return true;
222 }
223 
224 /*
225  * return value:
226  * true: error message is sent up
227  * false: error message is masked
228  *
229  * 6.2.6 Error Message Control
230  * Figure 6-3
231  * virtual pci bridge part
232  */
233 static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
234 {
235     uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
236 
237     if (pcie_aer_msg_is_uncor(msg)) {
238         /* Received System Error */
239         pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
240                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
241     }
242 
243     if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
244         return false;
245     }
246     return true;
247 }
248 
249 void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
250 {
251     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
252     assert(vector < PCI_ERR_ROOT_IRQ_MAX);
253     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
254                                  PCI_ERR_ROOT_IRQ);
255     pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
256                                vector << PCI_ERR_ROOT_IRQ_SHIFT);
257 }
258 
259 static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
260 {
261     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
262     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
263     return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
264 }
265 
266 /* Given a status register, get corresponding bits in the command register */
267 static uint32_t pcie_aer_status_to_cmd(uint32_t status)
268 {
269     uint32_t cmd = 0;
270     if (status & PCI_ERR_ROOT_COR_RCV) {
271         cmd |= PCI_ERR_ROOT_CMD_COR_EN;
272     }
273     if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
274         cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
275     }
276     if (status & PCI_ERR_ROOT_FATAL_RCV) {
277         cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
278     }
279     return cmd;
280 }
281 
282 static void pcie_aer_root_notify(PCIDevice *dev)
283 {
284     if (msix_enabled(dev)) {
285         msix_notify(dev, pcie_aer_root_get_vector(dev));
286     } else if (msi_enabled(dev)) {
287         msi_notify(dev, pcie_aer_root_get_vector(dev));
288     } else {
289         pci_irq_assert(dev);
290     }
291 }
292 
/*
 * 6.2.6 Error Message Control
 * Figure 6-3
 * root port part
 *
 * Record the received message in the root error status register (and the
 * error source identification register when it is the first of its class),
 * then raise an interrupt if the root error command register enables this
 * severity and no enabled condition was already pending.
 */
static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
{
    uint16_t cmd;
    uint8_t *aer_cap;
    uint32_t root_cmd;
    uint32_t root_status, prev_status;

    cmd = pci_get_word(dev->config + PCI_COMMAND);
    aer_cap = dev->config + dev->exp.aer_cap;
    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
    /* prev_status keeps the value from before this message is recorded */
    prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);

    if (cmd & PCI_COMMAND_SERR) {
        /* System Error.
         *
         * The way to report System Error is platform specific and
         * it isn't implemented in qemu right now.
         * So just discard the error for now.
         * An OS that cares about AER would receive errors via the
         * native AER mechanism, so this wouldn't matter.
         */
    }

    /* Error Message Received: Root Error Status register */
    switch (msg->severity) {
    case PCI_ERR_ROOT_CMD_COR_EN:
        if (root_status & PCI_ERR_ROOT_COR_RCV) {
            /* a correctable error is already recorded: flag "multiple" */
            root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
        } else {
            /* first correctable error: remember who sent it */
            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
                         msg->source_id);
        }
        root_status |= PCI_ERR_ROOT_COR_RCV;
        break;
    case PCI_ERR_ROOT_CMD_NONFATAL_EN:
        root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
        break;
    case PCI_ERR_ROOT_CMD_FATAL_EN:
        /* only flagged when no uncorrectable error was recorded before */
        if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
            root_status |= PCI_ERR_ROOT_FIRST_FATAL;
        }
        root_status |= PCI_ERR_ROOT_FATAL_RCV;
        break;
    default:
        abort();
        break;
    }
    /* bookkeeping shared by fatal and non-fatal messages */
    if (pcie_aer_msg_is_uncor(msg)) {
        if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
            root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
        } else {
            /* first uncorrectable error: remember who sent it */
            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
                         PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
        }
        root_status |= PCI_ERR_ROOT_UNCOR_RCV;
    }
    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);

    /* 6.2.4.1.2 Interrupt Generation */
    /* All the above did was set some bits in the status register.
     * Specifically those that match the message severity.
     * The code below relies on this fact. */
    if (!(root_cmd & msg->severity) ||
        (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
        /* Condition is not being set or was already true so nothing to do. */
        return;
    }

    pcie_aer_root_notify(dev);
}
368 
369 /*
370  * 6.2.6 Error Message Control Figure 6-3
371  *
372  * Walk up the bus tree from the device, propagate the error message.
373  */
374 void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
375 {
376     uint8_t type;
377 
378     while (dev) {
379         if (!pci_is_express(dev)) {
380             /* just ignore it */
381             /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
382              * Consider e.g. a PCI bridge above a PCI Express device. */
383             return;
384         }
385 
386         type = pcie_cap_get_type(dev);
387         if ((type == PCI_EXP_TYPE_ROOT_PORT ||
388             type == PCI_EXP_TYPE_UPSTREAM ||
389             type == PCI_EXP_TYPE_DOWNSTREAM) &&
390             !pcie_aer_msg_vbridge(dev, msg)) {
391                 return;
392         }
393         if (!pcie_aer_msg_alldev(dev, msg)) {
394             return;
395         }
396         if (type == PCI_EXP_TYPE_ROOT_PORT) {
397             pcie_aer_msg_root_port(dev, msg);
398             /* Root port can notify system itself,
399                or send the error message to root complex event collector. */
400             /*
401              * if root port is associated with an event collector,
402              * return the root complex event collector here.
403              * For now root complex event collector isn't supported.
404              */
405             return;
406         }
407         dev = pci_bridge_get_device(dev->bus);
408     }
409 }
410 
411 static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
412 {
413     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
414     uint8_t first_bit = ctz32(err->status);
415     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
416     int i;
417 
418     assert(err->status);
419     assert(!(err->status & (err->status - 1)));
420 
421     errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
422     errcap |= PCI_ERR_CAP_FEP(first_bit);
423 
424     if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
425         for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
426             /* 7.10.8 Header Log Register */
427             uint8_t *header_log =
428                 aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
429             stl_be_p(header_log, err->header[i]);
430         }
431     } else {
432         assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
433         memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
434     }
435 
436     if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
437         (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
438          PCI_EXP_DEVCAP2_EETLPP)) {
439         for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
440             /* 7.10.12 tlp prefix log register */
441             uint8_t *prefix_log =
442                 aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
443             stl_be_p(prefix_log, err->prefix[i]);
444         }
445         errcap |= PCI_ERR_CAP_TLP;
446     } else {
447         memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
448                PCI_ERR_TLP_PREFIX_LOG_SIZE);
449     }
450     pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
451 }
452 
453 static void pcie_aer_clear_log(PCIDevice *dev)
454 {
455     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
456 
457     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
458                                  PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
459 
460     memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
461     memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
462 }
463 
464 static void pcie_aer_clear_error(PCIDevice *dev)
465 {
466     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
467     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
468     PCIEAERLog *aer_log = &dev->exp.aer_log;
469     PCIEAERErr err;
470 
471     if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
472         pcie_aer_clear_log(dev);
473         return;
474     }
475 
476     /*
477      * If more errors are queued, set corresponding bits in uncorrectable
478      * error status.
479      * We emulate uncorrectable error status register as W1CS.
480      * So set bit in uncorrectable error status here again for multiple
481      * error recording support.
482      *
483      * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
484      */
485     pcie_aer_update_uncor_status(dev);
486 
487     aer_log_del_err(aer_log, &err);
488     pcie_aer_update_log(dev, &err);
489 }
490 
491 static int pcie_aer_record_error(PCIDevice *dev,
492                                  const PCIEAERErr *err)
493 {
494     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
495     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
496     int fep = PCI_ERR_CAP_FEP(errcap);
497 
498     assert(err->status);
499     assert(!(err->status & (err->status - 1)));
500 
501     if (errcap & PCI_ERR_CAP_MHRE &&
502         (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
503         /*  Not first error. queue error */
504         if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
505             /* overflow */
506             return -1;
507         }
508         return 0;
509     }
510 
511     pcie_aer_update_log(dev, err);
512     return 0;
513 }
514 
/* Working state shared by the steps of one pcie_aer_inject_error() call. */
typedef struct PCIEAERInject {
    PCIDevice *dev;             /* device the error is injected into */
    uint8_t *aer_cap;           /* AER capability in config space, or NULL */
    const PCIEAERErr *err;      /* error being injected */
    uint16_t devctl;            /* Device Control value used for the checks */
    uint16_t devsta;            /* Device Status value being accumulated */
    uint32_t error_status;      /* single status bit of the error */
    bool unsupported_request;   /* the error is PCI_ERR_UNC_UNSUP */
    bool log_overflow;          /* recording the error overflowed the log */
    PCIEAERMsg msg;             /* message to send up if not masked */
} PCIEAERInject;
526 
527 static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
528                                       uint32_t uncor_status,
529                                       bool is_advisory_nonfatal)
530 {
531     PCIDevice *dev = inj->dev;
532 
533     inj->devsta |= PCI_EXP_DEVSTA_CED;
534     if (inj->unsupported_request) {
535         inj->devsta |= PCI_EXP_DEVSTA_URD;
536     }
537     pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
538 
539     if (inj->aer_cap) {
540         uint32_t mask;
541         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
542                                    inj->error_status);
543         mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
544         if (mask & inj->error_status) {
545             return false;
546         }
547         if (is_advisory_nonfatal) {
548             uint32_t uncor_mask =
549                 pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
550             if (!(uncor_mask & uncor_status)) {
551                 inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
552             }
553             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
554                                        uncor_status);
555         }
556     }
557 
558     if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
559         return false;
560     }
561     if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
562         return false;
563     }
564 
565     inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
566     return true;
567 }
568 
569 static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
570 {
571     PCIDevice *dev = inj->dev;
572     uint16_t cmd;
573 
574     if (is_fatal) {
575         inj->devsta |= PCI_EXP_DEVSTA_FED;
576     } else {
577         inj->devsta |= PCI_EXP_DEVSTA_NFED;
578     }
579     if (inj->unsupported_request) {
580         inj->devsta |= PCI_EXP_DEVSTA_URD;
581     }
582     pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
583 
584     if (inj->aer_cap) {
585         uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
586         if (mask & inj->error_status) {
587             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
588                                        inj->error_status);
589             return false;
590         }
591 
592         inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
593         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
594                                    inj->error_status);
595     }
596 
597     cmd = pci_get_word(dev->config + PCI_COMMAND);
598     if (inj->unsupported_request &&
599         !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
600         return false;
601     }
602     if (is_fatal) {
603         if (!((cmd & PCI_COMMAND_SERR) ||
604               (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
605             return false;
606         }
607         inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
608     } else {
609         if (!((cmd & PCI_COMMAND_SERR) ||
610               (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
611             return false;
612         }
613         inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
614     }
615     return true;
616 }
617 
618 /*
619  * non-Function specific error must be recorded in all functions.
620  * It is the responsibility of the caller of this function.
621  * It is also caller's responsibility to determine which function should
622  * report the error.
623  *
624  * 6.2.4 Error Logging
625  * 6.2.5 Sequence of Device Error Signaling and Logging Operations
626  * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
627  *             Operations
628  */
629 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
630 {
631     uint8_t *aer_cap = NULL;
632     uint16_t devctl = 0;
633     uint16_t devsta = 0;
634     uint32_t error_status = err->status;
635     PCIEAERInject inj;
636 
637     if (!pci_is_express(dev)) {
638         return -ENOSYS;
639     }
640 
641     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
642         error_status &= PCI_ERR_COR_SUPPORTED;
643     } else {
644         error_status &= PCI_ERR_UNC_SUPPORTED;
645     }
646 
647     /* invalid status bit. one and only one bit must be set */
648     if (!error_status || (error_status & (error_status - 1))) {
649         return -EINVAL;
650     }
651 
652     if (dev->exp.aer_cap) {
653         uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
654         aer_cap = dev->config + dev->exp.aer_cap;
655         devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
656         devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
657     }
658 
659     inj.dev = dev;
660     inj.aer_cap = aer_cap;
661     inj.err = err;
662     inj.devctl = devctl;
663     inj.devsta = devsta;
664     inj.error_status = error_status;
665     inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
666         err->status == PCI_ERR_UNC_UNSUP;
667     inj.log_overflow = false;
668 
669     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
670         if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
671             return 0;
672         }
673     } else {
674         bool is_fatal =
675             pcie_aer_uncor_default_severity(error_status) ==
676             PCI_ERR_ROOT_CMD_FATAL_EN;
677         if (aer_cap) {
678             is_fatal =
679                 error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
680         }
681         if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
682             inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
683             if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
684                 return 0;
685             }
686         } else {
687             if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
688                 return 0;
689             }
690         }
691     }
692 
693     /* send up error message */
694     inj.msg.source_id = err->source_id;
695     pcie_aer_msg(dev, &inj.msg);
696 
697     if (inj.log_overflow) {
698         PCIEAERErr header_log_overflow = {
699             .status = PCI_ERR_COR_HL_OVERFLOW,
700             .flags = PCIE_AER_ERR_IS_CORRECTABLE,
701         };
702         int ret = pcie_aer_inject_error(dev, &header_log_overflow);
703         assert(!ret);
704     }
705     return 0;
706 }
707 
/*
 * Post-process a config space write that may have touched the AER
 * capability. The function inspects the register contents as they are
 * when it is called; @addr, @val and @len are not used.
 */
void pcie_aer_write_config(PCIDevice *dev,
                           uint32_t addr, uint32_t val, int len)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
    /* status bit designated by the first error pointer */
    uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
    uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);

    /* uncorrectable error */
    if (!(uncorsta & first_error)) {
        /* the bit that corresponds to the first error is cleared */
        pcie_aer_clear_error(dev);
    } else if (errcap & PCI_ERR_CAP_MHRE) {
        /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
         * nothing should happen. So we have to revert the modification to
         * the register.
         */
        pcie_aer_update_uncor_status(dev);
    } else {
        /* capability & control
         * PCI_ERR_CAP_MHRE might have been cleared, so drop the queued
         * errors.
         */
        aer_log_clear_all_err(&dev->exp.aer_log);
    }
}
733 
734 void pcie_aer_root_init(PCIDevice *dev)
735 {
736     uint16_t pos = dev->exp.aer_cap;
737 
738     pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
739                  PCI_ERR_ROOT_CMD_EN_MASK);
740     pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
741                  PCI_ERR_ROOT_STATUS_REPORT_MASK);
742     /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
743      * device-specific method.
744      */
745     pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
746                  ~PCI_ERR_ROOT_IRQ);
747 }
748 
749 void pcie_aer_root_reset(PCIDevice *dev)
750 {
751     uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
752 
753     pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
754 
755     /*
756      * Advanced Error Interrupt Message Number in Root Error Status Register
757      * must be updated by chip dependent code because it's chip dependent
758      * which number is used.
759      */
760 }
761 
762 void pcie_aer_root_write_config(PCIDevice *dev,
763                                 uint32_t addr, uint32_t val, int len,
764                                 uint32_t root_cmd_prev)
765 {
766     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
767     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
768     uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
769     uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
770     /* 6.2.4.1.2 Interrupt Generation */
771     if (!msix_enabled(dev) && !msi_enabled(dev)) {
772         pci_set_irq(dev, !!(root_cmd & enabled_cmd));
773         return;
774     }
775 
776     if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
777         /* Send MSI on transition from false to true. */
778         return;
779     }
780 
781     pcie_aer_root_notify(dev);
782 }
783 
/* Migration description of a single queued error (PCIEAERErr). */
static const VMStateDescription vmstate_pcie_aer_err = {
    .name = "PCIE_AER_ERROR",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(status, PCIEAERErr),
        VMSTATE_UINT16(source_id, PCIEAERErr),
        VMSTATE_UINT16(flags, PCIEAERErr),
        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
        VMSTATE_END_OF_LIST()
    }
};
797 
798 static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
799 {
800     PCIEAERLog *s = opaque;
801 
802     return s->log_num <= s->log_max;
803 }
804 
/*
 * Migration description of the queued-error log (PCIEAERLog).
 * The field order matters: log_num is checked against log_max before it is
 * used as the element count of the log array.
 */
const VMStateDescription vmstate_pcie_aer_log = {
    .name = "PCIE_AER_ERROR_LOG",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(log_num, PCIEAERLog),
        VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog),
        VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
                              vmstate_pcie_aer_err, PCIEAERErr),
        VMSTATE_END_OF_LIST()
    }
};
818 
/* One entry of the error name -> status value conversion table. */
typedef struct PCIEAERErrorName {
    const char *name;   /* name as given by the user */
    uint32_t val;       /* PCI_ERR_UNC_* or PCI_ERR_COR_* status bit */
    bool correctable;   /* true when val is a correctable error status bit */
} PCIEAERErrorName;
824 
825 /*
826  * AER error name -> value conversion table
827  * This naming scheme is same to linux aer-injection tool.
828  */
829 static const struct PCIEAERErrorName pcie_aer_error_list[] = {
830     {
831         .name = "DLP",
832         .val = PCI_ERR_UNC_DLP,
833         .correctable = false,
834     }, {
835         .name = "SDN",
836         .val = PCI_ERR_UNC_SDN,
837         .correctable = false,
838     }, {
839         .name = "POISON_TLP",
840         .val = PCI_ERR_UNC_POISON_TLP,
841         .correctable = false,
842     }, {
843         .name = "FCP",
844         .val = PCI_ERR_UNC_FCP,
845         .correctable = false,
846     }, {
847         .name = "COMP_TIME",
848         .val = PCI_ERR_UNC_COMP_TIME,
849         .correctable = false,
850     }, {
851         .name = "COMP_ABORT",
852         .val = PCI_ERR_UNC_COMP_ABORT,
853         .correctable = false,
854     }, {
855         .name = "UNX_COMP",
856         .val = PCI_ERR_UNC_UNX_COMP,
857         .correctable = false,
858     }, {
859         .name = "RX_OVER",
860         .val = PCI_ERR_UNC_RX_OVER,
861         .correctable = false,
862     }, {
863         .name = "MALF_TLP",
864         .val = PCI_ERR_UNC_MALF_TLP,
865         .correctable = false,
866     }, {
867         .name = "ECRC",
868         .val = PCI_ERR_UNC_ECRC,
869         .correctable = false,
870     }, {
871         .name = "UNSUP",
872         .val = PCI_ERR_UNC_UNSUP,
873         .correctable = false,
874     }, {
875         .name = "ACSV",
876         .val = PCI_ERR_UNC_ACSV,
877         .correctable = false,
878     }, {
879         .name = "INTN",
880         .val = PCI_ERR_UNC_INTN,
881         .correctable = false,
882     }, {
883         .name = "MCBTLP",
884         .val = PCI_ERR_UNC_MCBTLP,
885         .correctable = false,
886     }, {
887         .name = "ATOP_EBLOCKED",
888         .val = PCI_ERR_UNC_ATOP_EBLOCKED,
889         .correctable = false,
890     }, {
891         .name = "TLP_PRF_BLOCKED",
892         .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
893         .correctable = false,
894     }, {
895         .name = "RCVR",
896         .val = PCI_ERR_COR_RCVR,
897         .correctable = true,
898     }, {
899         .name = "BAD_TLP",
900         .val = PCI_ERR_COR_BAD_TLP,
901         .correctable = true,
902     }, {
903         .name = "BAD_DLLP",
904         .val = PCI_ERR_COR_BAD_DLLP,
905         .correctable = true,
906     }, {
907         .name = "REP_ROLL",
908         .val = PCI_ERR_COR_REP_ROLL,
909         .correctable = true,
910     }, {
911         .name = "REP_TIMER",
912         .val = PCI_ERR_COR_REP_TIMER,
913         .correctable = true,
914     }, {
915         .name = "ADV_NONFATAL",
916         .val = PCI_ERR_COR_ADV_NONFATAL,
917         .correctable = true,
918     }, {
919         .name = "INTERNAL",
920         .val = PCI_ERR_COR_INTERNAL,
921         .correctable = true,
922     }, {
923         .name = "HL_OVERFLOW",
924         .val = PCI_ERR_COR_HL_OVERFLOW,
925         .correctable = true,
926     },
927 };
928 
929 static int pcie_aer_parse_error_string(const char *error_name,
930                                        uint32_t *status, bool *correctable)
931 {
932     int i;
933 
934     for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
935         const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
936         if (strcmp(error_name, e->name)) {
937             continue;
938         }
939 
940         *status = e->val;
941         *correctable = e->correctable;
942         return 0;
943     }
944     return -EINVAL;
945 }
946 
947 static int do_pcie_aer_inject_error(Monitor *mon,
948                                     const QDict *qdict, QObject **ret_data)
949 {
950     const char *id = qdict_get_str(qdict, "id");
951     const char *error_name;
952     uint32_t error_status;
953     bool correctable;
954     PCIDevice *dev;
955     PCIEAERErr err;
956     int ret;
957 
958     ret = pci_qdev_find_device(id, &dev);
959     if (ret < 0) {
960         monitor_printf(mon,
961                        "id or pci device path is invalid or device not "
962                        "found. %s\n", id);
963         return ret;
964     }
965     if (!pci_is_express(dev)) {
966         monitor_printf(mon, "the device doesn't support pci express. %s\n",
967                        id);
968         return -ENOSYS;
969     }
970 
971     error_name = qdict_get_str(qdict, "error_status");
972     if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
973         char *e = NULL;
974         error_status = strtoul(error_name, &e, 0);
975         correctable = qdict_get_try_bool(qdict, "correctable", false);
976         if (!e || *e != '\0') {
977             monitor_printf(mon, "invalid error status value. \"%s\"",
978                            error_name);
979             return -EINVAL;
980         }
981     }
982     err.status = error_status;
983     err.source_id = pci_requester_id(dev);
984 
985     err.flags = 0;
986     if (correctable) {
987         err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
988     }
989     if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
990         err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
991     }
992     if (qdict_haskey(qdict, "header0")) {
993         err.flags |= PCIE_AER_ERR_HEADER_VALID;
994     }
995     if (qdict_haskey(qdict, "prefix0")) {
996         err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
997     }
998 
999     err.header[0] = qdict_get_try_int(qdict, "header0", 0);
1000     err.header[1] = qdict_get_try_int(qdict, "header1", 0);
1001     err.header[2] = qdict_get_try_int(qdict, "header2", 0);
1002     err.header[3] = qdict_get_try_int(qdict, "header3", 0);
1003 
1004     err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
1005     err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
1006     err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
1007     err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
1008 
1009     ret = pcie_aer_inject_error(dev, &err);
1010     *ret_data = qobject_from_jsonf("{'id': %s, "
1011                                    "'root_bus': %s, 'bus': %d, 'devfn': %d, "
1012                                    "'ret': %d}",
1013                                    id, pci_root_bus_path(dev),
1014                                    pci_bus_num(dev->bus), dev->devfn,
1015                                    ret);
1016     assert(*ret_data);
1017 
1018     return 0;
1019 }
1020 
1021 void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
1022 {
1023     QObject *data;
1024     int devfn;
1025 
1026     if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
1027         return;
1028     }
1029 
1030     assert(qobject_type(data) == QTYPE_QDICT);
1031     qdict = qobject_to_qdict(data);
1032 
1033     devfn = (int)qdict_get_int(qdict, "devfn");
1034     monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
1035                    qdict_get_str(qdict, "id"),
1036                    qdict_get_str(qdict, "root_bus"),
1037                    (int) qdict_get_int(qdict, "bus"),
1038                    PCI_SLOT(devfn), PCI_FUNC(devfn));
1039 }
1040