xref: /openbmc/qemu/hw/pci/pcie_aer.c (revision 9cdd2a736b99bad19fb4f88d2230c75f680c31ec)
1 /*
2  * pcie_aer.c
3  *
4  * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "sysemu/sysemu.h"
23 #include "qapi/qmp/qdict.h"
24 #include "monitor/monitor.h"
25 #include "hw/pci/pci_bridge.h"
26 #include "hw/pci/pcie.h"
27 #include "hw/pci/msix.h"
28 #include "hw/pci/msi.h"
29 #include "hw/pci/pci_bus.h"
30 #include "hw/pci/pcie_regs.h"
31 #include "qapi/error.h"
32 
/*
 * Debug tracing helpers.
 *
 * Define DEBUG_PCIE (for instance by enabling the line below) to make
 * PCIE_DPRINTF() print to stderr, prefixed with the calling function and
 * line number.  Without it the macro expands to an empty statement.
 */
/* #define DEBUG_PCIE */
#if defined(DEBUG_PCIE)
# define PCIE_DPRINTF(fmt, ...)                                         \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#else
# define PCIE_DPRINTF(fmt, ...) do {} while (0)
#endif

/* Like PCIE_DPRINTF(), additionally prefixed with the device name and devfn. */
#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)

/*
 * Byte offsets, relative to PCI_ERR_ROOT_ERR_SRC, of the 16-bit source IDs
 * recorded for correctable and uncorrectable error messages.
 */
#define PCI_ERR_SRC_COR_OFFS    0
#define PCI_ERR_SRC_UNCOR_OFFS  2
45 
/*
 * Describes where an injected error was sent; filled in by
 * do_pcie_aer_inject_error() on success.
 */
struct PCIEErrorDetails {
    const char *id;         /* presumably the id of the target device */
    const char *root_bus;   /* presumably the name of its root bus */
    int bus;                /* bus number */
    int devfn;              /* device/function number */
};
typedef struct PCIEErrorDetails PCIEErrorDetails;
52 
53 /* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
54 static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
55 {
56     switch (status) {
57     case PCI_ERR_UNC_INTN:
58     case PCI_ERR_UNC_DLP:
59     case PCI_ERR_UNC_SDN:
60     case PCI_ERR_UNC_RX_OVER:
61     case PCI_ERR_UNC_FCP:
62     case PCI_ERR_UNC_MALF_TLP:
63         return PCI_ERR_ROOT_CMD_FATAL_EN;
64     case PCI_ERR_UNC_POISON_TLP:
65     case PCI_ERR_UNC_ECRC:
66     case PCI_ERR_UNC_UNSUP:
67     case PCI_ERR_UNC_COMP_TIME:
68     case PCI_ERR_UNC_COMP_ABORT:
69     case PCI_ERR_UNC_UNX_COMP:
70     case PCI_ERR_UNC_ACSV:
71     case PCI_ERR_UNC_MCBTLP:
72     case PCI_ERR_UNC_ATOP_EBLOCKED:
73     case PCI_ERR_UNC_TLP_PRF_BLOCKED:
74         return PCI_ERR_ROOT_CMD_NONFATAL_EN;
75     default:
76         abort();
77         break;
78     }
79     return PCI_ERR_ROOT_CMD_FATAL_EN;
80 }
81 
82 static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
83 {
84     if (aer_log->log_num == aer_log->log_max) {
85         return -1;
86     }
87     memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
88     aer_log->log_num++;
89     return 0;
90 }
91 
92 static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
93 {
94     assert(aer_log->log_num);
95     *err = aer_log->log[0];
96     aer_log->log_num--;
97     memmove(&aer_log->log[0], &aer_log->log[1],
98             aer_log->log_num * sizeof *err);
99 }
100 
101 static void aer_log_clear_all_err(PCIEAERLog *aer_log)
102 {
103     aer_log->log_num = 0;
104 }
105 
106 int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
107                   uint16_t size, Error **errp)
108 {
109     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, cap_ver,
110                         offset, size);
111     dev->exp.aer_cap = offset;
112 
113     /* clip down the value to avoid unreasonable memory usage */
114     if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
115         error_setg(errp, "Invalid aer_log_max %d. The max number of aer log "
116                 "is %d", dev->exp.aer_log.log_max, PCIE_AER_LOG_MAX_LIMIT);
117         return -EINVAL;
118     }
119     dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
120                                         dev->exp.aer_log.log_max);
121 
122     pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
123                  PCI_ERR_UNC_SUPPORTED);
124 
125     pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
126                  PCI_ERR_UNC_SEVERITY_DEFAULT);
127     pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
128                  PCI_ERR_UNC_SUPPORTED);
129 
130     pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
131                                PCI_ERR_COR_SUPPORTED);
132 
133     pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
134                  PCI_ERR_COR_MASK_DEFAULT);
135     pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
136                  PCI_ERR_COR_SUPPORTED);
137 
138     /* capabilities and control. multiple header logging is supported */
139     if (dev->exp.aer_log.log_max > 0) {
140         pci_set_long(dev->config + offset + PCI_ERR_CAP,
141                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
142                      PCI_ERR_CAP_MHRC);
143         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
144                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
145                      PCI_ERR_CAP_MHRE);
146     } else {
147         pci_set_long(dev->config + offset + PCI_ERR_CAP,
148                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
149         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
150                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
151     }
152 
153     switch (pcie_cap_get_type(dev)) {
154     case PCI_EXP_TYPE_ROOT_PORT:
155         /* this case will be set by pcie_aer_root_init() */
156         /* fallthrough */
157     case PCI_EXP_TYPE_DOWNSTREAM:
158     case PCI_EXP_TYPE_UPSTREAM:
159         pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
160                                    PCI_BRIDGE_CTL_SERR);
161         pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
162                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
163         break;
164     default:
165         /* nothing */
166         break;
167     }
168     return 0;
169 }
170 
171 void pcie_aer_exit(PCIDevice *dev)
172 {
173     g_free(dev->exp.aer_log.log);
174 }
175 
176 static void pcie_aer_update_uncor_status(PCIDevice *dev)
177 {
178     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
179     PCIEAERLog *aer_log = &dev->exp.aer_log;
180 
181     uint16_t i;
182     for (i = 0; i < aer_log->log_num; i++) {
183         pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
184                                    dev->exp.aer_log.log[i].status);
185     }
186 }
187 
188 /*
189  * return value:
190  * true: error message needs to be sent up
191  * false: error message is masked
192  *
193  * 6.2.6 Error Message Control
194  * Figure 6-3
195  * all pci express devices part
196  */
197 static bool
198 pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
199 {
200     if (!(pcie_aer_msg_is_uncor(msg) &&
201           (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
202         return false;
203     }
204 
205     /* Signaled System Error
206      *
207      * 7.5.1.1 Command register
208      * Bit 8 SERR# Enable
209      *
210      * When Set, this bit enables reporting of Non-fatal and Fatal
211      * errors detected by the Function to the Root Complex. Note that
212      * errors are reported if enabled either through this bit or through
213      * the PCI Express specific bits in the Device Control register (see
214      * Section 7.8.4).
215      */
216     pci_word_test_and_set_mask(dev->config + PCI_STATUS,
217                                PCI_STATUS_SIG_SYSTEM_ERROR);
218 
219     if (!(msg->severity &
220           pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
221         return false;
222     }
223 
224     /* send up error message */
225     return true;
226 }
227 
228 /*
229  * return value:
230  * true: error message is sent up
231  * false: error message is masked
232  *
233  * 6.2.6 Error Message Control
234  * Figure 6-3
235  * virtual pci bridge part
236  */
237 static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
238 {
239     uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
240 
241     if (pcie_aer_msg_is_uncor(msg)) {
242         /* Received System Error */
243         pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
244                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
245     }
246 
247     if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
248         return false;
249     }
250     return true;
251 }
252 
253 void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
254 {
255     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
256     assert(vector < PCI_ERR_ROOT_IRQ_MAX);
257     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
258                                  PCI_ERR_ROOT_IRQ);
259     pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
260                                vector << PCI_ERR_ROOT_IRQ_SHIFT);
261 }
262 
263 static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
264 {
265     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
266     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
267     return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
268 }
269 
270 /* Given a status register, get corresponding bits in the command register */
271 static uint32_t pcie_aer_status_to_cmd(uint32_t status)
272 {
273     uint32_t cmd = 0;
274     if (status & PCI_ERR_ROOT_COR_RCV) {
275         cmd |= PCI_ERR_ROOT_CMD_COR_EN;
276     }
277     if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
278         cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
279     }
280     if (status & PCI_ERR_ROOT_FATAL_RCV) {
281         cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
282     }
283     return cmd;
284 }
285 
286 static void pcie_aer_root_notify(PCIDevice *dev)
287 {
288     if (msix_enabled(dev)) {
289         msix_notify(dev, pcie_aer_root_get_vector(dev));
290     } else if (msi_enabled(dev)) {
291         msi_notify(dev, pcie_aer_root_get_vector(dev));
292     } else {
293         pci_irq_assert(dev);
294     }
295 }
296 
297 /*
298  * 6.2.6 Error Message Control
299  * Figure 6-3
300  * root port part
301  */
302 static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
303 {
304     uint16_t cmd;
305     uint8_t *aer_cap;
306     uint32_t root_cmd;
307     uint32_t root_status, prev_status;
308 
309     cmd = pci_get_word(dev->config + PCI_COMMAND);
310     aer_cap = dev->config + dev->exp.aer_cap;
311     root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
312     prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
313 
314     if (cmd & PCI_COMMAND_SERR) {
315         /* System Error.
316          *
317          * The way to report System Error is platform specific and
318          * it isn't implemented in qemu right now.
319          * So just discard the error for now.
320          * OS which cares of aer would receive errors via
321          * native aer mechanims, so this wouldn't matter.
322          */
323     }
324 
325     /* Errro Message Received: Root Error Status register */
326     switch (msg->severity) {
327     case PCI_ERR_ROOT_CMD_COR_EN:
328         if (root_status & PCI_ERR_ROOT_COR_RCV) {
329             root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
330         } else {
331             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
332                          msg->source_id);
333         }
334         root_status |= PCI_ERR_ROOT_COR_RCV;
335         break;
336     case PCI_ERR_ROOT_CMD_NONFATAL_EN:
337         root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
338         break;
339     case PCI_ERR_ROOT_CMD_FATAL_EN:
340         if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
341             root_status |= PCI_ERR_ROOT_FIRST_FATAL;
342         }
343         root_status |= PCI_ERR_ROOT_FATAL_RCV;
344         break;
345     default:
346         abort();
347         break;
348     }
349     if (pcie_aer_msg_is_uncor(msg)) {
350         if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
351             root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
352         } else {
353             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
354                          PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
355         }
356         root_status |= PCI_ERR_ROOT_UNCOR_RCV;
357     }
358     pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);
359 
360     /* 6.2.4.1.2 Interrupt Generation */
361     /* All the above did was set some bits in the status register.
362      * Specifically these that match message severity.
363      * The below code relies on this fact. */
364     if (!(root_cmd & msg->severity) ||
365         (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
366         /* Condition is not being set or was already true so nothing to do. */
367         return;
368     }
369 
370     pcie_aer_root_notify(dev);
371 }
372 
373 /*
374  * 6.2.6 Error Message Control Figure 6-3
375  *
376  * Walk up the bus tree from the device, propagate the error message.
377  */
378 static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
379 {
380     uint8_t type;
381 
382     while (dev) {
383         if (!pci_is_express(dev)) {
384             /* just ignore it */
385             /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
386              * Consider e.g. a PCI bridge above a PCI Express device. */
387             return;
388         }
389 
390         type = pcie_cap_get_type(dev);
391         if ((type == PCI_EXP_TYPE_ROOT_PORT ||
392             type == PCI_EXP_TYPE_UPSTREAM ||
393             type == PCI_EXP_TYPE_DOWNSTREAM) &&
394             !pcie_aer_msg_vbridge(dev, msg)) {
395                 return;
396         }
397         if (!pcie_aer_msg_alldev(dev, msg)) {
398             return;
399         }
400         if (type == PCI_EXP_TYPE_ROOT_PORT) {
401             pcie_aer_msg_root_port(dev, msg);
402             /* Root port can notify system itself,
403                or send the error message to root complex event collector. */
404             /*
405              * if root port is associated with an event collector,
406              * return the root complex event collector here.
407              * For now root complex event collector isn't supported.
408              */
409             return;
410         }
411         dev = pci_bridge_get_device(pci_get_bus(dev));
412     }
413 }
414 
415 static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
416 {
417     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
418     uint8_t first_bit = ctz32(err->status);
419     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
420     int i;
421 
422     assert(err->status);
423     assert(!(err->status & (err->status - 1)));
424 
425     errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
426     errcap |= PCI_ERR_CAP_FEP(first_bit);
427 
428     if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
429         for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
430             /* 7.10.8 Header Log Register */
431             uint8_t *header_log =
432                 aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
433             stl_be_p(header_log, err->header[i]);
434         }
435     } else {
436         assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
437         memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
438     }
439 
440     if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
441         (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
442          PCI_EXP_DEVCAP2_EETLPP)) {
443         for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
444             /* 7.10.12 tlp prefix log register */
445             uint8_t *prefix_log =
446                 aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
447             stl_be_p(prefix_log, err->prefix[i]);
448         }
449         errcap |= PCI_ERR_CAP_TLP;
450     } else {
451         memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
452                PCI_ERR_TLP_PREFIX_LOG_SIZE);
453     }
454     pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
455 }
456 
457 static void pcie_aer_clear_log(PCIDevice *dev)
458 {
459     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
460 
461     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
462                                  PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
463 
464     memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
465     memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
466 }
467 
468 static void pcie_aer_clear_error(PCIDevice *dev)
469 {
470     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
471     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
472     PCIEAERLog *aer_log = &dev->exp.aer_log;
473     PCIEAERErr err;
474 
475     if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
476         pcie_aer_clear_log(dev);
477         return;
478     }
479 
480     /*
481      * If more errors are queued, set corresponding bits in uncorrectable
482      * error status.
483      * We emulate uncorrectable error status register as W1CS.
484      * So set bit in uncorrectable error status here again for multiple
485      * error recording support.
486      *
487      * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
488      */
489     pcie_aer_update_uncor_status(dev);
490 
491     aer_log_del_err(aer_log, &err);
492     pcie_aer_update_log(dev, &err);
493 }
494 
495 static int pcie_aer_record_error(PCIDevice *dev,
496                                  const PCIEAERErr *err)
497 {
498     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
499     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
500     int fep = PCI_ERR_CAP_FEP(errcap);
501 
502     assert(err->status);
503     assert(!(err->status & (err->status - 1)));
504 
505     if (errcap & PCI_ERR_CAP_MHRE &&
506         (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
507         /*  Not first error. queue error */
508         if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
509             /* overflow */
510             return -1;
511         }
512         return 0;
513     }
514 
515     pcie_aer_update_log(dev, err);
516     return 0;
517 }
518 
519 typedef struct PCIEAERInject {
520     PCIDevice *dev;
521     uint8_t *aer_cap;
522     const PCIEAERErr *err;
523     uint16_t devctl;
524     uint16_t devsta;
525     uint32_t error_status;
526     bool unsupported_request;
527     bool log_overflow;
528     PCIEAERMsg msg;
529 } PCIEAERInject;
530 
531 static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
532                                       uint32_t uncor_status,
533                                       bool is_advisory_nonfatal)
534 {
535     PCIDevice *dev = inj->dev;
536 
537     inj->devsta |= PCI_EXP_DEVSTA_CED;
538     if (inj->unsupported_request) {
539         inj->devsta |= PCI_EXP_DEVSTA_URD;
540     }
541     pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
542 
543     if (inj->aer_cap) {
544         uint32_t mask;
545         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
546                                    inj->error_status);
547         mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
548         if (mask & inj->error_status) {
549             return false;
550         }
551         if (is_advisory_nonfatal) {
552             uint32_t uncor_mask =
553                 pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
554             if (!(uncor_mask & uncor_status)) {
555                 inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
556             }
557             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
558                                        uncor_status);
559         }
560     }
561 
562     if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
563         return false;
564     }
565     if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
566         return false;
567     }
568 
569     inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
570     return true;
571 }
572 
573 static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
574 {
575     PCIDevice *dev = inj->dev;
576     uint16_t cmd;
577 
578     if (is_fatal) {
579         inj->devsta |= PCI_EXP_DEVSTA_FED;
580     } else {
581         inj->devsta |= PCI_EXP_DEVSTA_NFED;
582     }
583     if (inj->unsupported_request) {
584         inj->devsta |= PCI_EXP_DEVSTA_URD;
585     }
586     pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
587 
588     if (inj->aer_cap) {
589         uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
590         if (mask & inj->error_status) {
591             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
592                                        inj->error_status);
593             return false;
594         }
595 
596         inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
597         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
598                                    inj->error_status);
599     }
600 
601     cmd = pci_get_word(dev->config + PCI_COMMAND);
602     if (inj->unsupported_request &&
603         !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
604         return false;
605     }
606     if (is_fatal) {
607         if (!((cmd & PCI_COMMAND_SERR) ||
608               (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
609             return false;
610         }
611         inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
612     } else {
613         if (!((cmd & PCI_COMMAND_SERR) ||
614               (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
615             return false;
616         }
617         inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
618     }
619     return true;
620 }
621 
622 /*
623  * non-Function specific error must be recorded in all functions.
624  * It is the responsibility of the caller of this function.
625  * It is also caller's responsibility to determine which function should
626  * report the error.
627  *
628  * 6.2.4 Error Logging
629  * 6.2.5 Sequence of Device Error Signaling and Logging Operations
630  * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
631  *             Operations
632  */
633 static int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
634 {
635     uint8_t *aer_cap = NULL;
636     uint16_t devctl = 0;
637     uint16_t devsta = 0;
638     uint32_t error_status = err->status;
639     PCIEAERInject inj;
640 
641     if (!pci_is_express(dev)) {
642         return -ENOSYS;
643     }
644 
645     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
646         error_status &= PCI_ERR_COR_SUPPORTED;
647     } else {
648         error_status &= PCI_ERR_UNC_SUPPORTED;
649     }
650 
651     /* invalid status bit. one and only one bit must be set */
652     if (!error_status || (error_status & (error_status - 1))) {
653         return -EINVAL;
654     }
655 
656     if (dev->exp.aer_cap) {
657         uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
658         aer_cap = dev->config + dev->exp.aer_cap;
659         devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
660         devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
661     }
662 
663     inj.dev = dev;
664     inj.aer_cap = aer_cap;
665     inj.err = err;
666     inj.devctl = devctl;
667     inj.devsta = devsta;
668     inj.error_status = error_status;
669     inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
670         err->status == PCI_ERR_UNC_UNSUP;
671     inj.log_overflow = false;
672 
673     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
674         if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
675             return 0;
676         }
677     } else {
678         bool is_fatal =
679             pcie_aer_uncor_default_severity(error_status) ==
680             PCI_ERR_ROOT_CMD_FATAL_EN;
681         if (aer_cap) {
682             is_fatal =
683                 error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
684         }
685         if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
686             inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
687             if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
688                 return 0;
689             }
690         } else {
691             if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
692                 return 0;
693             }
694         }
695     }
696 
697     /* send up error message */
698     inj.msg.source_id = err->source_id;
699     pcie_aer_msg(dev, &inj.msg);
700 
701     if (inj.log_overflow) {
702         PCIEAERErr header_log_overflow = {
703             .status = PCI_ERR_COR_HL_OVERFLOW,
704             .flags = PCIE_AER_ERR_IS_CORRECTABLE,
705         };
706         int ret = pcie_aer_inject_error(dev, &header_log_overflow);
707         assert(!ret);
708     }
709     return 0;
710 }
711 
712 void pcie_aer_write_config(PCIDevice *dev,
713                            uint32_t addr, uint32_t val, int len)
714 {
715     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
716     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
717     uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
718     uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
719 
720     /* uncorrectable error */
721     if (!(uncorsta & first_error)) {
722         /* the bit that corresponds to the first error is cleared */
723         pcie_aer_clear_error(dev);
724     } else if (errcap & PCI_ERR_CAP_MHRE) {
725         /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
726          * nothing should happen. So we have to revert the modification to
727          * the register.
728          */
729         pcie_aer_update_uncor_status(dev);
730     } else {
731         /* capability & control
732          * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
733          */
734         aer_log_clear_all_err(&dev->exp.aer_log);
735     }
736 }
737 
738 void pcie_aer_root_init(PCIDevice *dev)
739 {
740     uint16_t pos = dev->exp.aer_cap;
741 
742     pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
743                  PCI_ERR_ROOT_CMD_EN_MASK);
744     pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
745                  PCI_ERR_ROOT_STATUS_REPORT_MASK);
746     /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
747      * device-specific method.
748      */
749     pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
750                  ~PCI_ERR_ROOT_IRQ);
751 }
752 
753 void pcie_aer_root_reset(PCIDevice *dev)
754 {
755     uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
756 
757     pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
758 
759     /*
760      * Advanced Error Interrupt Message Number in Root Error Status Register
761      * must be updated by chip dependent code because it's chip dependent
762      * which number is used.
763      */
764 }
765 
766 void pcie_aer_root_write_config(PCIDevice *dev,
767                                 uint32_t addr, uint32_t val, int len,
768                                 uint32_t root_cmd_prev)
769 {
770     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
771     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
772     uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
773     uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
774     /* 6.2.4.1.2 Interrupt Generation */
775     if (!msix_enabled(dev) && !msi_enabled(dev)) {
776         pci_set_irq(dev, !!(root_cmd & enabled_cmd));
777         return;
778     }
779 
780     if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
781         /* Send MSI on transition from false to true. */
782         return;
783     }
784 
785     pcie_aer_root_notify(dev);
786 }
787 
788 static const VMStateDescription vmstate_pcie_aer_err = {
789     .name = "PCIE_AER_ERROR",
790     .version_id = 1,
791     .minimum_version_id = 1,
792     .fields = (VMStateField[]) {
793         VMSTATE_UINT32(status, PCIEAERErr),
794         VMSTATE_UINT16(source_id, PCIEAERErr),
795         VMSTATE_UINT16(flags, PCIEAERErr),
796         VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
797         VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
798         VMSTATE_END_OF_LIST()
799     }
800 };
801 
802 static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
803 {
804     PCIEAERLog *s = opaque;
805 
806     return s->log_num <= s->log_max;
807 }
808 
809 const VMStateDescription vmstate_pcie_aer_log = {
810     .name = "PCIE_AER_ERROR_LOG",
811     .version_id = 1,
812     .minimum_version_id = 1,
813     .fields = (VMStateField[]) {
814         VMSTATE_UINT16(log_num, PCIEAERLog),
815         VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL),
816         VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
817         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
818                               vmstate_pcie_aer_err, PCIEAERErr),
819         VMSTATE_END_OF_LIST()
820     }
821 };
822 
/* One entry of the AER error name -> value conversion table. */
struct PCIEAERErrorName {
    const char *name;   /* name of the error as given by the user */
    uint32_t val;       /* status bit of the error */
    bool correctable;   /* true for correctable errors */
};
typedef struct PCIEAERErrorName PCIEAERErrorName;
828 
829 /*
830  * AER error name -> value conversion table
831  * This naming scheme is same to linux aer-injection tool.
832  */
833 static const struct PCIEAERErrorName pcie_aer_error_list[] = {
834     {
835         .name = "DLP",
836         .val = PCI_ERR_UNC_DLP,
837         .correctable = false,
838     }, {
839         .name = "SDN",
840         .val = PCI_ERR_UNC_SDN,
841         .correctable = false,
842     }, {
843         .name = "POISON_TLP",
844         .val = PCI_ERR_UNC_POISON_TLP,
845         .correctable = false,
846     }, {
847         .name = "FCP",
848         .val = PCI_ERR_UNC_FCP,
849         .correctable = false,
850     }, {
851         .name = "COMP_TIME",
852         .val = PCI_ERR_UNC_COMP_TIME,
853         .correctable = false,
854     }, {
855         .name = "COMP_ABORT",
856         .val = PCI_ERR_UNC_COMP_ABORT,
857         .correctable = false,
858     }, {
859         .name = "UNX_COMP",
860         .val = PCI_ERR_UNC_UNX_COMP,
861         .correctable = false,
862     }, {
863         .name = "RX_OVER",
864         .val = PCI_ERR_UNC_RX_OVER,
865         .correctable = false,
866     }, {
867         .name = "MALF_TLP",
868         .val = PCI_ERR_UNC_MALF_TLP,
869         .correctable = false,
870     }, {
871         .name = "ECRC",
872         .val = PCI_ERR_UNC_ECRC,
873         .correctable = false,
874     }, {
875         .name = "UNSUP",
876         .val = PCI_ERR_UNC_UNSUP,
877         .correctable = false,
878     }, {
879         .name = "ACSV",
880         .val = PCI_ERR_UNC_ACSV,
881         .correctable = false,
882     }, {
883         .name = "INTN",
884         .val = PCI_ERR_UNC_INTN,
885         .correctable = false,
886     }, {
887         .name = "MCBTLP",
888         .val = PCI_ERR_UNC_MCBTLP,
889         .correctable = false,
890     }, {
891         .name = "ATOP_EBLOCKED",
892         .val = PCI_ERR_UNC_ATOP_EBLOCKED,
893         .correctable = false,
894     }, {
895         .name = "TLP_PRF_BLOCKED",
896         .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
897         .correctable = false,
898     }, {
899         .name = "RCVR",
900         .val = PCI_ERR_COR_RCVR,
901         .correctable = true,
902     }, {
903         .name = "BAD_TLP",
904         .val = PCI_ERR_COR_BAD_TLP,
905         .correctable = true,
906     }, {
907         .name = "BAD_DLLP",
908         .val = PCI_ERR_COR_BAD_DLLP,
909         .correctable = true,
910     }, {
911         .name = "REP_ROLL",
912         .val = PCI_ERR_COR_REP_ROLL,
913         .correctable = true,
914     }, {
915         .name = "REP_TIMER",
916         .val = PCI_ERR_COR_REP_TIMER,
917         .correctable = true,
918     }, {
919         .name = "ADV_NONFATAL",
920         .val = PCI_ERR_COR_ADV_NONFATAL,
921         .correctable = true,
922     }, {
923         .name = "INTERNAL",
924         .val = PCI_ERR_COR_INTERNAL,
925         .correctable = true,
926     }, {
927         .name = "HL_OVERFLOW",
928         .val = PCI_ERR_COR_HL_OVERFLOW,
929         .correctable = true,
930     },
931 };
932 
933 static int pcie_aer_parse_error_string(const char *error_name,
934                                        uint32_t *status, bool *correctable)
935 {
936     int i;
937 
938     for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
939         const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
940         if (strcmp(error_name, e->name)) {
941             continue;
942         }
943 
944         *status = e->val;
945         *correctable = e->correctable;
946         return 0;
947     }
948     return -EINVAL;
949 }
950 
951 /*
952  * Inject an error described by @qdict.
953  * On success, set @details to show where error was sent.
954  * Return negative errno if injection failed and a message was emitted.
955  */
956 static int do_pcie_aer_inject_error(Monitor *mon,
957                                     const QDict *qdict,
958                                     PCIEErrorDetails *details)
959 {
960     const char *id = qdict_get_str(qdict, "id");
961     const char *error_name;
962     uint32_t error_status;
963     bool correctable;
964     PCIDevice *dev;
965     PCIEAERErr err;
966     int ret;
967 
968     ret = pci_qdev_find_device(id, &dev);
969     if (ret < 0) {
970         monitor_printf(mon,
971                        "id or pci device path is invalid or device not "
972                        "found. %s\n", id);
973         return ret;
974     }
975     if (!pci_is_express(dev)) {
976         monitor_printf(mon, "the device doesn't support pci express. %s\n",
977                        id);
978         return -ENOSYS;
979     }
980 
981     error_name = qdict_get_str(qdict, "error_status");
982     if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
983         char *e = NULL;
984         error_status = strtoul(error_name, &e, 0);
985         correctable = qdict_get_try_bool(qdict, "correctable", false);
986         if (!e || *e != '\0') {
987             monitor_printf(mon, "invalid error status value. \"%s\"",
988                            error_name);
989             return -EINVAL;
990         }
991     }
992     err.status = error_status;
993     err.source_id = pci_requester_id(dev);
994 
995     err.flags = 0;
996     if (correctable) {
997         err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
998     }
999     if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
1000         err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
1001     }
1002     if (qdict_haskey(qdict, "header0")) {
1003         err.flags |= PCIE_AER_ERR_HEADER_VALID;
1004     }
1005     if (qdict_haskey(qdict, "prefix0")) {
1006         err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
1007     }
1008 
1009     err.header[0] = qdict_get_try_int(qdict, "header0", 0);
1010     err.header[1] = qdict_get_try_int(qdict, "header1", 0);
1011     err.header[2] = qdict_get_try_int(qdict, "header2", 0);
1012     err.header[3] = qdict_get_try_int(qdict, "header3", 0);
1013 
1014     err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
1015     err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
1016     err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
1017     err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
1018 
1019     ret = pcie_aer_inject_error(dev, &err);
1020     if (ret < 0) {
1021         monitor_printf(mon, "failed to inject error: %s\n",
1022                        strerror(-ret));
1023         return ret;
1024     }
1025     details->id = id;
1026     details->root_bus = pci_root_bus_path(dev);
1027     details->bus = pci_dev_bus_num(dev);
1028     details->devfn = dev->devfn;
1029 
1030     return 0;
1031 }
1032 
1033 void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
1034 {
1035     PCIEErrorDetails data;
1036 
1037     if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
1038         return;
1039     }
1040 
1041     monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
1042                    data.id, data.root_bus, data.bus,
1043                    PCI_SLOT(data.devfn), PCI_FUNC(data.devfn));
1044 }
1045