xref: /openbmc/qemu/hw/pci/pcie_aer.c (revision 87a45cfe)
1 /*
2  * pcie_aer.c
3  *
4  * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "sysemu/sysemu.h"
22 #include "qapi/qmp/types.h"
23 #include "monitor/monitor.h"
24 #include "hw/pci/pci_bridge.h"
25 #include "hw/pci/pcie.h"
26 #include "hw/pci/msix.h"
27 #include "hw/pci/msi.h"
28 #include "hw/pci/pci_bus.h"
29 #include "hw/pci/pcie_regs.h"
30 
/* Uncomment the next line to get PCIe debug traces on stderr. */
//#define DEBUG_PCIE
#ifndef DEBUG_PCIE
/* Tracing disabled: expands to an empty statement, arguments are dropped. */
# define PCIE_DPRINTF(fmt, ...) do {} while (0)
#else
/* Tracing enabled: prefix each message with the calling function and line. */
# define PCIE_DPRINTF(fmt, ...)                                         \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#endif
/* Same as PCIE_DPRINTF, additionally tagged with the device name and devfn. */
#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)

/* Byte offsets of the two 16-bit source IDs inside PCI_ERR_ROOT_ERR_SRC. */
#define PCI_ERR_SRC_COR_OFFS    0
#define PCI_ERR_SRC_UNCOR_OFFS  2
43 
44 /* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
45 static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
46 {
47     switch (status) {
48     case PCI_ERR_UNC_INTN:
49     case PCI_ERR_UNC_DLP:
50     case PCI_ERR_UNC_SDN:
51     case PCI_ERR_UNC_RX_OVER:
52     case PCI_ERR_UNC_FCP:
53     case PCI_ERR_UNC_MALF_TLP:
54         return PCI_ERR_ROOT_CMD_FATAL_EN;
55     case PCI_ERR_UNC_POISON_TLP:
56     case PCI_ERR_UNC_ECRC:
57     case PCI_ERR_UNC_UNSUP:
58     case PCI_ERR_UNC_COMP_TIME:
59     case PCI_ERR_UNC_COMP_ABORT:
60     case PCI_ERR_UNC_UNX_COMP:
61     case PCI_ERR_UNC_ACSV:
62     case PCI_ERR_UNC_MCBTLP:
63     case PCI_ERR_UNC_ATOP_EBLOCKED:
64     case PCI_ERR_UNC_TLP_PRF_BLOCKED:
65         return PCI_ERR_ROOT_CMD_NONFATAL_EN;
66     default:
67         abort();
68         break;
69     }
70     return PCI_ERR_ROOT_CMD_FATAL_EN;
71 }
72 
73 static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
74 {
75     if (aer_log->log_num == aer_log->log_max) {
76         return -1;
77     }
78     memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
79     aer_log->log_num++;
80     return 0;
81 }
82 
83 static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
84 {
85     assert(aer_log->log_num);
86     *err = aer_log->log[0];
87     aer_log->log_num--;
88     memmove(&aer_log->log[0], &aer_log->log[1],
89             aer_log->log_num * sizeof *err);
90 }
91 
92 static void aer_log_clear_all_err(PCIEAERLog *aer_log)
93 {
94     aer_log->log_num = 0;
95 }
96 
97 int pcie_aer_init(PCIDevice *dev, uint16_t offset)
98 {
99     PCIExpressDevice *exp;
100 
101     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
102                         offset, PCI_ERR_SIZEOF);
103     exp = &dev->exp;
104     exp->aer_cap = offset;
105 
106     /* log_max is property */
107     if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
108         dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
109     }
110     /* clip down the value to avoid unreasobale memory usage */
111     if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
112         return -EINVAL;
113     }
114     dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
115                                         dev->exp.aer_log.log_max);
116 
117     pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
118                  PCI_ERR_UNC_SUPPORTED);
119 
120     pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
121                  PCI_ERR_UNC_SEVERITY_DEFAULT);
122     pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
123                  PCI_ERR_UNC_SUPPORTED);
124 
125     pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
126                                PCI_ERR_COR_STATUS);
127 
128     pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
129                  PCI_ERR_COR_MASK_DEFAULT);
130     pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
131                  PCI_ERR_COR_SUPPORTED);
132 
133     /* capabilities and control. multiple header logging is supported */
134     if (dev->exp.aer_log.log_max > 0) {
135         pci_set_long(dev->config + offset + PCI_ERR_CAP,
136                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
137                      PCI_ERR_CAP_MHRC);
138         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
139                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
140                      PCI_ERR_CAP_MHRE);
141     } else {
142         pci_set_long(dev->config + offset + PCI_ERR_CAP,
143                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
144         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
145                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
146     }
147 
148     switch (pcie_cap_get_type(dev)) {
149     case PCI_EXP_TYPE_ROOT_PORT:
150         /* this case will be set by pcie_aer_root_init() */
151         /* fallthrough */
152     case PCI_EXP_TYPE_DOWNSTREAM:
153     case PCI_EXP_TYPE_UPSTREAM:
154         pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
155                                    PCI_BRIDGE_CTL_SERR);
156         pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
157                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
158         break;
159     default:
160         /* nothing */
161         break;
162     }
163     return 0;
164 }
165 
166 void pcie_aer_exit(PCIDevice *dev)
167 {
168     g_free(dev->exp.aer_log.log);
169 }
170 
171 static void pcie_aer_update_uncor_status(PCIDevice *dev)
172 {
173     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
174     PCIEAERLog *aer_log = &dev->exp.aer_log;
175 
176     uint16_t i;
177     for (i = 0; i < aer_log->log_num; i++) {
178         pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
179                                    dev->exp.aer_log.log[i].status);
180     }
181 }
182 
183 /*
184  * return value:
185  * true: error message needs to be sent up
186  * false: error message is masked
187  *
188  * 6.2.6 Error Message Control
189  * Figure 6-3
190  * all pci express devices part
191  */
192 static bool
193 pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
194 {
195     if (!(pcie_aer_msg_is_uncor(msg) &&
196           (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
197         return false;
198     }
199 
200     /* Signaled System Error
201      *
202      * 7.5.1.1 Command register
203      * Bit 8 SERR# Enable
204      *
205      * When Set, this bit enables reporting of Non-fatal and Fatal
206      * errors detected by the Function to the Root Complex. Note that
207      * errors are reported if enabled either through this bit or through
208      * the PCI Express specific bits in the Device Control register (see
209      * Section 7.8.4).
210      */
211     pci_word_test_and_set_mask(dev->config + PCI_STATUS,
212                                PCI_STATUS_SIG_SYSTEM_ERROR);
213 
214     if (!(msg->severity &
215           pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
216         return false;
217     }
218 
219     /* send up error message */
220     return true;
221 }
222 
223 /*
224  * return value:
225  * true: error message is sent up
226  * false: error message is masked
227  *
228  * 6.2.6 Error Message Control
229  * Figure 6-3
230  * virtual pci bridge part
231  */
232 static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
233 {
234     uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
235 
236     if (pcie_aer_msg_is_uncor(msg)) {
237         /* Received System Error */
238         pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
239                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
240     }
241 
242     if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
243         return false;
244     }
245     return true;
246 }
247 
248 void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
249 {
250     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
251     assert(vector < PCI_ERR_ROOT_IRQ_MAX);
252     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
253                                  PCI_ERR_ROOT_IRQ);
254     pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
255                                vector << PCI_ERR_ROOT_IRQ_SHIFT);
256 }
257 
258 static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
259 {
260     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
261     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
262     return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
263 }
264 
265 /* Given a status register, get corresponding bits in the command register */
266 static uint32_t pcie_aer_status_to_cmd(uint32_t status)
267 {
268     uint32_t cmd = 0;
269     if (status & PCI_ERR_ROOT_COR_RCV) {
270         cmd |= PCI_ERR_ROOT_CMD_COR_EN;
271     }
272     if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
273         cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
274     }
275     if (status & PCI_ERR_ROOT_FATAL_RCV) {
276         cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
277     }
278     return cmd;
279 }
280 
281 static void pcie_aer_root_notify(PCIDevice *dev)
282 {
283     if (msix_enabled(dev)) {
284         msix_notify(dev, pcie_aer_root_get_vector(dev));
285     } else if (msi_enabled(dev)) {
286         msi_notify(dev, pcie_aer_root_get_vector(dev));
287     } else {
288         pci_irq_assert(dev);
289     }
290 }
291 
292 /*
293  * 6.2.6 Error Message Control
294  * Figure 6-3
295  * root port part
296  */
297 static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
298 {
299     uint16_t cmd;
300     uint8_t *aer_cap;
301     uint32_t root_cmd;
302     uint32_t root_status, prev_status;
303 
304     cmd = pci_get_word(dev->config + PCI_COMMAND);
305     aer_cap = dev->config + dev->exp.aer_cap;
306     root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
307     prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
308 
309     if (cmd & PCI_COMMAND_SERR) {
310         /* System Error.
311          *
312          * The way to report System Error is platform specific and
313          * it isn't implemented in qemu right now.
314          * So just discard the error for now.
315          * OS which cares of aer would receive errors via
316          * native aer mechanims, so this wouldn't matter.
317          */
318     }
319 
320     /* Errro Message Received: Root Error Status register */
321     switch (msg->severity) {
322     case PCI_ERR_ROOT_CMD_COR_EN:
323         if (root_status & PCI_ERR_ROOT_COR_RCV) {
324             root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
325         } else {
326             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
327                          msg->source_id);
328         }
329         root_status |= PCI_ERR_ROOT_COR_RCV;
330         break;
331     case PCI_ERR_ROOT_CMD_NONFATAL_EN:
332         root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
333         break;
334     case PCI_ERR_ROOT_CMD_FATAL_EN:
335         if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
336             root_status |= PCI_ERR_ROOT_FIRST_FATAL;
337         }
338         root_status |= PCI_ERR_ROOT_FATAL_RCV;
339         break;
340     default:
341         abort();
342         break;
343     }
344     if (pcie_aer_msg_is_uncor(msg)) {
345         if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
346             root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
347         } else {
348             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
349                          PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
350         }
351         root_status |= PCI_ERR_ROOT_UNCOR_RCV;
352     }
353     pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);
354 
355     /* 6.2.4.1.2 Interrupt Generation */
356     /* All the above did was set some bits in the status register.
357      * Specifically these that match message severity.
358      * The below code relies on this fact. */
359     if (!(root_cmd & msg->severity) ||
360         (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
361         /* Condition is not being set or was already true so nothing to do. */
362         return;
363     }
364 
365     pcie_aer_root_notify(dev);
366 }
367 
368 /*
369  * 6.2.6 Error Message Control Figure 6-3
370  *
371  * Walk up the bus tree from the device, propagate the error message.
372  */
373 static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
374 {
375     uint8_t type;
376 
377     while (dev) {
378         if (!pci_is_express(dev)) {
379             /* just ignore it */
380             /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
381              * Consider e.g. a PCI bridge above a PCI Express device. */
382             return;
383         }
384 
385         type = pcie_cap_get_type(dev);
386         if ((type == PCI_EXP_TYPE_ROOT_PORT ||
387             type == PCI_EXP_TYPE_UPSTREAM ||
388             type == PCI_EXP_TYPE_DOWNSTREAM) &&
389             !pcie_aer_msg_vbridge(dev, msg)) {
390                 return;
391         }
392         if (!pcie_aer_msg_alldev(dev, msg)) {
393             return;
394         }
395         if (type == PCI_EXP_TYPE_ROOT_PORT) {
396             pcie_aer_msg_root_port(dev, msg);
397             /* Root port can notify system itself,
398                or send the error message to root complex event collector. */
399             /*
400              * if root port is associated with an event collector,
401              * return the root complex event collector here.
402              * For now root complex event collector isn't supported.
403              */
404             return;
405         }
406         dev = pci_bridge_get_device(dev->bus);
407     }
408 }
409 
410 static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
411 {
412     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
413     uint8_t first_bit = ffs(err->status) - 1;
414     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
415     int i;
416 
417     assert(err->status);
418     assert(!(err->status & (err->status - 1)));
419 
420     errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
421     errcap |= PCI_ERR_CAP_FEP(first_bit);
422 
423     if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
424         for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
425             /* 7.10.8 Header Log Register */
426             uint8_t *header_log =
427                 aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
428             stl_be_p(header_log, err->header[i]);
429         }
430     } else {
431         assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
432         memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
433     }
434 
435     if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
436         (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2) &
437          PCI_EXP_DEVCAP2_EETLPP)) {
438         for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
439             /* 7.10.12 tlp prefix log register */
440             uint8_t *prefix_log =
441                 aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
442             stl_be_p(prefix_log, err->prefix[i]);
443         }
444         errcap |= PCI_ERR_CAP_TLP;
445     } else {
446         memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
447                PCI_ERR_TLP_PREFIX_LOG_SIZE);
448     }
449     pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
450 }
451 
452 static void pcie_aer_clear_log(PCIDevice *dev)
453 {
454     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
455 
456     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
457                                  PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
458 
459     memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
460     memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
461 }
462 
463 static void pcie_aer_clear_error(PCIDevice *dev)
464 {
465     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
466     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
467     PCIEAERLog *aer_log = &dev->exp.aer_log;
468     PCIEAERErr err;
469 
470     if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
471         pcie_aer_clear_log(dev);
472         return;
473     }
474 
475     /*
476      * If more errors are queued, set corresponding bits in uncorrectable
477      * error status.
478      * We emulate uncorrectable error status register as W1CS.
479      * So set bit in uncorrectable error status here again for multiple
480      * error recording support.
481      *
482      * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
483      */
484     pcie_aer_update_uncor_status(dev);
485 
486     aer_log_del_err(aer_log, &err);
487     pcie_aer_update_log(dev, &err);
488 }
489 
490 static int pcie_aer_record_error(PCIDevice *dev,
491                                  const PCIEAERErr *err)
492 {
493     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
494     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
495     int fep = PCI_ERR_CAP_FEP(errcap);
496 
497     assert(err->status);
498     assert(!(err->status & (err->status - 1)));
499 
500     if (errcap & PCI_ERR_CAP_MHRE &&
501         (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
502         /*  Not first error. queue error */
503         if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
504             /* overflow */
505             return -1;
506         }
507         return 0;
508     }
509 
510     pcie_aer_update_log(dev, err);
511     return 0;
512 }
513 
514 typedef struct PCIEAERInject {
515     PCIDevice *dev;
516     uint8_t *aer_cap;
517     const PCIEAERErr *err;
518     uint16_t devctl;
519     uint16_t devsta;
520     uint32_t error_status;
521     bool unsupported_request;
522     bool log_overflow;
523     PCIEAERMsg msg;
524 } PCIEAERInject;
525 
526 static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
527                                       uint32_t uncor_status,
528                                       bool is_advisory_nonfatal)
529 {
530     PCIDevice *dev = inj->dev;
531 
532     inj->devsta |= PCI_EXP_DEVSTA_CED;
533     if (inj->unsupported_request) {
534         inj->devsta |= PCI_EXP_DEVSTA_URD;
535     }
536     pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
537 
538     if (inj->aer_cap) {
539         uint32_t mask;
540         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
541                                    inj->error_status);
542         mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
543         if (mask & inj->error_status) {
544             return false;
545         }
546         if (is_advisory_nonfatal) {
547             uint32_t uncor_mask =
548                 pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
549             if (!(uncor_mask & uncor_status)) {
550                 inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
551             }
552             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
553                                        uncor_status);
554         }
555     }
556 
557     if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
558         return false;
559     }
560     if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
561         return false;
562     }
563 
564     inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
565     return true;
566 }
567 
568 static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
569 {
570     PCIDevice *dev = inj->dev;
571     uint16_t cmd;
572 
573     if (is_fatal) {
574         inj->devsta |= PCI_EXP_DEVSTA_FED;
575     } else {
576         inj->devsta |= PCI_EXP_DEVSTA_NFED;
577     }
578     if (inj->unsupported_request) {
579         inj->devsta |= PCI_EXP_DEVSTA_URD;
580     }
581     pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
582 
583     if (inj->aer_cap) {
584         uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
585         if (mask & inj->error_status) {
586             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
587                                        inj->error_status);
588             return false;
589         }
590 
591         inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
592         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
593                                    inj->error_status);
594     }
595 
596     cmd = pci_get_word(dev->config + PCI_COMMAND);
597     if (inj->unsupported_request &&
598         !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
599         return false;
600     }
601     if (is_fatal) {
602         if (!((cmd & PCI_COMMAND_SERR) ||
603               (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
604             return false;
605         }
606         inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
607     } else {
608         if (!((cmd & PCI_COMMAND_SERR) ||
609               (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
610             return false;
611         }
612         inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
613     }
614     return true;
615 }
616 
617 /*
618  * non-Function specific error must be recorded in all functions.
619  * It is the responsibility of the caller of this function.
620  * It is also caller's responsibility to determine which function should
621  * report the rerror.
622  *
623  * 6.2.4 Error Logging
624  * 6.2.5 Sqeunce of Device Error Signaling and Logging Operations
625  * table 6-2: Flowchard Showing Sequence of Device Error Signaling and Logging
626  *            Operations
627  */
628 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
629 {
630     uint8_t *aer_cap = NULL;
631     uint16_t devctl = 0;
632     uint16_t devsta = 0;
633     uint32_t error_status = err->status;
634     PCIEAERInject inj;
635 
636     if (!pci_is_express(dev)) {
637         return -ENOSYS;
638     }
639 
640     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
641         error_status &= PCI_ERR_COR_SUPPORTED;
642     } else {
643         error_status &= PCI_ERR_UNC_SUPPORTED;
644     }
645 
646     /* invalid status bit. one and only one bit must be set */
647     if (!error_status || (error_status & (error_status - 1))) {
648         return -EINVAL;
649     }
650 
651     if (dev->exp.aer_cap) {
652         uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
653         aer_cap = dev->config + dev->exp.aer_cap;
654         devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
655         devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
656     }
657 
658     inj.dev = dev;
659     inj.aer_cap = aer_cap;
660     inj.err = err;
661     inj.devctl = devctl;
662     inj.devsta = devsta;
663     inj.error_status = error_status;
664     inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
665         err->status == PCI_ERR_UNC_UNSUP;
666     inj.log_overflow = false;
667 
668     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
669         if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
670             return 0;
671         }
672     } else {
673         bool is_fatal =
674             pcie_aer_uncor_default_severity(error_status) ==
675             PCI_ERR_ROOT_CMD_FATAL_EN;
676         if (aer_cap) {
677             is_fatal =
678                 error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
679         }
680         if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
681             inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
682             if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
683                 return 0;
684             }
685         } else {
686             if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
687                 return 0;
688             }
689         }
690     }
691 
692     /* send up error message */
693     inj.msg.source_id = err->source_id;
694     pcie_aer_msg(dev, &inj.msg);
695 
696     if (inj.log_overflow) {
697         PCIEAERErr header_log_overflow = {
698             .status = PCI_ERR_COR_HL_OVERFLOW,
699             .flags = PCIE_AER_ERR_IS_CORRECTABLE,
700         };
701         int ret = pcie_aer_inject_error(dev, &header_log_overflow);
702         assert(!ret);
703     }
704     return 0;
705 }
706 
707 void pcie_aer_write_config(PCIDevice *dev,
708                            uint32_t addr, uint32_t val, int len)
709 {
710     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
711     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
712     uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
713     uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
714 
715     /* uncorrectable error */
716     if (!(uncorsta & first_error)) {
717         /* the bit that corresponds to the first error is cleared */
718         pcie_aer_clear_error(dev);
719     } else if (errcap & PCI_ERR_CAP_MHRE) {
720         /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
721          * nothing should happen. So we have to revert the modification to
722          * the register.
723          */
724         pcie_aer_update_uncor_status(dev);
725     } else {
726         /* capability & control
727          * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
728          */
729         aer_log_clear_all_err(&dev->exp.aer_log);
730     }
731 }
732 
733 void pcie_aer_root_init(PCIDevice *dev)
734 {
735     uint16_t pos = dev->exp.aer_cap;
736 
737     pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
738                  PCI_ERR_ROOT_CMD_EN_MASK);
739     pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
740                  PCI_ERR_ROOT_STATUS_REPORT_MASK);
741     /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
742      * device-specific method.
743      */
744     pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
745                  ~PCI_ERR_ROOT_IRQ);
746 }
747 
748 void pcie_aer_root_reset(PCIDevice *dev)
749 {
750     uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
751 
752     pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
753 
754     /*
755      * Advanced Error Interrupt Message Number in Root Error Status Register
756      * must be updated by chip dependent code because it's chip dependent
757      * which number is used.
758      */
759 }
760 
761 void pcie_aer_root_write_config(PCIDevice *dev,
762                                 uint32_t addr, uint32_t val, int len,
763                                 uint32_t root_cmd_prev)
764 {
765     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
766     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
767     uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
768     uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
769     /* 6.2.4.1.2 Interrupt Generation */
770     if (!msix_enabled(dev) && !msi_enabled(dev)) {
771         pci_set_irq(dev, !!(root_cmd & enabled_cmd));
772         return;
773     }
774 
775     if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
776         /* Send MSI on transition from false to true. */
777         return;
778     }
779 
780     pcie_aer_root_notify(dev);
781 }
782 
783 static const VMStateDescription vmstate_pcie_aer_err = {
784     .name = "PCIE_AER_ERROR",
785     .version_id = 1,
786     .minimum_version_id = 1,
787     .fields = (VMStateField[]) {
788         VMSTATE_UINT32(status, PCIEAERErr),
789         VMSTATE_UINT16(source_id, PCIEAERErr),
790         VMSTATE_UINT16(flags, PCIEAERErr),
791         VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
792         VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
793         VMSTATE_END_OF_LIST()
794     }
795 };
796 
797 static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
798 {
799     PCIEAERLog *s = opaque;
800 
801     return s->log_num <= s->log_max;
802 }
803 
804 const VMStateDescription vmstate_pcie_aer_log = {
805     .name = "PCIE_AER_ERROR_LOG",
806     .version_id = 1,
807     .minimum_version_id = 1,
808     .fields = (VMStateField[]) {
809         VMSTATE_UINT16(log_num, PCIEAERLog),
810         VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog),
811         VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
812         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
813                               vmstate_pcie_aer_err, PCIEAERErr),
814         VMSTATE_END_OF_LIST()
815     }
816 };
817 
818 void pcie_aer_inject_error_print(Monitor *mon, const QObject *data)
819 {
820     QDict *qdict;
821     int devfn;
822     assert(qobject_type(data) == QTYPE_QDICT);
823     qdict = qobject_to_qdict(data);
824 
825     devfn = (int)qdict_get_int(qdict, "devfn");
826     monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
827                    qdict_get_str(qdict, "id"),
828                    qdict_get_str(qdict, "root_bus"),
829                    (int) qdict_get_int(qdict, "bus"),
830                    PCI_SLOT(devfn), PCI_FUNC(devfn));
831 }
832 
833 typedef struct PCIEAERErrorName {
834     const char *name;
835     uint32_t val;
836     bool correctable;
837 } PCIEAERErrorName;
838 
839 /*
840  * AER error name -> value conversion table
841  * This naming scheme is same to linux aer-injection tool.
842  */
843 static const struct PCIEAERErrorName pcie_aer_error_list[] = {
844     {
845         .name = "TRAIN",
846         .val = PCI_ERR_UNC_TRAIN,
847         .correctable = false,
848     }, {
849         .name = "DLP",
850         .val = PCI_ERR_UNC_DLP,
851         .correctable = false,
852     }, {
853         .name = "SDN",
854         .val = PCI_ERR_UNC_SDN,
855         .correctable = false,
856     }, {
857         .name = "POISON_TLP",
858         .val = PCI_ERR_UNC_POISON_TLP,
859         .correctable = false,
860     }, {
861         .name = "FCP",
862         .val = PCI_ERR_UNC_FCP,
863         .correctable = false,
864     }, {
865         .name = "COMP_TIME",
866         .val = PCI_ERR_UNC_COMP_TIME,
867         .correctable = false,
868     }, {
869         .name = "COMP_ABORT",
870         .val = PCI_ERR_UNC_COMP_ABORT,
871         .correctable = false,
872     }, {
873         .name = "UNX_COMP",
874         .val = PCI_ERR_UNC_UNX_COMP,
875         .correctable = false,
876     }, {
877         .name = "RX_OVER",
878         .val = PCI_ERR_UNC_RX_OVER,
879         .correctable = false,
880     }, {
881         .name = "MALF_TLP",
882         .val = PCI_ERR_UNC_MALF_TLP,
883         .correctable = false,
884     }, {
885         .name = "ECRC",
886         .val = PCI_ERR_UNC_ECRC,
887         .correctable = false,
888     }, {
889         .name = "UNSUP",
890         .val = PCI_ERR_UNC_UNSUP,
891         .correctable = false,
892     }, {
893         .name = "ACSV",
894         .val = PCI_ERR_UNC_ACSV,
895         .correctable = false,
896     }, {
897         .name = "INTN",
898         .val = PCI_ERR_UNC_INTN,
899         .correctable = false,
900     }, {
901         .name = "MCBTLP",
902         .val = PCI_ERR_UNC_MCBTLP,
903         .correctable = false,
904     }, {
905         .name = "ATOP_EBLOCKED",
906         .val = PCI_ERR_UNC_ATOP_EBLOCKED,
907         .correctable = false,
908     }, {
909         .name = "TLP_PRF_BLOCKED",
910         .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
911         .correctable = false,
912     }, {
913         .name = "RCVR",
914         .val = PCI_ERR_COR_RCVR,
915         .correctable = true,
916     }, {
917         .name = "BAD_TLP",
918         .val = PCI_ERR_COR_BAD_TLP,
919         .correctable = true,
920     }, {
921         .name = "BAD_DLLP",
922         .val = PCI_ERR_COR_BAD_DLLP,
923         .correctable = true,
924     }, {
925         .name = "REP_ROLL",
926         .val = PCI_ERR_COR_REP_ROLL,
927         .correctable = true,
928     }, {
929         .name = "REP_TIMER",
930         .val = PCI_ERR_COR_REP_TIMER,
931         .correctable = true,
932     }, {
933         .name = "ADV_NONFATAL",
934         .val = PCI_ERR_COR_ADV_NONFATAL,
935         .correctable = true,
936     }, {
937         .name = "INTERNAL",
938         .val = PCI_ERR_COR_INTERNAL,
939         .correctable = true,
940     }, {
941         .name = "HL_OVERFLOW",
942         .val = PCI_ERR_COR_HL_OVERFLOW,
943         .correctable = true,
944     },
945 };
946 
947 static int pcie_aer_parse_error_string(const char *error_name,
948                                        uint32_t *status, bool *correctable)
949 {
950     int i;
951 
952     for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
953         const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
954         if (strcmp(error_name, e->name)) {
955             continue;
956         }
957 
958         *status = e->val;
959         *correctable = e->correctable;
960         return 0;
961     }
962     return -EINVAL;
963 }
964 
965 int hmp_pcie_aer_inject_error(Monitor *mon,
966                              const QDict *qdict, QObject **ret_data)
967 {
968     const char *id = qdict_get_str(qdict, "id");
969     const char *error_name;
970     uint32_t error_status;
971     bool correctable;
972     PCIDevice *dev;
973     PCIEAERErr err;
974     int ret;
975 
976     ret = pci_qdev_find_device(id, &dev);
977     if (ret < 0) {
978         monitor_printf(mon,
979                        "id or pci device path is invalid or device not "
980                        "found. %s\n", id);
981         return ret;
982     }
983     if (!pci_is_express(dev)) {
984         monitor_printf(mon, "the device doesn't support pci express. %s\n",
985                        id);
986         return -ENOSYS;
987     }
988 
989     error_name = qdict_get_str(qdict, "error_status");
990     if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
991         char *e = NULL;
992         error_status = strtoul(error_name, &e, 0);
993         correctable = qdict_get_try_bool(qdict, "correctable", 0);
994         if (!e || *e != '\0') {
995             monitor_printf(mon, "invalid error status value. \"%s\"",
996                            error_name);
997             return -EINVAL;
998         }
999     }
1000     err.status = error_status;
1001     err.source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn;
1002 
1003     err.flags = 0;
1004     if (correctable) {
1005         err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
1006     }
1007     if (qdict_get_try_bool(qdict, "advisory_non_fatal", 0)) {
1008         err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
1009     }
1010     if (qdict_haskey(qdict, "header0")) {
1011         err.flags |= PCIE_AER_ERR_HEADER_VALID;
1012     }
1013     if (qdict_haskey(qdict, "prefix0")) {
1014         err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
1015     }
1016 
1017     err.header[0] = qdict_get_try_int(qdict, "header0", 0);
1018     err.header[1] = qdict_get_try_int(qdict, "header1", 0);
1019     err.header[2] = qdict_get_try_int(qdict, "header2", 0);
1020     err.header[3] = qdict_get_try_int(qdict, "header3", 0);
1021 
1022     err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
1023     err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
1024     err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
1025     err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
1026 
1027     ret = pcie_aer_inject_error(dev, &err);
1028     *ret_data = qobject_from_jsonf("{'id': %s, "
1029                                    "'root_bus': %s, 'bus': %d, 'devfn': %d, "
1030                                    "'ret': %d}",
1031                                    id, pci_root_bus_path(dev),
1032                                    pci_bus_num(dev->bus), dev->devfn,
1033                                    ret);
1034     assert(*ret_data);
1035 
1036     return 0;
1037 }
1038