xref: /openbmc/qemu/hw/pci/pcie_aer.c (revision 5e6aceb2)
1 /*
2  * pcie_aer.c
3  *
4  * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "migration/vmstate.h"
23 #include "hw/pci/pci_bridge.h"
24 #include "hw/pci/pcie.h"
25 #include "hw/pci/msix.h"
26 #include "hw/pci/msi.h"
27 #include "hw/pci/pci_bus.h"
28 #include "hw/pci/pcie_regs.h"
29 #include "pci-internal.h"
30 
/* Uncomment to make PCIE_DPRINTF()/PCIE_DEV_PRINTF() print to stderr. */
//#define DEBUG_PCIE
#ifdef DEBUG_PCIE
/* Debug build: every message is prefixed with the calling function and line. */
# define PCIE_DPRINTF(fmt, ...)                                         \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#else
/* Normal build: the debug print expands to an empty statement. */
# define PCIE_DPRINTF(fmt, ...) do {} while (0)
#endif
/* Like PCIE_DPRINTF(), additionally prefixed with the device name and devfn. */
#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)

/*
 * Byte offsets, relative to PCI_ERR_ROOT_ERR_SRC, at which the 16-bit
 * source ID of a correctable / uncorrectable error message is stored.
 */
#define PCI_ERR_SRC_COR_OFFS    0
#define PCI_ERR_SRC_UNCOR_OFFS  2
43 
44 /* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
45 static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
46 {
47     switch (status) {
48     case PCI_ERR_UNC_INTN:
49     case PCI_ERR_UNC_DLP:
50     case PCI_ERR_UNC_SDN:
51     case PCI_ERR_UNC_RX_OVER:
52     case PCI_ERR_UNC_FCP:
53     case PCI_ERR_UNC_MALF_TLP:
54         return PCI_ERR_ROOT_CMD_FATAL_EN;
55     case PCI_ERR_UNC_POISON_TLP:
56     case PCI_ERR_UNC_ECRC:
57     case PCI_ERR_UNC_UNSUP:
58     case PCI_ERR_UNC_COMP_TIME:
59     case PCI_ERR_UNC_COMP_ABORT:
60     case PCI_ERR_UNC_UNX_COMP:
61     case PCI_ERR_UNC_ACSV:
62     case PCI_ERR_UNC_MCBTLP:
63     case PCI_ERR_UNC_ATOP_EBLOCKED:
64     case PCI_ERR_UNC_TLP_PRF_BLOCKED:
65         return PCI_ERR_ROOT_CMD_NONFATAL_EN;
66     default:
67         abort();
68         break;
69     }
70     return PCI_ERR_ROOT_CMD_FATAL_EN;
71 }
72 
73 static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
74 {
75     if (aer_log->log_num == aer_log->log_max) {
76         return -1;
77     }
78     memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
79     aer_log->log_num++;
80     return 0;
81 }
82 
83 static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
84 {
85     assert(aer_log->log_num);
86     *err = aer_log->log[0];
87     aer_log->log_num--;
88     memmove(&aer_log->log[0], &aer_log->log[1],
89             aer_log->log_num * sizeof *err);
90 }
91 
92 static void aer_log_clear_all_err(PCIEAERLog *aer_log)
93 {
94     aer_log->log_num = 0;
95 }
96 
97 int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
98                   uint16_t size, Error **errp)
99 {
100     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, cap_ver,
101                         offset, size);
102     dev->exp.aer_cap = offset;
103 
104     /* clip down the value to avoid unreasonable memory usage */
105     if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
106         error_setg(errp, "Invalid aer_log_max %d. The max number of aer log "
107                 "is %d", dev->exp.aer_log.log_max, PCIE_AER_LOG_MAX_LIMIT);
108         return -EINVAL;
109     }
110     dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
111                                         dev->exp.aer_log.log_max);
112 
113     pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
114                  PCI_ERR_UNC_SUPPORTED);
115 
116     if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) {
117         pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
118                      PCI_ERR_UNC_MASK_DEFAULT);
119         pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
120                      PCI_ERR_UNC_SUPPORTED);
121     }
122 
123     pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
124                  PCI_ERR_UNC_SEVERITY_DEFAULT);
125     pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
126                  PCI_ERR_UNC_SUPPORTED);
127 
128     pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
129                                PCI_ERR_COR_SUPPORTED);
130 
131     pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
132                  PCI_ERR_COR_MASK_DEFAULT);
133     pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
134                  PCI_ERR_COR_SUPPORTED);
135 
136     /* capabilities and control. multiple header logging is supported */
137     if (dev->exp.aer_log.log_max > 0) {
138         pci_set_long(dev->config + offset + PCI_ERR_CAP,
139                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
140                      PCI_ERR_CAP_MHRC);
141         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
142                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
143                      PCI_ERR_CAP_MHRE);
144     } else {
145         pci_set_long(dev->config + offset + PCI_ERR_CAP,
146                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
147         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
148                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
149     }
150 
151     switch (pcie_cap_get_type(dev)) {
152     case PCI_EXP_TYPE_ROOT_PORT:
153         /* this case will be set by pcie_aer_root_init() */
154         /* fallthrough */
155     case PCI_EXP_TYPE_DOWNSTREAM:
156     case PCI_EXP_TYPE_UPSTREAM:
157         pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
158                                    PCI_BRIDGE_CTL_SERR);
159         pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
160                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
161         break;
162     default:
163         /* nothing */
164         break;
165     }
166     return 0;
167 }
168 
169 void pcie_aer_exit(PCIDevice *dev)
170 {
171     g_free(dev->exp.aer_log.log);
172 }
173 
174 static void pcie_aer_update_uncor_status(PCIDevice *dev)
175 {
176     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
177     PCIEAERLog *aer_log = &dev->exp.aer_log;
178 
179     uint16_t i;
180     for (i = 0; i < aer_log->log_num; i++) {
181         pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
182                                    dev->exp.aer_log.log[i].status);
183     }
184 }
185 
186 /*
187  * return value:
188  * true: error message needs to be sent up
189  * false: error message is masked
190  *
191  * 6.2.6 Error Message Control
192  * Figure 6-3
193  * all pci express devices part
194  */
195 static bool
196 pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
197 {
198     uint16_t devctl = pci_get_word(dev->config + dev->exp.exp_cap +
199                                    PCI_EXP_DEVCTL);
200     if (!(pcie_aer_msg_is_uncor(msg) &&
201           (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR)) &&
202         !((msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN) &&
203           (devctl & PCI_EXP_DEVCTL_NFERE)) &&
204         !((msg->severity == PCI_ERR_ROOT_CMD_COR_EN) &&
205           (devctl & PCI_EXP_DEVCTL_CERE)) &&
206         !((msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN) &&
207           (devctl & PCI_EXP_DEVCTL_FERE))) {
208         return false;
209     }
210 
211     /* Signaled System Error
212      *
213      * 7.5.1.1 Command register
214      * Bit 8 SERR# Enable
215      *
216      * When Set, this bit enables reporting of Non-fatal and Fatal
217      * errors detected by the Function to the Root Complex. Note that
218      * errors are reported if enabled either through this bit or through
219      * the PCI Express specific bits in the Device Control register (see
220      * Section 7.8.4).
221      */
222     pci_word_test_and_set_mask(dev->config + PCI_STATUS,
223                                PCI_STATUS_SIG_SYSTEM_ERROR);
224 
225     if (!(msg->severity &
226           pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
227         return false;
228     }
229 
230     /* send up error message */
231     return true;
232 }
233 
234 /*
235  * return value:
236  * true: error message is sent up
237  * false: error message is masked
238  *
239  * 6.2.6 Error Message Control
240  * Figure 6-3
241  * virtual pci bridge part
242  */
243 static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
244 {
245     uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
246 
247     if (pcie_aer_msg_is_uncor(msg)) {
248         /* Received System Error */
249         pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
250                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
251     }
252 
253     if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
254         return false;
255     }
256     return true;
257 }
258 
259 void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
260 {
261     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
262     assert(vector < PCI_ERR_ROOT_IRQ_MAX);
263     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
264                                  PCI_ERR_ROOT_IRQ);
265     pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
266                                vector << PCI_ERR_ROOT_IRQ_SHIFT);
267 }
268 
269 static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
270 {
271     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
272     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
273     return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
274 }
275 
276 /* Given a status register, get corresponding bits in the command register */
277 static uint32_t pcie_aer_status_to_cmd(uint32_t status)
278 {
279     uint32_t cmd = 0;
280     if (status & PCI_ERR_ROOT_COR_RCV) {
281         cmd |= PCI_ERR_ROOT_CMD_COR_EN;
282     }
283     if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
284         cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
285     }
286     if (status & PCI_ERR_ROOT_FATAL_RCV) {
287         cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
288     }
289     return cmd;
290 }
291 
292 static void pcie_aer_root_notify(PCIDevice *dev)
293 {
294     if (msix_enabled(dev)) {
295         msix_notify(dev, pcie_aer_root_get_vector(dev));
296     } else if (msi_enabled(dev)) {
297         msi_notify(dev, pcie_aer_root_get_vector(dev));
298     } else if (pci_intx(dev) != -1) {
299         pci_irq_assert(dev);
300     }
301 }
302 
303 /*
304  * 6.2.6 Error Message Control
305  * Figure 6-3
306  * root port part
307  */
308 static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
309 {
310     uint16_t cmd;
311     uint8_t *aer_cap;
312     uint32_t root_cmd;
313     uint32_t root_status, prev_status;
314 
315     cmd = pci_get_word(dev->config + PCI_COMMAND);
316     aer_cap = dev->config + dev->exp.aer_cap;
317     root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
318     prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
319 
320     if (cmd & PCI_COMMAND_SERR) {
321         /* System Error.
322          *
323          * The way to report System Error is platform specific and
324          * it isn't implemented in qemu right now.
325          * So just discard the error for now.
326          * OS which cares of aer would receive errors via
327          * native aer mechanisms, so this wouldn't matter.
328          */
329     }
330 
331     /* Error Message Received: Root Error Status register */
332     switch (msg->severity) {
333     case PCI_ERR_ROOT_CMD_COR_EN:
334         if (root_status & PCI_ERR_ROOT_COR_RCV) {
335             root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
336         } else {
337             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
338                          msg->source_id);
339         }
340         root_status |= PCI_ERR_ROOT_COR_RCV;
341         break;
342     case PCI_ERR_ROOT_CMD_NONFATAL_EN:
343         root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
344         break;
345     case PCI_ERR_ROOT_CMD_FATAL_EN:
346         if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
347             root_status |= PCI_ERR_ROOT_FIRST_FATAL;
348         }
349         root_status |= PCI_ERR_ROOT_FATAL_RCV;
350         break;
351     default:
352         abort();
353         break;
354     }
355     if (pcie_aer_msg_is_uncor(msg)) {
356         if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
357             root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
358         } else {
359             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
360                          PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
361         }
362         root_status |= PCI_ERR_ROOT_UNCOR_RCV;
363     }
364     pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);
365 
366     /* 6.2.4.1.2 Interrupt Generation */
367     /* All the above did was set some bits in the status register.
368      * Specifically these that match message severity.
369      * The below code relies on this fact. */
370     if (!(root_cmd & msg->severity) ||
371         (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
372         /* Condition is not being set or was already true so nothing to do. */
373         return;
374     }
375 
376     pcie_aer_root_notify(dev);
377 }
378 
379 /*
380  * 6.2.6 Error Message Control Figure 6-3
381  *
382  * Walk up the bus tree from the device, propagate the error message.
383  */
384 static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
385 {
386     uint8_t type;
387 
388     while (dev) {
389         if (!pci_is_express(dev)) {
390             /* just ignore it */
391             /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
392              * Consider e.g. a PCI bridge above a PCI Express device. */
393             return;
394         }
395 
396         type = pcie_cap_get_type(dev);
397         if ((type == PCI_EXP_TYPE_ROOT_PORT ||
398             type == PCI_EXP_TYPE_UPSTREAM ||
399             type == PCI_EXP_TYPE_DOWNSTREAM) &&
400             !pcie_aer_msg_vbridge(dev, msg)) {
401                 return;
402         }
403         if (!pcie_aer_msg_alldev(dev, msg)) {
404             return;
405         }
406         if (type == PCI_EXP_TYPE_ROOT_PORT) {
407             pcie_aer_msg_root_port(dev, msg);
408             /* Root port can notify system itself,
409                or send the error message to root complex event collector. */
410             /*
411              * if root port is associated with an event collector,
412              * return the root complex event collector here.
413              * For now root complex event collector isn't supported.
414              */
415             return;
416         }
417         dev = pci_bridge_get_device(pci_get_bus(dev));
418     }
419 }
420 
421 static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
422 {
423     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
424     uint8_t first_bit = ctz32(err->status);
425     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
426     int i;
427 
428     assert(err->status);
429     assert(!(err->status & (err->status - 1)));
430 
431     errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
432     errcap |= PCI_ERR_CAP_FEP(first_bit);
433 
434     if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
435         for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
436             /* 7.10.8 Header Log Register */
437             uint8_t *header_log =
438                 aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
439             stl_be_p(header_log, err->header[i]);
440         }
441     } else {
442         assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
443         memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
444     }
445 
446     if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
447         (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
448          PCI_EXP_DEVCAP2_EETLPP)) {
449         for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
450             /* 7.10.12 tlp prefix log register */
451             uint8_t *prefix_log =
452                 aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
453             stl_be_p(prefix_log, err->prefix[i]);
454         }
455         errcap |= PCI_ERR_CAP_TLP;
456     } else {
457         memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
458                PCI_ERR_TLP_PREFIX_LOG_SIZE);
459     }
460     pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
461 }
462 
463 static void pcie_aer_clear_log(PCIDevice *dev)
464 {
465     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
466 
467     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
468                                  PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
469 
470     memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
471     memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
472 }
473 
474 static void pcie_aer_clear_error(PCIDevice *dev)
475 {
476     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
477     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
478     PCIEAERLog *aer_log = &dev->exp.aer_log;
479     PCIEAERErr err;
480 
481     if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
482         pcie_aer_clear_log(dev);
483         return;
484     }
485 
486     /*
487      * If more errors are queued, set corresponding bits in uncorrectable
488      * error status.
489      * We emulate uncorrectable error status register as W1CS.
490      * So set bit in uncorrectable error status here again for multiple
491      * error recording support.
492      *
493      * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
494      */
495     pcie_aer_update_uncor_status(dev);
496 
497     aer_log_del_err(aer_log, &err);
498     pcie_aer_update_log(dev, &err);
499 }
500 
501 static int pcie_aer_record_error(PCIDevice *dev,
502                                  const PCIEAERErr *err)
503 {
504     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
505     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
506     int fep = PCI_ERR_CAP_FEP(errcap);
507 
508     assert(err->status);
509     assert(!(err->status & (err->status - 1)));
510 
511     if (errcap & PCI_ERR_CAP_MHRE &&
512         (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
513         /*  Not first error. queue error */
514         if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
515             /* overflow */
516             return -1;
517         }
518         return 0;
519     }
520 
521     pcie_aer_update_log(dev, err);
522     return 0;
523 }
524 
/* Working state shared by the helpers of pcie_aer_inject_error(). */
typedef struct PCIEAERInject {
    PCIDevice *dev;             /* device the error is injected into */
    uint8_t *aer_cap;           /* AER capability in config space, or NULL */
    const PCIEAERErr *err;      /* error being injected */
    uint16_t devctl;            /* Device Control value sampled at entry */
    uint16_t devsta;            /* Device Status value, updated by helpers */
    uint32_t error_status;      /* single status bit being reported */
    bool unsupported_request;   /* uncorrectable PCI_ERR_UNC_UNSUP error */
    bool log_overflow;          /* set when recording the error failed */
    PCIEAERMsg msg;             /* error message to send up */
} PCIEAERInject;
536 
537 static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
538                                       uint32_t uncor_status,
539                                       bool is_advisory_nonfatal)
540 {
541     PCIDevice *dev = inj->dev;
542 
543     inj->devsta |= PCI_EXP_DEVSTA_CED;
544     if (inj->unsupported_request) {
545         inj->devsta |= PCI_EXP_DEVSTA_URD;
546     }
547     pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
548 
549     if (inj->aer_cap) {
550         uint32_t mask;
551         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
552                                    inj->error_status);
553         mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
554         if (mask & inj->error_status) {
555             return false;
556         }
557         if (is_advisory_nonfatal) {
558             uint32_t uncor_mask =
559                 pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
560             if (!(uncor_mask & uncor_status)) {
561                 inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
562             }
563             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
564                                        uncor_status);
565         }
566     }
567 
568     if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
569         return false;
570     }
571     if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
572         return false;
573     }
574 
575     inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
576     return true;
577 }
578 
579 static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
580 {
581     PCIDevice *dev = inj->dev;
582     uint16_t cmd;
583 
584     if (is_fatal) {
585         inj->devsta |= PCI_EXP_DEVSTA_FED;
586     } else {
587         inj->devsta |= PCI_EXP_DEVSTA_NFED;
588     }
589     if (inj->unsupported_request) {
590         inj->devsta |= PCI_EXP_DEVSTA_URD;
591     }
592     pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
593 
594     if (inj->aer_cap) {
595         uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
596         if (mask & inj->error_status) {
597             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
598                                        inj->error_status);
599             return false;
600         }
601 
602         inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
603         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
604                                    inj->error_status);
605     }
606 
607     cmd = pci_get_word(dev->config + PCI_COMMAND);
608     if (inj->unsupported_request &&
609         !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
610         return false;
611     }
612     if (is_fatal) {
613         if (!((cmd & PCI_COMMAND_SERR) ||
614               (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
615             return false;
616         }
617         inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
618     } else {
619         if (!((cmd & PCI_COMMAND_SERR) ||
620               (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
621             return false;
622         }
623         inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
624     }
625     return true;
626 }
627 
628 /*
629  * non-Function specific error must be recorded in all functions.
630  * It is the responsibility of the caller of this function.
631  * It is also caller's responsibility to determine which function should
632  * report the error.
633  *
634  * 6.2.4 Error Logging
635  * 6.2.5 Sequence of Device Error Signaling and Logging Operations
636  * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
637  *             Operations
638  */
639 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
640 {
641     uint8_t *aer_cap = NULL;
642     uint16_t devctl = 0;
643     uint16_t devsta = 0;
644     uint32_t error_status = err->status;
645     PCIEAERInject inj;
646 
647     if (!pci_is_express(dev)) {
648         return -ENOSYS;
649     }
650 
651     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
652         error_status &= PCI_ERR_COR_SUPPORTED;
653     } else {
654         error_status &= PCI_ERR_UNC_SUPPORTED;
655     }
656 
657     /* invalid status bit. one and only one bit must be set */
658     if (!error_status || (error_status & (error_status - 1))) {
659         return -EINVAL;
660     }
661 
662     if (dev->exp.aer_cap) {
663         uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
664         aer_cap = dev->config + dev->exp.aer_cap;
665         devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
666         devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
667     }
668 
669     inj.dev = dev;
670     inj.aer_cap = aer_cap;
671     inj.err = err;
672     inj.devctl = devctl;
673     inj.devsta = devsta;
674     inj.error_status = error_status;
675     inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
676         err->status == PCI_ERR_UNC_UNSUP;
677     inj.log_overflow = false;
678 
679     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
680         if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
681             return 0;
682         }
683     } else {
684         bool is_fatal =
685             pcie_aer_uncor_default_severity(error_status) ==
686             PCI_ERR_ROOT_CMD_FATAL_EN;
687         if (aer_cap) {
688             is_fatal =
689                 error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
690         }
691         if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
692             inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
693             if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
694                 return 0;
695             }
696         } else {
697             if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
698                 return 0;
699             }
700         }
701     }
702 
703     /* send up error message */
704     inj.msg.source_id = err->source_id;
705     pcie_aer_msg(dev, &inj.msg);
706 
707     if (inj.log_overflow) {
708         PCIEAERErr header_log_overflow = {
709             .status = PCI_ERR_COR_HL_OVERFLOW,
710             .flags = PCIE_AER_ERR_IS_CORRECTABLE,
711         };
712         int ret = pcie_aer_inject_error(dev, &header_log_overflow);
713         assert(!ret);
714     }
715     return 0;
716 }
717 
718 void pcie_aer_write_config(PCIDevice *dev,
719                            uint32_t addr, uint32_t val, int len)
720 {
721     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
722     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
723     uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
724     uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
725 
726     /* uncorrectable error */
727     if (!(uncorsta & first_error)) {
728         /* the bit that corresponds to the first error is cleared */
729         pcie_aer_clear_error(dev);
730     } else if (errcap & PCI_ERR_CAP_MHRE) {
731         /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
732          * nothing should happen. So we have to revert the modification to
733          * the register.
734          */
735         pcie_aer_update_uncor_status(dev);
736     } else {
737         /* capability & control
738          * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
739          */
740         aer_log_clear_all_err(&dev->exp.aer_log);
741     }
742 }
743 
744 void pcie_aer_root_init(PCIDevice *dev)
745 {
746     uint16_t pos = dev->exp.aer_cap;
747 
748     pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
749                  PCI_ERR_ROOT_CMD_EN_MASK);
750     pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
751                  PCI_ERR_ROOT_STATUS_REPORT_MASK);
752     /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
753      * device-specific method.
754      */
755     pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
756                  ~PCI_ERR_ROOT_IRQ);
757 }
758 
759 void pcie_aer_root_reset(PCIDevice *dev)
760 {
761     uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
762 
763     pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
764 
765     /*
766      * Advanced Error Interrupt Message Number in Root Error Status Register
767      * must be updated by chip dependent code because it's chip dependent
768      * which number is used.
769      */
770 }
771 
772 void pcie_aer_root_write_config(PCIDevice *dev,
773                                 uint32_t addr, uint32_t val, int len,
774                                 uint32_t root_cmd_prev)
775 {
776     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
777     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
778     uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
779     uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
780     /* 6.2.4.1.2 Interrupt Generation */
781     if (!msix_enabled(dev) && !msi_enabled(dev)) {
782         if (pci_intx(dev) != -1) {
783             pci_set_irq(dev, !!(root_cmd & enabled_cmd));
784         }
785         return;
786     }
787 
788     if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
789         /* Send MSI on transition from false to true. */
790         return;
791     }
792 
793     pcie_aer_root_notify(dev);
794 }
795 
/* Migration description of a single queued error (PCIEAERErr). */
static const VMStateDescription vmstate_pcie_aer_err = {
    .name = "PCIE_AER_ERROR",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(status, PCIEAERErr),
        VMSTATE_UINT16(source_id, PCIEAERErr),
        VMSTATE_UINT16(flags, PCIEAERErr),
        /* header and prefix are migrated as four 32-bit words each */
        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
        VMSTATE_END_OF_LIST()
    }
};
809 
810 static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
811 {
812     PCIEAERLog *s = opaque;
813 
814     return s->log_num <= s->log_max;
815 }
816 
/* Migration description of a device's AER error queue (PCIEAERLog). */
const VMStateDescription vmstate_pcie_aer_log = {
    .name = "PCIE_AER_ERROR_LOG",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT16(log_num, PCIEAERLog),
        VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL),
        /* checked before the log_num entries of the array are loaded */
        VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
                              vmstate_pcie_aer_err, PCIEAERErr),
        VMSTATE_END_OF_LIST()
    }
};
830 
/* One entry of the error name -> status bit lookup table. */
typedef struct PCIEAERErrorName {
    const char *name;   /* textual error name */
    uint32_t val;       /* matching PCI_ERR_UNC_* or PCI_ERR_COR_* bit */
    bool correctable;   /* true for PCI_ERR_COR_*, false for PCI_ERR_UNC_* */
} PCIEAERErrorName;
836 
837 /*
838  * AER error name -> value conversion table
839  * This naming scheme is same to linux aer-injection tool.
840  */
841 static const struct PCIEAERErrorName pcie_aer_error_list[] = {
842     {
843         .name = "DLP",
844         .val = PCI_ERR_UNC_DLP,
845         .correctable = false,
846     }, {
847         .name = "SDN",
848         .val = PCI_ERR_UNC_SDN,
849         .correctable = false,
850     }, {
851         .name = "POISON_TLP",
852         .val = PCI_ERR_UNC_POISON_TLP,
853         .correctable = false,
854     }, {
855         .name = "FCP",
856         .val = PCI_ERR_UNC_FCP,
857         .correctable = false,
858     }, {
859         .name = "COMP_TIME",
860         .val = PCI_ERR_UNC_COMP_TIME,
861         .correctable = false,
862     }, {
863         .name = "COMP_ABORT",
864         .val = PCI_ERR_UNC_COMP_ABORT,
865         .correctable = false,
866     }, {
867         .name = "UNX_COMP",
868         .val = PCI_ERR_UNC_UNX_COMP,
869         .correctable = false,
870     }, {
871         .name = "RX_OVER",
872         .val = PCI_ERR_UNC_RX_OVER,
873         .correctable = false,
874     }, {
875         .name = "MALF_TLP",
876         .val = PCI_ERR_UNC_MALF_TLP,
877         .correctable = false,
878     }, {
879         .name = "ECRC",
880         .val = PCI_ERR_UNC_ECRC,
881         .correctable = false,
882     }, {
883         .name = "UNSUP",
884         .val = PCI_ERR_UNC_UNSUP,
885         .correctable = false,
886     }, {
887         .name = "ACSV",
888         .val = PCI_ERR_UNC_ACSV,
889         .correctable = false,
890     }, {
891         .name = "INTN",
892         .val = PCI_ERR_UNC_INTN,
893         .correctable = false,
894     }, {
895         .name = "MCBTLP",
896         .val = PCI_ERR_UNC_MCBTLP,
897         .correctable = false,
898     }, {
899         .name = "ATOP_EBLOCKED",
900         .val = PCI_ERR_UNC_ATOP_EBLOCKED,
901         .correctable = false,
902     }, {
903         .name = "TLP_PRF_BLOCKED",
904         .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
905         .correctable = false,
906     }, {
907         .name = "RCVR",
908         .val = PCI_ERR_COR_RCVR,
909         .correctable = true,
910     }, {
911         .name = "BAD_TLP",
912         .val = PCI_ERR_COR_BAD_TLP,
913         .correctable = true,
914     }, {
915         .name = "BAD_DLLP",
916         .val = PCI_ERR_COR_BAD_DLLP,
917         .correctable = true,
918     }, {
919         .name = "REP_ROLL",
920         .val = PCI_ERR_COR_REP_ROLL,
921         .correctable = true,
922     }, {
923         .name = "REP_TIMER",
924         .val = PCI_ERR_COR_REP_TIMER,
925         .correctable = true,
926     }, {
927         .name = "ADV_NONFATAL",
928         .val = PCI_ERR_COR_ADV_NONFATAL,
929         .correctable = true,
930     }, {
931         .name = "INTERNAL",
932         .val = PCI_ERR_COR_INTERNAL,
933         .correctable = true,
934     }, {
935         .name = "HL_OVERFLOW",
936         .val = PCI_ERR_COR_HL_OVERFLOW,
937         .correctable = true,
938     },
939 };
940 
941 int pcie_aer_parse_error_string(const char *error_name,
942                                 uint32_t *status, bool *correctable)
943 {
944     int i;
945 
946     for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
947         const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
948         if (strcmp(error_name, e->name)) {
949             continue;
950         }
951 
952         *status = e->val;
953         *correctable = e->correctable;
954         return 0;
955     }
956     return -EINVAL;
957 }
958