xref: /openbmc/qemu/hw/pci/pcie_aer.c (revision d80d761d)
1 /*
2  * pcie_aer.c
3  *
4  * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "migration/vmstate.h"
23 #include "hw/pci/pci_bridge.h"
24 #include "hw/pci/pcie.h"
25 #include "hw/pci/msix.h"
26 #include "hw/pci/msi.h"
27 #include "hw/pci/pci_bus.h"
28 #include "hw/pci/pcie_regs.h"
29 #include "pci-internal.h"
30 
/*
 * Debug tracing.  Uncomment the define below to make PCIE_DPRINTF() print
 * to stderr, prefixed with the calling function name and line number;
 * otherwise it expands to an empty statement.
 */
/* #define DEBUG_PCIE */
#ifndef DEBUG_PCIE
# define PCIE_DPRINTF(fmt, ...) do {} while (0)
#else
# define PCIE_DPRINTF(fmt, ...)                                         \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#endif

/* Same as PCIE_DPRINTF(), additionally prefixed with device name and devfn. */
#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)

/*
 * Byte offsets added to PCI_ERR_ROOT_ERR_SRC when a source ID is stored:
 * the correctable source at +0, the uncorrectable source at +2.
 */
#define PCI_ERR_SRC_COR_OFFS    0
#define PCI_ERR_SRC_UNCOR_OFFS  2
43 
44 /* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
45 static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
46 {
47     switch (status) {
48     case PCI_ERR_UNC_INTN:
49     case PCI_ERR_UNC_DLP:
50     case PCI_ERR_UNC_SDN:
51     case PCI_ERR_UNC_RX_OVER:
52     case PCI_ERR_UNC_FCP:
53     case PCI_ERR_UNC_MALF_TLP:
54         return PCI_ERR_ROOT_CMD_FATAL_EN;
55     case PCI_ERR_UNC_POISON_TLP:
56     case PCI_ERR_UNC_ECRC:
57     case PCI_ERR_UNC_UNSUP:
58     case PCI_ERR_UNC_COMP_TIME:
59     case PCI_ERR_UNC_COMP_ABORT:
60     case PCI_ERR_UNC_UNX_COMP:
61     case PCI_ERR_UNC_ACSV:
62     case PCI_ERR_UNC_MCBTLP:
63     case PCI_ERR_UNC_ATOP_EBLOCKED:
64     case PCI_ERR_UNC_TLP_PRF_BLOCKED:
65         return PCI_ERR_ROOT_CMD_NONFATAL_EN;
66     default:
67         abort();
68         break;
69     }
70     return PCI_ERR_ROOT_CMD_FATAL_EN;
71 }
72 
73 static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
74 {
75     if (aer_log->log_num == aer_log->log_max) {
76         return -1;
77     }
78     memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
79     aer_log->log_num++;
80     return 0;
81 }
82 
83 static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
84 {
85     assert(aer_log->log_num);
86     *err = aer_log->log[0];
87     aer_log->log_num--;
88     memmove(&aer_log->log[0], &aer_log->log[1],
89             aer_log->log_num * sizeof *err);
90 }
91 
92 static void aer_log_clear_all_err(PCIEAERLog *aer_log)
93 {
94     aer_log->log_num = 0;
95 }
96 
97 int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
98                   uint16_t size, Error **errp)
99 {
100     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, cap_ver,
101                         offset, size);
102     dev->exp.aer_cap = offset;
103 
104     /* clip down the value to avoid unreasonable memory usage */
105     if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
106         error_setg(errp, "Invalid aer_log_max %d. The max number of aer log "
107                 "is %d", dev->exp.aer_log.log_max, PCIE_AER_LOG_MAX_LIMIT);
108         return -EINVAL;
109     }
110     dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
111                                         dev->exp.aer_log.log_max);
112 
113     pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
114                  PCI_ERR_UNC_SUPPORTED);
115     pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
116                  PCI_ERR_UNC_MASK_DEFAULT);
117     pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
118                  PCI_ERR_UNC_SUPPORTED);
119 
120     pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
121                  PCI_ERR_UNC_SEVERITY_DEFAULT);
122     pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
123                  PCI_ERR_UNC_SUPPORTED);
124 
125     pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
126                                PCI_ERR_COR_SUPPORTED);
127 
128     pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
129                  PCI_ERR_COR_MASK_DEFAULT);
130     pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
131                  PCI_ERR_COR_SUPPORTED);
132 
133     /* capabilities and control. multiple header logging is supported */
134     if (dev->exp.aer_log.log_max > 0) {
135         pci_set_long(dev->config + offset + PCI_ERR_CAP,
136                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
137                      PCI_ERR_CAP_MHRC);
138         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
139                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
140                      PCI_ERR_CAP_MHRE);
141     } else {
142         pci_set_long(dev->config + offset + PCI_ERR_CAP,
143                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
144         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
145                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
146     }
147 
148     switch (pcie_cap_get_type(dev)) {
149     case PCI_EXP_TYPE_ROOT_PORT:
150         /* this case will be set by pcie_aer_root_init() */
151         /* fallthrough */
152     case PCI_EXP_TYPE_DOWNSTREAM:
153     case PCI_EXP_TYPE_UPSTREAM:
154         pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
155                                    PCI_BRIDGE_CTL_SERR);
156         pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
157                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
158         break;
159     default:
160         /* nothing */
161         break;
162     }
163     return 0;
164 }
165 
166 void pcie_aer_exit(PCIDevice *dev)
167 {
168     g_free(dev->exp.aer_log.log);
169 }
170 
171 static void pcie_aer_update_uncor_status(PCIDevice *dev)
172 {
173     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
174     PCIEAERLog *aer_log = &dev->exp.aer_log;
175 
176     uint16_t i;
177     for (i = 0; i < aer_log->log_num; i++) {
178         pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
179                                    dev->exp.aer_log.log[i].status);
180     }
181 }
182 
183 /*
184  * return value:
185  * true: error message needs to be sent up
186  * false: error message is masked
187  *
188  * 6.2.6 Error Message Control
189  * Figure 6-3
190  * all pci express devices part
191  */
192 static bool
193 pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
194 {
195     uint16_t devctl = pci_get_word(dev->config + dev->exp.exp_cap +
196                                    PCI_EXP_DEVCTL);
197     if (!(pcie_aer_msg_is_uncor(msg) &&
198           (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR)) &&
199         !((msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN) &&
200           (devctl & PCI_EXP_DEVCTL_NFERE)) &&
201         !((msg->severity == PCI_ERR_ROOT_CMD_COR_EN) &&
202           (devctl & PCI_EXP_DEVCTL_CERE)) &&
203         !((msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN) &&
204           (devctl & PCI_EXP_DEVCTL_FERE))) {
205         return false;
206     }
207 
208     /* Signaled System Error
209      *
210      * 7.5.1.1 Command register
211      * Bit 8 SERR# Enable
212      *
213      * When Set, this bit enables reporting of Non-fatal and Fatal
214      * errors detected by the Function to the Root Complex. Note that
215      * errors are reported if enabled either through this bit or through
216      * the PCI Express specific bits in the Device Control register (see
217      * Section 7.8.4).
218      */
219     pci_word_test_and_set_mask(dev->config + PCI_STATUS,
220                                PCI_STATUS_SIG_SYSTEM_ERROR);
221 
222     if (!(msg->severity &
223           pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
224         return false;
225     }
226 
227     /* send up error message */
228     return true;
229 }
230 
231 /*
232  * return value:
233  * true: error message is sent up
234  * false: error message is masked
235  *
236  * 6.2.6 Error Message Control
237  * Figure 6-3
238  * virtual pci bridge part
239  */
240 static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
241 {
242     uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
243 
244     if (pcie_aer_msg_is_uncor(msg)) {
245         /* Received System Error */
246         pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
247                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
248     }
249 
250     if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
251         return false;
252     }
253     return true;
254 }
255 
256 void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
257 {
258     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
259     assert(vector < PCI_ERR_ROOT_IRQ_MAX);
260     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
261                                  PCI_ERR_ROOT_IRQ);
262     pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
263                                vector << PCI_ERR_ROOT_IRQ_SHIFT);
264 }
265 
266 static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
267 {
268     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
269     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
270     return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
271 }
272 
273 /* Given a status register, get corresponding bits in the command register */
274 static uint32_t pcie_aer_status_to_cmd(uint32_t status)
275 {
276     uint32_t cmd = 0;
277     if (status & PCI_ERR_ROOT_COR_RCV) {
278         cmd |= PCI_ERR_ROOT_CMD_COR_EN;
279     }
280     if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
281         cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
282     }
283     if (status & PCI_ERR_ROOT_FATAL_RCV) {
284         cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
285     }
286     return cmd;
287 }
288 
289 static void pcie_aer_root_notify(PCIDevice *dev)
290 {
291     if (msix_enabled(dev)) {
292         msix_notify(dev, pcie_aer_root_get_vector(dev));
293     } else if (msi_enabled(dev)) {
294         msi_notify(dev, pcie_aer_root_get_vector(dev));
295     } else if (pci_intx(dev) != -1) {
296         pci_irq_assert(dev);
297     }
298 }
299 
300 /*
301  * 6.2.6 Error Message Control
302  * Figure 6-3
303  * root port part
304  */
305 static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
306 {
307     uint16_t cmd;
308     uint8_t *aer_cap;
309     uint32_t root_cmd;
310     uint32_t root_status, prev_status;
311 
312     cmd = pci_get_word(dev->config + PCI_COMMAND);
313     aer_cap = dev->config + dev->exp.aer_cap;
314     root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
315     prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
316 
317     if (cmd & PCI_COMMAND_SERR) {
318         /* System Error.
319          *
320          * The way to report System Error is platform specific and
321          * it isn't implemented in qemu right now.
322          * So just discard the error for now.
323          * OS which cares of aer would receive errors via
324          * native aer mechanims, so this wouldn't matter.
325          */
326     }
327 
328     /* Error Message Received: Root Error Status register */
329     switch (msg->severity) {
330     case PCI_ERR_ROOT_CMD_COR_EN:
331         if (root_status & PCI_ERR_ROOT_COR_RCV) {
332             root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
333         } else {
334             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
335                          msg->source_id);
336         }
337         root_status |= PCI_ERR_ROOT_COR_RCV;
338         break;
339     case PCI_ERR_ROOT_CMD_NONFATAL_EN:
340         root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
341         break;
342     case PCI_ERR_ROOT_CMD_FATAL_EN:
343         if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
344             root_status |= PCI_ERR_ROOT_FIRST_FATAL;
345         }
346         root_status |= PCI_ERR_ROOT_FATAL_RCV;
347         break;
348     default:
349         abort();
350         break;
351     }
352     if (pcie_aer_msg_is_uncor(msg)) {
353         if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
354             root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
355         } else {
356             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
357                          PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
358         }
359         root_status |= PCI_ERR_ROOT_UNCOR_RCV;
360     }
361     pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);
362 
363     /* 6.2.4.1.2 Interrupt Generation */
364     /* All the above did was set some bits in the status register.
365      * Specifically these that match message severity.
366      * The below code relies on this fact. */
367     if (!(root_cmd & msg->severity) ||
368         (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
369         /* Condition is not being set or was already true so nothing to do. */
370         return;
371     }
372 
373     pcie_aer_root_notify(dev);
374 }
375 
376 /*
377  * 6.2.6 Error Message Control Figure 6-3
378  *
379  * Walk up the bus tree from the device, propagate the error message.
380  */
381 static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
382 {
383     uint8_t type;
384 
385     while (dev) {
386         if (!pci_is_express(dev)) {
387             /* just ignore it */
388             /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
389              * Consider e.g. a PCI bridge above a PCI Express device. */
390             return;
391         }
392 
393         type = pcie_cap_get_type(dev);
394         if ((type == PCI_EXP_TYPE_ROOT_PORT ||
395             type == PCI_EXP_TYPE_UPSTREAM ||
396             type == PCI_EXP_TYPE_DOWNSTREAM) &&
397             !pcie_aer_msg_vbridge(dev, msg)) {
398                 return;
399         }
400         if (!pcie_aer_msg_alldev(dev, msg)) {
401             return;
402         }
403         if (type == PCI_EXP_TYPE_ROOT_PORT) {
404             pcie_aer_msg_root_port(dev, msg);
405             /* Root port can notify system itself,
406                or send the error message to root complex event collector. */
407             /*
408              * if root port is associated with an event collector,
409              * return the root complex event collector here.
410              * For now root complex event collector isn't supported.
411              */
412             return;
413         }
414         dev = pci_bridge_get_device(pci_get_bus(dev));
415     }
416 }
417 
418 static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
419 {
420     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
421     uint8_t first_bit = ctz32(err->status);
422     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
423     int i;
424 
425     assert(err->status);
426     assert(!(err->status & (err->status - 1)));
427 
428     errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
429     errcap |= PCI_ERR_CAP_FEP(first_bit);
430 
431     if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
432         for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
433             /* 7.10.8 Header Log Register */
434             uint8_t *header_log =
435                 aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
436             stl_be_p(header_log, err->header[i]);
437         }
438     } else {
439         assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
440         memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
441     }
442 
443     if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
444         (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
445          PCI_EXP_DEVCAP2_EETLPP)) {
446         for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
447             /* 7.10.12 tlp prefix log register */
448             uint8_t *prefix_log =
449                 aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
450             stl_be_p(prefix_log, err->prefix[i]);
451         }
452         errcap |= PCI_ERR_CAP_TLP;
453     } else {
454         memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
455                PCI_ERR_TLP_PREFIX_LOG_SIZE);
456     }
457     pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
458 }
459 
460 static void pcie_aer_clear_log(PCIDevice *dev)
461 {
462     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
463 
464     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
465                                  PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
466 
467     memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
468     memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
469 }
470 
471 static void pcie_aer_clear_error(PCIDevice *dev)
472 {
473     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
474     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
475     PCIEAERLog *aer_log = &dev->exp.aer_log;
476     PCIEAERErr err;
477 
478     if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
479         pcie_aer_clear_log(dev);
480         return;
481     }
482 
483     /*
484      * If more errors are queued, set corresponding bits in uncorrectable
485      * error status.
486      * We emulate uncorrectable error status register as W1CS.
487      * So set bit in uncorrectable error status here again for multiple
488      * error recording support.
489      *
490      * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
491      */
492     pcie_aer_update_uncor_status(dev);
493 
494     aer_log_del_err(aer_log, &err);
495     pcie_aer_update_log(dev, &err);
496 }
497 
498 static int pcie_aer_record_error(PCIDevice *dev,
499                                  const PCIEAERErr *err)
500 {
501     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
502     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
503     int fep = PCI_ERR_CAP_FEP(errcap);
504 
505     assert(err->status);
506     assert(!(err->status & (err->status - 1)));
507 
508     if (errcap & PCI_ERR_CAP_MHRE &&
509         (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
510         /*  Not first error. queue error */
511         if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
512             /* overflow */
513             return -1;
514         }
515         return 0;
516     }
517 
518     pcie_aer_update_log(dev, err);
519     return 0;
520 }
521 
522 typedef struct PCIEAERInject {
523     PCIDevice *dev;
524     uint8_t *aer_cap;
525     const PCIEAERErr *err;
526     uint16_t devctl;
527     uint16_t devsta;
528     uint32_t error_status;
529     bool unsupported_request;
530     bool log_overflow;
531     PCIEAERMsg msg;
532 } PCIEAERInject;
533 
534 static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
535                                       uint32_t uncor_status,
536                                       bool is_advisory_nonfatal)
537 {
538     PCIDevice *dev = inj->dev;
539 
540     inj->devsta |= PCI_EXP_DEVSTA_CED;
541     if (inj->unsupported_request) {
542         inj->devsta |= PCI_EXP_DEVSTA_URD;
543     }
544     pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
545 
546     if (inj->aer_cap) {
547         uint32_t mask;
548         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
549                                    inj->error_status);
550         mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
551         if (mask & inj->error_status) {
552             return false;
553         }
554         if (is_advisory_nonfatal) {
555             uint32_t uncor_mask =
556                 pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
557             if (!(uncor_mask & uncor_status)) {
558                 inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
559             }
560             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
561                                        uncor_status);
562         }
563     }
564 
565     if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
566         return false;
567     }
568     if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
569         return false;
570     }
571 
572     inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
573     return true;
574 }
575 
576 static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
577 {
578     PCIDevice *dev = inj->dev;
579     uint16_t cmd;
580 
581     if (is_fatal) {
582         inj->devsta |= PCI_EXP_DEVSTA_FED;
583     } else {
584         inj->devsta |= PCI_EXP_DEVSTA_NFED;
585     }
586     if (inj->unsupported_request) {
587         inj->devsta |= PCI_EXP_DEVSTA_URD;
588     }
589     pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
590 
591     if (inj->aer_cap) {
592         uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
593         if (mask & inj->error_status) {
594             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
595                                        inj->error_status);
596             return false;
597         }
598 
599         inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
600         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
601                                    inj->error_status);
602     }
603 
604     cmd = pci_get_word(dev->config + PCI_COMMAND);
605     if (inj->unsupported_request &&
606         !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
607         return false;
608     }
609     if (is_fatal) {
610         if (!((cmd & PCI_COMMAND_SERR) ||
611               (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
612             return false;
613         }
614         inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
615     } else {
616         if (!((cmd & PCI_COMMAND_SERR) ||
617               (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
618             return false;
619         }
620         inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
621     }
622     return true;
623 }
624 
625 /*
626  * non-Function specific error must be recorded in all functions.
627  * It is the responsibility of the caller of this function.
628  * It is also caller's responsibility to determine which function should
629  * report the error.
630  *
631  * 6.2.4 Error Logging
632  * 6.2.5 Sequence of Device Error Signaling and Logging Operations
633  * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
634  *             Operations
635  */
636 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
637 {
638     uint8_t *aer_cap = NULL;
639     uint16_t devctl = 0;
640     uint16_t devsta = 0;
641     uint32_t error_status = err->status;
642     PCIEAERInject inj;
643 
644     if (!pci_is_express(dev)) {
645         return -ENOSYS;
646     }
647 
648     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
649         error_status &= PCI_ERR_COR_SUPPORTED;
650     } else {
651         error_status &= PCI_ERR_UNC_SUPPORTED;
652     }
653 
654     /* invalid status bit. one and only one bit must be set */
655     if (!error_status || (error_status & (error_status - 1))) {
656         return -EINVAL;
657     }
658 
659     if (dev->exp.aer_cap) {
660         uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
661         aer_cap = dev->config + dev->exp.aer_cap;
662         devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
663         devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
664     }
665 
666     inj.dev = dev;
667     inj.aer_cap = aer_cap;
668     inj.err = err;
669     inj.devctl = devctl;
670     inj.devsta = devsta;
671     inj.error_status = error_status;
672     inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
673         err->status == PCI_ERR_UNC_UNSUP;
674     inj.log_overflow = false;
675 
676     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
677         if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
678             return 0;
679         }
680     } else {
681         bool is_fatal =
682             pcie_aer_uncor_default_severity(error_status) ==
683             PCI_ERR_ROOT_CMD_FATAL_EN;
684         if (aer_cap) {
685             is_fatal =
686                 error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
687         }
688         if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
689             inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
690             if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
691                 return 0;
692             }
693         } else {
694             if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
695                 return 0;
696             }
697         }
698     }
699 
700     /* send up error message */
701     inj.msg.source_id = err->source_id;
702     pcie_aer_msg(dev, &inj.msg);
703 
704     if (inj.log_overflow) {
705         PCIEAERErr header_log_overflow = {
706             .status = PCI_ERR_COR_HL_OVERFLOW,
707             .flags = PCIE_AER_ERR_IS_CORRECTABLE,
708         };
709         int ret = pcie_aer_inject_error(dev, &header_log_overflow);
710         assert(!ret);
711     }
712     return 0;
713 }
714 
715 void pcie_aer_write_config(PCIDevice *dev,
716                            uint32_t addr, uint32_t val, int len)
717 {
718     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
719     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
720     uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
721     uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
722 
723     /* uncorrectable error */
724     if (!(uncorsta & first_error)) {
725         /* the bit that corresponds to the first error is cleared */
726         pcie_aer_clear_error(dev);
727     } else if (errcap & PCI_ERR_CAP_MHRE) {
728         /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
729          * nothing should happen. So we have to revert the modification to
730          * the register.
731          */
732         pcie_aer_update_uncor_status(dev);
733     } else {
734         /* capability & control
735          * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
736          */
737         aer_log_clear_all_err(&dev->exp.aer_log);
738     }
739 }
740 
741 void pcie_aer_root_init(PCIDevice *dev)
742 {
743     uint16_t pos = dev->exp.aer_cap;
744 
745     pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
746                  PCI_ERR_ROOT_CMD_EN_MASK);
747     pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
748                  PCI_ERR_ROOT_STATUS_REPORT_MASK);
749     /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
750      * device-specific method.
751      */
752     pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
753                  ~PCI_ERR_ROOT_IRQ);
754 }
755 
756 void pcie_aer_root_reset(PCIDevice *dev)
757 {
758     uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
759 
760     pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
761 
762     /*
763      * Advanced Error Interrupt Message Number in Root Error Status Register
764      * must be updated by chip dependent code because it's chip dependent
765      * which number is used.
766      */
767 }
768 
769 void pcie_aer_root_write_config(PCIDevice *dev,
770                                 uint32_t addr, uint32_t val, int len,
771                                 uint32_t root_cmd_prev)
772 {
773     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
774     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
775     uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
776     uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
777     /* 6.2.4.1.2 Interrupt Generation */
778     if (!msix_enabled(dev) && !msi_enabled(dev)) {
779         if (pci_intx(dev) != -1) {
780             pci_set_irq(dev, !!(root_cmd & enabled_cmd));
781         }
782         return;
783     }
784 
785     if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
786         /* Send MSI on transition from false to true. */
787         return;
788     }
789 
790     pcie_aer_root_notify(dev);
791 }
792 
793 static const VMStateDescription vmstate_pcie_aer_err = {
794     .name = "PCIE_AER_ERROR",
795     .version_id = 1,
796     .minimum_version_id = 1,
797     .fields = (VMStateField[]) {
798         VMSTATE_UINT32(status, PCIEAERErr),
799         VMSTATE_UINT16(source_id, PCIEAERErr),
800         VMSTATE_UINT16(flags, PCIEAERErr),
801         VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
802         VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
803         VMSTATE_END_OF_LIST()
804     }
805 };
806 
807 static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
808 {
809     PCIEAERLog *s = opaque;
810 
811     return s->log_num <= s->log_max;
812 }
813 
814 const VMStateDescription vmstate_pcie_aer_log = {
815     .name = "PCIE_AER_ERROR_LOG",
816     .version_id = 1,
817     .minimum_version_id = 1,
818     .fields = (VMStateField[]) {
819         VMSTATE_UINT16(log_num, PCIEAERLog),
820         VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL),
821         VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
822         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
823                               vmstate_pcie_aer_err, PCIEAERErr),
824         VMSTATE_END_OF_LIST()
825     }
826 };
827 
/* One row of the error name -> status value conversion table. */
typedef struct PCIEAERErrorName {
    const char *name;   /* name accepted by pcie_aer_parse_error_string() */
    uint32_t val;       /* matching PCI_ERR_UNC_* or PCI_ERR_COR_* value */
    bool correctable;   /* true for PCI_ERR_COR_*, false for PCI_ERR_UNC_* */
} PCIEAERErrorName;
833 
834 /*
835  * AER error name -> value conversion table
836  * This naming scheme is same to linux aer-injection tool.
837  */
838 static const struct PCIEAERErrorName pcie_aer_error_list[] = {
839     {
840         .name = "DLP",
841         .val = PCI_ERR_UNC_DLP,
842         .correctable = false,
843     }, {
844         .name = "SDN",
845         .val = PCI_ERR_UNC_SDN,
846         .correctable = false,
847     }, {
848         .name = "POISON_TLP",
849         .val = PCI_ERR_UNC_POISON_TLP,
850         .correctable = false,
851     }, {
852         .name = "FCP",
853         .val = PCI_ERR_UNC_FCP,
854         .correctable = false,
855     }, {
856         .name = "COMP_TIME",
857         .val = PCI_ERR_UNC_COMP_TIME,
858         .correctable = false,
859     }, {
860         .name = "COMP_ABORT",
861         .val = PCI_ERR_UNC_COMP_ABORT,
862         .correctable = false,
863     }, {
864         .name = "UNX_COMP",
865         .val = PCI_ERR_UNC_UNX_COMP,
866         .correctable = false,
867     }, {
868         .name = "RX_OVER",
869         .val = PCI_ERR_UNC_RX_OVER,
870         .correctable = false,
871     }, {
872         .name = "MALF_TLP",
873         .val = PCI_ERR_UNC_MALF_TLP,
874         .correctable = false,
875     }, {
876         .name = "ECRC",
877         .val = PCI_ERR_UNC_ECRC,
878         .correctable = false,
879     }, {
880         .name = "UNSUP",
881         .val = PCI_ERR_UNC_UNSUP,
882         .correctable = false,
883     }, {
884         .name = "ACSV",
885         .val = PCI_ERR_UNC_ACSV,
886         .correctable = false,
887     }, {
888         .name = "INTN",
889         .val = PCI_ERR_UNC_INTN,
890         .correctable = false,
891     }, {
892         .name = "MCBTLP",
893         .val = PCI_ERR_UNC_MCBTLP,
894         .correctable = false,
895     }, {
896         .name = "ATOP_EBLOCKED",
897         .val = PCI_ERR_UNC_ATOP_EBLOCKED,
898         .correctable = false,
899     }, {
900         .name = "TLP_PRF_BLOCKED",
901         .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
902         .correctable = false,
903     }, {
904         .name = "RCVR",
905         .val = PCI_ERR_COR_RCVR,
906         .correctable = true,
907     }, {
908         .name = "BAD_TLP",
909         .val = PCI_ERR_COR_BAD_TLP,
910         .correctable = true,
911     }, {
912         .name = "BAD_DLLP",
913         .val = PCI_ERR_COR_BAD_DLLP,
914         .correctable = true,
915     }, {
916         .name = "REP_ROLL",
917         .val = PCI_ERR_COR_REP_ROLL,
918         .correctable = true,
919     }, {
920         .name = "REP_TIMER",
921         .val = PCI_ERR_COR_REP_TIMER,
922         .correctable = true,
923     }, {
924         .name = "ADV_NONFATAL",
925         .val = PCI_ERR_COR_ADV_NONFATAL,
926         .correctable = true,
927     }, {
928         .name = "INTERNAL",
929         .val = PCI_ERR_COR_INTERNAL,
930         .correctable = true,
931     }, {
932         .name = "HL_OVERFLOW",
933         .val = PCI_ERR_COR_HL_OVERFLOW,
934         .correctable = true,
935     },
936 };
937 
938 int pcie_aer_parse_error_string(const char *error_name,
939                                 uint32_t *status, bool *correctable)
940 {
941     int i;
942 
943     for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
944         const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
945         if (strcmp(error_name, e->name)) {
946             continue;
947         }
948 
949         *status = e->val;
950         *correctable = e->correctable;
951         return 0;
952     }
953     return -EINVAL;
954 }
955