xref: /openbmc/qemu/hw/mem/cxl_type3.c (revision 9eb9350c0e519be97716f6b27f664bd0a3c41a36)
1 /*
2  * CXL Type 3 (memory expander) device
3  *
4  * Copyright(C) 2020 Intel Corporation.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2. See the
7  * COPYING file in the top-level directory.
8  *
9  * SPDX-License-Identifier: GPL-2.0-only
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/units.h"
14 #include "qemu/error-report.h"
15 #include "qapi/qapi-commands-cxl.h"
16 #include "hw/mem/memory-device.h"
17 #include "hw/mem/pc-dimm.h"
18 #include "hw/pci/pci.h"
19 #include "hw/qdev-properties.h"
20 #include "hw/qdev-properties-system.h"
21 #include "qapi/error.h"
22 #include "qemu/log.h"
23 #include "qemu/module.h"
24 #include "qemu/pmem.h"
25 #include "qemu/range.h"
26 #include "qemu/rcu.h"
27 #include "qemu/guest-random.h"
28 #include "sysemu/hostmem.h"
29 #include "sysemu/numa.h"
30 #include "hw/cxl/cxl.h"
31 #include "hw/pci/msix.h"
32 
33 #define DWORD_BYTE 4
34 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
35 
36 /* Default CDAT entries for a memory region */
37 enum {
38     CT3_CDAT_DSMAS,
39     CT3_CDAT_DSLBIS0,
40     CT3_CDAT_DSLBIS1,
41     CT3_CDAT_DSLBIS2,
42     CT3_CDAT_DSLBIS3,
43     CT3_CDAT_DSEMTS,
44     CT3_CDAT_NUM_ENTRIES
45 };
46 
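/*
 * Build the default CDAT entries for one memory region: a DSMAS entry
 * describing the DPA range, four DSLBIS entries (read/write latency and
 * read/write bandwidth) referencing that DSMAS handle, and a DSEMTS entry
 * providing the EFI memory type mapping.
 */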
47 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
48                                           int dsmad_handle, uint64_t size,
49                                           bool is_pmem, bool is_dynamic,
50                                           uint64_t dpa_base)
51 {
52     CDATDsmas *dsmas;
53     CDATDslbis *dslbis0;
54     CDATDslbis *dslbis1;
55     CDATDslbis *dslbis2;
56     CDATDslbis *dslbis3;
57     CDATDsemts *dsemts;
58 
59     dsmas = g_malloc(sizeof(*dsmas));
60     *dsmas = (CDATDsmas) {
61         .header = {
62             .type = CDAT_TYPE_DSMAS,
63             .length = sizeof(*dsmas),
64         },
65         .DSMADhandle = dsmad_handle,
66         .flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
67                  (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
68         .DPA_base = dpa_base,
69         .DPA_length = size,
70     };
71 
72     /* For now, no memory-side cache; plausible-ish numbers */
73     dslbis0 = g_malloc(sizeof(*dslbis0));
74     *dslbis0 = (CDATDslbis) {
75         .header = {
76             .type = CDAT_TYPE_DSLBIS,
77             .length = sizeof(*dslbis0),
78         },
79         .handle = dsmad_handle,
80         .flags = HMAT_LB_MEM_MEMORY,
81         .data_type = HMAT_LB_DATA_READ_LATENCY,
82         .entry_base_unit = 10000, /* 10ns base */
83         .entry[0] = 15, /* 150ns */
84     };
85 
86     dslbis1 = g_malloc(sizeof(*dslbis1));
87     *dslbis1 = (CDATDslbis) {
88         .header = {
89             .type = CDAT_TYPE_DSLBIS,
90             .length = sizeof(*dslbis1),
91         },
92         .handle = dsmad_handle,
93         .flags = HMAT_LB_MEM_MEMORY,
94         .data_type = HMAT_LB_DATA_WRITE_LATENCY,
95         .entry_base_unit = 10000,
96         .entry[0] = 25, /* 250ns */
97     };
98 
99     dslbis2 = g_malloc(sizeof(*dslbis2));
100     *dslbis2 = (CDATDslbis) {
101         .header = {
102             .type = CDAT_TYPE_DSLBIS,
103             .length = sizeof(*dslbis2),
104         },
105         .handle = dsmad_handle,
106         .flags = HMAT_LB_MEM_MEMORY,
107         .data_type = HMAT_LB_DATA_READ_BANDWIDTH,
108         .entry_base_unit = 1000, /* GB/s */
109         .entry[0] = 16,
110     };
111 
112     dslbis3 = g_malloc(sizeof(*dslbis3));
113     *dslbis3 = (CDATDslbis) {
114         .header = {
115             .type = CDAT_TYPE_DSLBIS,
116             .length = sizeof(*dslbis3),
117         },
118         .handle = dsmad_handle,
119         .flags = HMAT_LB_MEM_MEMORY,
120         .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
121         .entry_base_unit = 1000, /* GB/s */
122         .entry[0] = 16,
123     };
124 
125     dsemts = g_malloc(sizeof(*dsemts));
126     *dsemts = (CDATDsemts) {
127         .header = {
128             .type = CDAT_TYPE_DSEMTS,
129             .length = sizeof(*dsemts),
130         },
131         .DSMAS_handle = dsmad_handle,
132         /*
133          * NV: Reserved - the non-volatile flag from DSMAS is what matters
134          * V: EFI_MEMORY_SP
135          */
136         .EFI_memory_type_attr = is_pmem ? 2 : 1,
137         .DPA_offset = 0,
138         .DPA_length = size,
139     };
140 
141     /* Header always at start of structure */
142     cdat_table[CT3_CDAT_DSMAS] = (CDATSubHeader *)dsmas;
143     cdat_table[CT3_CDAT_DSLBIS0] = (CDATSubHeader *)dslbis0;
144     cdat_table[CT3_CDAT_DSLBIS1] = (CDATSubHeader *)dslbis1;
145     cdat_table[CT3_CDAT_DSLBIS2] = (CDATSubHeader *)dslbis2;
146     cdat_table[CT3_CDAT_DSLBIS3] = (CDATSubHeader *)dslbis3;
147     cdat_table[CT3_CDAT_DSEMTS] = (CDATSubHeader *)dsemts;
148 }
149 
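/*
 * Build the device's CDAT table. Entries are laid out in DPA order: volatile
 * memory at DPA 0, persistent memory immediately after it, then one set of
 * entries per dynamic capacity region. Returns the number of entries, 0 when
 * there is nothing to describe, or a negative errno on failure.
 */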
150 static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
151 {
152     g_autofree CDATSubHeader **table = NULL;
153     CXLType3Dev *ct3d = priv;
154     MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
155     MemoryRegion *dc_mr = NULL;
156     uint64_t vmr_size = 0, pmr_size = 0;
157     int dsmad_handle = 0;
158     int cur_ent = 0;
159     int len = 0;
160 
161     if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
162         return 0;
163     }
164 
165     if (ct3d->hostvmem) {
166         volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
167         if (!volatile_mr) {
168             return -EINVAL;
169         }
170         len += CT3_CDAT_NUM_ENTRIES;
171         vmr_size = memory_region_size(volatile_mr);
172     }
173 
174     if (ct3d->hostpmem) {
175         nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostpmem);
176         if (!nonvolatile_mr) {
177             return -EINVAL;
178         }
179         len += CT3_CDAT_NUM_ENTRIES;
180         pmr_size = memory_region_size(nonvolatile_mr);
181     }
182 
183     if (ct3d->dc.num_regions) {
184         if (!ct3d->dc.host_dc) {
185             return -EINVAL;
186         }
187         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
188         if (!dc_mr) {
189             return -EINVAL;
190         }
191         len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
192     }
193 
194     table = g_malloc0(len * sizeof(*table));
195 
196     /* Now fill them in */
197     if (volatile_mr) {
198         ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
199                                       false, false, 0);
200         cur_ent = CT3_CDAT_NUM_ENTRIES;
201     }
202 
203     if (nonvolatile_mr) {
204         uint64_t base = vmr_size;
205         ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
206                                       pmr_size, true, false, base);
207         cur_ent += CT3_CDAT_NUM_ENTRIES;
208     }
209 
210     if (dc_mr) {
211         int i;
212         uint64_t region_base = vmr_size + pmr_size;
213 
214         /*
215          * We assume the dynamic capacity to be volatile for now.
216          * Non-volatile dynamic capacity will be added if needed in the
217          * future.
218          */
219         for (i = 0; i < ct3d->dc.num_regions; i++) {
220             ct3_build_cdat_entries_for_mr(&(table[cur_ent]),
221                                           dsmad_handle++,
222                                           ct3d->dc.regions[i].len,
223                                           false, true, region_base);
224             ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
225 
226             cur_ent += CT3_CDAT_NUM_ENTRIES;
227             region_base += ct3d->dc.regions[i].len;
228         }
229     }
230 
231     assert(len == cur_ent);
232 
233     *cdat_table = g_steal_pointer(&table);
234 
235     return len;
236 }
237 
238 static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv)
239 {
240     int i;
241 
242     for (i = 0; i < num; i++) {
243         g_free(cdat_table[i]);
244     }
245     g_free(cdat_table);
246 }
247 
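/*
 * Serve a CDAT Table Access request received through the DOE mailbox: copy
 * the requested CDAT entry into the read mailbox and report the handle of the
 * next entry, or CXL_DOE_TAB_ENT_MAX once the final entry has been returned.
 */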
248 static bool cxl_doe_cdat_rsp(DOECap *doe_cap)
249 {
250     CDATObject *cdat = &CXL_TYPE3(doe_cap->pdev)->cxl_cstate.cdat;
251     uint16_t ent;
252     void *base;
253     uint32_t len;
254     CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
255     CDATRsp rsp;
256 
257     assert(cdat->entry_len);
258 
259     /* Discard if the request is shorter than expected */
260     if (pcie_doe_get_obj_len(req) <
261         DIV_ROUND_UP(sizeof(CDATReq), DWORD_BYTE)) {
262         return false;
263     }
264 
265     ent = req->entry_handle;
266     base = cdat->entry[ent].base;
267     len = cdat->entry[ent].length;
268 
269     rsp = (CDATRsp) {
270         .header = {
271             .vendor_id = CXL_VENDOR_ID,
272             .data_obj_type = CXL_DOE_TABLE_ACCESS,
273             .reserved = 0x0,
274             .length = DIV_ROUND_UP((sizeof(rsp) + len), DWORD_BYTE),
275         },
276         .rsp_code = CXL_DOE_TAB_RSP,
277         .table_type = CXL_DOE_TAB_TYPE_CDAT,
278         .entry_handle = (ent < cdat->entry_len - 1) ?
279                         ent + 1 : CXL_DOE_TAB_ENT_MAX,
280     };
281 
282     memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp));
283     memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), DWORD_BYTE),
284            base, len);
285 
286     doe_cap->read_mbox_len += rsp.header.length;
287 
288     return true;
289 }
290 
291 static uint32_t ct3d_config_read(PCIDevice *pci_dev, uint32_t addr, int size)
292 {
293     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
294     uint32_t val;
295 
296     if (pcie_doe_read_config(&ct3d->doe_cdat, addr, size, &val)) {
297         return val;
298     }
299 
300     return pci_default_read_config(pci_dev, addr, size);
301 }
302 
303 static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val,
304                               int size)
305 {
306     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
307 
308     pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size);
309     pci_default_write_config(pci_dev, addr, val, size);
310     pcie_aer_write_config(pci_dev, addr, val, size);
311 }
312 
313 /*
314  * Null value of all Fs suggested by IEEE RA guidelines for use of
315  * EUI, OUI and CID
316  */
317 #define UI64_NULL ~(0ULL)
318 
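/*
 * Populate the DVSECs exposed in config space: the CXL Device DVSEC (with
 * range 1/2 size and base fields derived from the volatile and persistent
 * backends), the Register Locator, the Device GPF and the Flex Bus Port
 * DVSECs.
 */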
319 static void build_dvsecs(CXLType3Dev *ct3d)
320 {
321     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
322     uint8_t *dvsec;
323     uint32_t range1_size_hi, range1_size_lo,
324              range1_base_hi = 0, range1_base_lo = 0,
325              range2_size_hi = 0, range2_size_lo = 0,
326              range2_base_hi = 0, range2_base_lo = 0;
327 
328     /*
329      * Volatile memory is mapped at DPA 0x0
330      * Persistent memory is mapped at DPA (volatile->size)
331      */
332     if (ct3d->hostvmem) {
333         range1_size_hi = ct3d->hostvmem->size >> 32;
334         range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
335                          (ct3d->hostvmem->size & 0xF0000000);
336         if (ct3d->hostpmem) {
337             range2_size_hi = ct3d->hostpmem->size >> 32;
338             range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
339                              (ct3d->hostpmem->size & 0xF0000000);
340         }
341     } else if (ct3d->hostpmem) {
342         range1_size_hi = ct3d->hostpmem->size >> 32;
343         range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
344                          (ct3d->hostpmem->size & 0xF0000000);
345     } else {
346         /*
347          * For a DCD with no static memory, set the Memory Active and Memory
348          * Class bits. No range is set.
349          */
350         range1_size_hi = 0;
351         range1_size_lo = (2 << 5) | (2 << 2) | 0x3;
352     }
353 
354     dvsec = (uint8_t *)&(CXLDVSECDevice){
355         .cap = 0x1e,
356         .ctrl = 0x2,
357         .status2 = 0x2,
358         .range1_size_hi = range1_size_hi,
359         .range1_size_lo = range1_size_lo,
360         .range1_base_hi = range1_base_hi,
361         .range1_base_lo = range1_base_lo,
362         .range2_size_hi = range2_size_hi,
363         .range2_size_lo = range2_size_lo,
364         .range2_base_hi = range2_base_hi,
365         .range2_base_lo = range2_base_lo,
366     };
367     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
368                                PCIE_CXL_DEVICE_DVSEC_LENGTH,
369                                PCIE_CXL_DEVICE_DVSEC,
370                                PCIE_CXL31_DEVICE_DVSEC_REVID, dvsec);
371 
372     dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
373         .rsvd         = 0,
374         .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
375         .reg0_base_hi = 0,
376         .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX,
377         .reg1_base_hi = 0,
378     };
379     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
380                                REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
381                                REG_LOC_DVSEC_REVID, dvsec);
382     dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){
383         .phase2_duration = 0x603, /* 3 seconds */
384         .phase2_power = 0x33, /* 0x33 milliwatts */
385     };
386     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
387                                GPF_DEVICE_DVSEC_LENGTH, GPF_DEVICE_DVSEC,
388                                GPF_DEVICE_DVSEC_REVID, dvsec);
389 
390     dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
391         .cap                     = 0x26, /* 68B, IO, Mem, non-MLD */
392         .ctrl                    = 0x02, /* IO always enabled */
393         .status                  = 0x26, /* same as capabilities */
394         .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? */
395     };
396     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
397                                PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
398                                PCIE_FLEXBUS_PORT_DVSEC,
399                                PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec);
400 }
401 
402 static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
403 {
404     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
405     ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
406     uint32_t *cache_mem = cregs->cache_mem_registers;
407     uint32_t ctrl;
408 
409     ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
410     /* TODO: Sanity checks that the decoder is possible */
411     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
412     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
413 
414     stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
415 }
416 
417 static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which)
418 {
419     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
420     ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
421     uint32_t *cache_mem = cregs->cache_mem_registers;
422     uint32_t ctrl;
423 
424     ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
425 
426     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
427     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0);
428 
429     stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
430 }
431 
432 static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
433 {
434     switch (qmp_err) {
435     case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
436         return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
437     case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
438         return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
439     case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
440         return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
441     case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
442         return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
443     case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
444         return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
445     case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
446         return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
447     case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
448         return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
449     case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
450         return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
451     case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
452         return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
453     case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
454         return CXL_RAS_UNC_ERR_RSVD_ENCODING;
455     case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
456         return CXL_RAS_UNC_ERR_POISON_RECEIVED;
457     case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
458         return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
459     case CXL_UNCOR_ERROR_TYPE_INTERNAL:
460         return CXL_RAS_UNC_ERR_INTERNAL;
461     case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
462         return CXL_RAS_UNC_ERR_CXL_IDE_TX;
463     case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
464         return CXL_RAS_UNC_ERR_CXL_IDE_RX;
465     default:
466         return -EINVAL;
467     }
468 }
469 
470 static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err)
471 {
472     switch (qmp_err) {
473     case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
474         return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
475     case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
476         return CXL_RAS_COR_ERR_MEM_DATA_ECC;
477     case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
478         return CXL_RAS_COR_ERR_CRC_THRESHOLD;
479     case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
480         return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
481     case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
482         return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
483     case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
484         return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
485     case CXL_COR_ERROR_TYPE_PHYSICAL:
486         return CXL_RAS_COR_ERR_PHYSICAL;
487     default:
488         return -EINVAL;
489     }
490 }
491 
492 static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
493                            unsigned size)
494 {
495     CXLComponentState *cxl_cstate = opaque;
496     ComponentRegisters *cregs = &cxl_cstate->crb;
497     CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate);
498     uint32_t *cache_mem = cregs->cache_mem_registers;
499     bool should_commit = false;
500     bool should_uncommit = false;
501     int which_hdm = -1;
502 
503     assert(size == 4);
504     g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE);
505 
506     switch (offset) {
507     case A_CXL_HDM_DECODER0_CTRL:
508         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
509         should_uncommit = !should_commit;
510         which_hdm = 0;
511         break;
512     case A_CXL_HDM_DECODER1_CTRL:
513         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
514         should_uncommit = !should_commit;
515         which_hdm = 1;
516         break;
517     case A_CXL_HDM_DECODER2_CTRL:
518         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
519         should_uncommit = !should_commit;
520         which_hdm = 2;
521         break;
522     case A_CXL_HDM_DECODER3_CTRL:
523         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
524         should_uncommit = !should_commit;
525         which_hdm = 3;
526         break;
527     case A_CXL_RAS_UNC_ERR_STATUS:
528     {
529         uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
530         uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL,
531                                  FIRST_ERROR_POINTER);
532         CXLError *cxl_err;
533         uint32_t unc_err;
534 
535         /*
536          * If a single bit is written that corresponds to clearing the
537          * first error pointer, update the status and header log.
538          */
539         if (!QTAILQ_EMPTY(&ct3d->error_list)) {
540             if ((1 << fe) ^ value) {
541                 CXLError *cxl_next;
542                 /*
543                  * Software is using the wrong flow for multiple header
544                  * recording. Follow the behavior in PCIe r6.0 and assume
545                  * multiple header support. Clearing all matching records
546                  * when more than one bit is set is an implementation
547                  * defined choice - it corresponds most closely to hardware
548                  * that is not capable of multiple header recording.
549                  */
550                 QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node,
551                                     cxl_next) {
552                     if ((1 << cxl_err->type) & value) {
553                         QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
554                         g_free(cxl_err);
555                     }
556                 }
557             } else {
558                 /* Done with previous FE, so drop from list */
559                 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
560                 QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
561                 g_free(cxl_err);
562             }
563 
564             /*
565              * If there is another FE, then put that in place and update
566              * the header log
567              */
568             if (!QTAILQ_EMPTY(&ct3d->error_list)) {
569                 uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
570                 int i;
571 
572                 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
573                 for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
574                     stl_le_p(header_log + i, cxl_err->header[i]);
575                 }
576                 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
577                                      FIRST_ERROR_POINTER, cxl_err->type);
578             } else {
579                 /*
580                  * If there are no more errors, follow the recommendation of
581                  * PCIe r6.0 6.2.4.2 and set the first error pointer to a status
582                  * bit that will never be used.
583                  */
584                 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
585                                      FIRST_ERROR_POINTER,
586                                      CXL_RAS_UNC_ERR_CXL_UNUSED);
587             }
588             stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl);
589         }
590         unc_err = 0;
591         QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
592             unc_err |= 1 << cxl_err->type;
593         }
594         stl_le_p((uint8_t *)cache_mem + offset, unc_err);
595 
596         return;
597     }
598     case A_CXL_RAS_COR_ERR_STATUS:
599     {
600         uint32_t rw1c = value;
601         uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset);
602         temp &= ~rw1c;
603         stl_le_p((uint8_t *)cache_mem + offset, temp);
604         return;
605     }
606     default:
607         break;
608     }
609 
610     stl_le_p((uint8_t *)cache_mem + offset, value);
611     if (should_commit) {
612         hdm_decoder_commit(ct3d, which_hdm);
613     } else if (should_uncommit) {
614         hdm_decoder_uncommit(ct3d, which_hdm);
615     }
616 }
617 
618 /*
619  * TODO: dc region configuration will be updated once host backend and address
620  * space support is added for DCD.
621  */
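/*
 * Carve the dynamic capacity backend into num_regions equally sized regions
 * placed directly after any static volatile and persistent capacity. Each
 * region uses a 2 MiB block size and gets a bitmap tracking which blocks are
 * currently backed by extents.
 */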
622 static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
623 {
624     int i;
625     uint64_t region_base = 0;
626     uint64_t region_len;
627     uint64_t decode_len;
628     uint64_t blk_size = 2 * MiB;
629     CXLDCRegion *region;
630     MemoryRegion *mr;
631     uint64_t dc_size;
632 
633     mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
634     dc_size = memory_region_size(mr);
635     region_len = DIV_ROUND_UP(dc_size, ct3d->dc.num_regions);
636 
637     if (dc_size % (ct3d->dc.num_regions * CXL_CAPACITY_MULTIPLIER) != 0) {
638         error_setg(errp,
639                    "backend size is not multiple of region len: 0x%" PRIx64,
640                    region_len);
641         return false;
642     }
643     if (region_len % CXL_CAPACITY_MULTIPLIER != 0) {
644         error_setg(errp, "DC region size is unaligned to 0x%" PRIx64,
645                    CXL_CAPACITY_MULTIPLIER);
646         return false;
647     }
648     decode_len = region_len;
649 
650     if (ct3d->hostvmem) {
651         mr = host_memory_backend_get_memory(ct3d->hostvmem);
652         region_base += memory_region_size(mr);
653     }
654     if (ct3d->hostpmem) {
655         mr = host_memory_backend_get_memory(ct3d->hostpmem);
656         region_base += memory_region_size(mr);
657     }
658     if (region_base % CXL_CAPACITY_MULTIPLIER != 0) {
659         error_setg(errp, "DC region base not aligned to 0x%" PRIx64,
660                    CXL_CAPACITY_MULTIPLIER);
661         return false;
662     }
663 
664     for (i = 0, region = &ct3d->dc.regions[0];
665          i < ct3d->dc.num_regions;
666          i++, region++, region_base += region_len) {
667         *region = (CXLDCRegion) {
668             .base = region_base,
669             .decode_len = decode_len,
670             .len = region_len,
671             .block_size = blk_size,
672             /* dsmad_handle set when creating CDAT table entries */
673             .flags = 0,
674         };
675         ct3d->dc.total_capacity += region->len;
676         region->blk_bitmap = bitmap_new(region->len / region->block_size);
677     }
678     QTAILQ_INIT(&ct3d->dc.extents);
679     QTAILQ_INIT(&ct3d->dc.extents_pending);
680 
681     return true;
682 }
683 
684 static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
685 {
686     CXLDCExtent *ent, *ent_next;
687     CXLDCExtentGroup *group, *group_next;
688     int i;
689     CXLDCRegion *region;
690 
691     QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
692         cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
693     }
694 
695     QTAILQ_FOREACH_SAFE(group, &ct3d->dc.extents_pending, node, group_next) {
696         QTAILQ_REMOVE(&ct3d->dc.extents_pending, group, node);
697         QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) {
698             cxl_remove_extent_from_extent_list(&group->list, ent);
699         }
700         g_free(group);
701     }
702 
703     for (i = 0; i < ct3d->dc.num_regions; i++) {
704         region = &ct3d->dc.regions[i];
705         g_free(region->blk_bitmap);
706     }
707 }
708 
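/*
 * Claim and wire up the host memory backends: treat the legacy memdev
 * property as persistent memory, require an LSA backend for persistent
 * devices, mark each backend as mapped, create the address spaces used for
 * DPA accesses, and create the dynamic capacity regions when num-dc-regions
 * is non-zero.
 */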
709 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
710 {
711     DeviceState *ds = DEVICE(ct3d);
712 
713     if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
714         && !ct3d->dc.num_regions) {
715         error_setg(errp, "at least one memdev property must be set");
716         return false;
717     } else if (ct3d->hostmem && ct3d->hostpmem) {
718         error_setg(errp, "[memdev] cannot be used with new "
719                          "[persistent-memdev] property");
720         return false;
721     } else if (ct3d->hostmem) {
722         /* Use of hostmem property implies pmem */
723         ct3d->hostpmem = ct3d->hostmem;
724         ct3d->hostmem = NULL;
725     }
726 
727     if (ct3d->hostpmem && !ct3d->lsa) {
728         error_setg(errp, "lsa property must be set for persistent devices");
729         return false;
730     }
731 
732     if (ct3d->hostvmem) {
733         MemoryRegion *vmr;
734         char *v_name;
735 
736         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
737         if (!vmr) {
738             error_setg(errp, "volatile memdev must have backing device");
739             return false;
740         }
741         if (host_memory_backend_is_mapped(ct3d->hostvmem)) {
742             error_setg(errp, "memory backend %s can't be used multiple times.",
743                object_get_canonical_path_component(OBJECT(ct3d->hostvmem)));
744             return false;
745         }
746         memory_region_set_nonvolatile(vmr, false);
747         memory_region_set_enabled(vmr, true);
748         host_memory_backend_set_mapped(ct3d->hostvmem, true);
749         if (ds->id) {
750             v_name = g_strdup_printf("cxl-type3-dpa-vmem-space:%s", ds->id);
751         } else {
752             v_name = g_strdup("cxl-type3-dpa-vmem-space");
753         }
754         address_space_init(&ct3d->hostvmem_as, vmr, v_name);
755         ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
756         ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
757         g_free(v_name);
758     }
759 
760     if (ct3d->hostpmem) {
761         MemoryRegion *pmr;
762         char *p_name;
763 
764         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
765         if (!pmr) {
766             error_setg(errp, "persistent memdev must have backing device");
767             return false;
768         }
769         if (host_memory_backend_is_mapped(ct3d->hostpmem)) {
770             error_setg(errp, "memory backend %s can't be used multiple times.",
771                object_get_canonical_path_component(OBJECT(ct3d->hostpmem)));
772             return false;
773         }
774         memory_region_set_nonvolatile(pmr, true);
775         memory_region_set_enabled(pmr, true);
776         host_memory_backend_set_mapped(ct3d->hostpmem, true);
777         if (ds->id) {
778             p_name = g_strdup_printf("cxl-type3-dpa-pmem-space:%s", ds->id);
779         } else {
780             p_name = g_strdup("cxl-type3-dpa-pmem-space");
781         }
782         address_space_init(&ct3d->hostpmem_as, pmr, p_name);
783         ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
784         ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
785         g_free(p_name);
786     }
787 
788     ct3d->dc.total_capacity = 0;
789     if (ct3d->dc.num_regions > 0) {
790         MemoryRegion *dc_mr;
791         char *dc_name;
792 
793         if (!ct3d->dc.host_dc) {
794             error_setg(errp, "dynamic capacity must have a backing device");
795             return false;
796         }
797 
798         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
799         if (!dc_mr) {
800             error_setg(errp, "dynamic capacity must have a backing device");
801             return false;
802         }
803 
804         if (host_memory_backend_is_mapped(ct3d->dc.host_dc)) {
805             error_setg(errp, "memory backend %s can't be used multiple times.",
806                object_get_canonical_path_component(OBJECT(ct3d->dc.host_dc)));
807             return false;
808         }
809         /*
810          * Set DC regions as volatile for now, non-volatile support can
811          * be added in the future if needed.
812          */
813         memory_region_set_nonvolatile(dc_mr, false);
814         memory_region_set_enabled(dc_mr, true);
815         host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
816         if (ds->id) {
817             dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
818         } else {
819             dc_name = g_strdup("cxl-dcd-dpa-dc-space");
820         }
821         address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
822         g_free(dc_name);
823 
824         if (!cxl_create_dc_regions(ct3d, errp)) {
825             error_append_hint(errp, "setup DC regions failed");
826             return false;
827         }
828     }
829 
830     return true;
831 }
832 
833 static DOEProtocol doe_cdat_prot[] = {
834     { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp },
835     { }
836 };
837 
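/*
 * Realize the type 3 device: set up backend memory and address spaces, PCIe
 * capabilities and DVSECs, the component and device register BARs, MSI-X
 * vectors, the DOE mailbox serving CDAT, AER, the event logs, and default
 * patrol scrub and DDR5 ECS attributes.
 */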
838 static void ct3_realize(PCIDevice *pci_dev, Error **errp)
839 {
840     ERRP_GUARD();
841     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
842     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
843     ComponentRegisters *regs = &cxl_cstate->crb;
844     MemoryRegion *mr = &regs->component_registers;
845     uint8_t *pci_conf = pci_dev->config;
846     unsigned short msix_num = 6;
847     int i, rc;
848     uint16_t count;
849 
850     QTAILQ_INIT(&ct3d->error_list);
851 
852     if (!cxl_setup_memory(ct3d, errp)) {
853         return;
854     }
855 
856     pci_config_set_prog_interface(pci_conf, 0x10);
857 
858     pcie_endpoint_cap_init(pci_dev, 0x80);
859     if (ct3d->sn != UI64_NULL) {
860         pcie_dev_ser_num_init(pci_dev, 0x100, ct3d->sn);
861         cxl_cstate->dvsec_offset = 0x100 + 0x0c;
862     } else {
863         cxl_cstate->dvsec_offset = 0x100;
864     }
865 
866     ct3d->cxl_cstate.pdev = pci_dev;
867     build_dvsecs(ct3d);
868 
869     regs->special_ops = g_new0(MemoryRegionOps, 1);
870     regs->special_ops->write = ct3d_reg_write;
871 
872     cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
873                                       TYPE_CXL_TYPE3);
874 
875     pci_register_bar(
876         pci_dev, CXL_COMPONENT_REG_BAR_IDX,
877         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr);
878 
879     cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate,
880                                    &ct3d->cci);
881     pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX,
882                      PCI_BASE_ADDRESS_SPACE_MEMORY |
883                          PCI_BASE_ADDRESS_MEM_TYPE_64,
884                      &ct3d->cxl_dstate.device_registers);
885 
886     /* MSI(-X) Initialization */
887     rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
888     if (rc) {
889         goto err_address_space_free;
890     }
891     for (i = 0; i < msix_num; i++) {
892         msix_vector_use(pci_dev, i);
893     }
894 
895     /* DOE Initialization */
896     pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
897 
898     cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
899     cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
900     cxl_cstate->cdat.private = ct3d;
901     if (!cxl_doe_cdat_init(cxl_cstate, errp)) {
902         goto err_free_special_ops;
903     }
904 
905     pcie_cap_deverr_init(pci_dev);
906     /* Leave a bit of room for expansion */
907     rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, NULL);
908     if (rc) {
909         goto err_release_cdat;
910     }
911     cxl_event_init(&ct3d->cxl_dstate, 2);
912 
913     /* Set default value for patrol scrub attributes */
914     ct3d->patrol_scrub_attrs.scrub_cycle_cap =
915                            CXL_MEMDEV_PS_SCRUB_CYCLE_CHANGE_CAP_DEFAULT |
916                            CXL_MEMDEV_PS_SCRUB_REALTIME_REPORT_CAP_DEFAULT;
917     ct3d->patrol_scrub_attrs.scrub_cycle =
918                            CXL_MEMDEV_PS_CUR_SCRUB_CYCLE_DEFAULT |
919                            (CXL_MEMDEV_PS_MIN_SCRUB_CYCLE_DEFAULT << 8);
920     ct3d->patrol_scrub_attrs.scrub_flags = CXL_MEMDEV_PS_ENABLE_DEFAULT;
921 
922     /* Set default value for DDR5 ECS read attributes */
923     ct3d->ecs_attrs.ecs_log_cap = CXL_ECS_LOG_ENTRY_TYPE_DEFAULT;
924     for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) {
925         ct3d->ecs_attrs.fru_attrs[count].ecs_cap =
926                             CXL_ECS_REALTIME_REPORT_CAP_DEFAULT;
927         ct3d->ecs_attrs.fru_attrs[count].ecs_config =
928                             CXL_ECS_THRESHOLD_COUNT_DEFAULT |
929                             (CXL_ECS_MODE_DEFAULT << 3);
930         /* Reserved */
931         ct3d->ecs_attrs.fru_attrs[count].ecs_flags = 0;
932     }
933 
934     return;
935 
936 err_release_cdat:
937     cxl_doe_cdat_release(cxl_cstate);
938 err_free_special_ops:
939     g_free(regs->special_ops);
940 err_address_space_free:
941     if (ct3d->dc.host_dc) {
942         cxl_destroy_dc_regions(ct3d);
943         address_space_destroy(&ct3d->dc.host_dc_as);
944     }
945     if (ct3d->hostpmem) {
946         address_space_destroy(&ct3d->hostpmem_as);
947     }
948     if (ct3d->hostvmem) {
949         address_space_destroy(&ct3d->hostvmem_as);
950     }
951     return;
952 }
953 
954 static void ct3_exit(PCIDevice *pci_dev)
955 {
956     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
957     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
958     ComponentRegisters *regs = &cxl_cstate->crb;
959 
960     pcie_aer_exit(pci_dev);
961     cxl_doe_cdat_release(cxl_cstate);
962     g_free(regs->special_ops);
963     if (ct3d->dc.host_dc) {
964         cxl_destroy_dc_regions(ct3d);
965         address_space_destroy(&ct3d->dc.host_dc_as);
966     }
967     if (ct3d->hostpmem) {
968         address_space_destroy(&ct3d->hostpmem_as);
969     }
970     if (ct3d->hostvmem) {
971         address_space_destroy(&ct3d->hostvmem_as);
972     }
973 }
974 
975 /*
976  * Mark the DPA range [dpa, dpa + len - 1] as backed and accessible. This
977  * happens when a DC extent is added and accepted by the host.
978  */
979 void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
980                                  uint64_t len)
981 {
982     CXLDCRegion *region;
983 
984     region = cxl_find_dc_region(ct3d, dpa, len);
985     if (!region) {
986         return;
987     }
988 
989     bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
990                len / region->block_size);
991 }
992 
993 /*
994  * Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents.
995  * Used when validating reads and writes to DC regions.
996  */
997 bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
998                                   uint64_t len)
999 {
1000     CXLDCRegion *region;
1001     uint64_t nbits;
1002     long nr;
1003 
1004     region = cxl_find_dc_region(ct3d, dpa, len);
1005     if (!region) {
1006         return false;
1007     }
1008 
1009     nr = (dpa - region->base) / region->block_size;
1010     nbits = DIV_ROUND_UP(len, region->block_size);
1011     /*
1012      * If the bits covering [dpa, dpa + len) are all set, the DPA range is
1013      * fully backed with DC extents, so return true; otherwise return false.
1014      */
1015     return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits;
1016 }
1017 
1018 /*
1019  * Mark the DPA range [dpa, dpa + len - 1] as unbacked and inaccessible.
1020  * This happens when a DC extent is released by the host.
1021  */
1022 void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
1023                                    uint64_t len)
1024 {
1025     CXLDCRegion *region;
1026     uint64_t nbits;
1027     long nr;
1028 
1029     region = cxl_find_dc_region(ct3d, dpa, len);
1030     if (!region) {
1031         return;
1032     }
1033 
1034     nr = (dpa - region->base) / region->block_size;
1035     nbits = len / region->block_size;
1036     bitmap_clear(region->blk_bitmap, nr, nbits);
1037 }
1038 
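/*
 * Translate a host physical address to a device physical address by walking
 * the committed HDM decoders. Each decoder's DPA skip is added to the running
 * DPA base, and decoders whose HPA range does not cover host_addr contribute
 * decoder_size / interleave_ways to that base. For the matching decoder, the
 * low (8 + ig) bits of the HPA offset are kept as-is and the interleave-way
 * selection bits above them are shifted out to form the DPA. Returns false if
 * no committed decoder covers the address.
 */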
1039 static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
1040 {
1041     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
1042     uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1043     unsigned int hdm_count;
1044     uint32_t cap;
1045     uint64_t dpa_base = 0;
1046     int i;
1047 
1048     cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY);
1049     hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap,
1050                                                  CXL_HDM_DECODER_CAPABILITY,
1051                                                  DECODER_COUNT));
1052 
1053     for (i = 0; i < hdm_count; i++) {
1054         uint64_t decoder_base, decoder_size, hpa_offset, skip;
1055         uint32_t hdm_ctrl, low, high;
1056         int ig, iw;
1057 
1058         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc);
1059         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc);
1060         decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000);
1061 
1062         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc);
1063         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc);
1064         decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000);
1065 
1066         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO +
1067                        i * hdm_inc);
1068         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI +
1069                         i * hdm_inc);
1070         skip = ((uint64_t)high << 32) | (low & 0xf0000000);
1071         dpa_base += skip;
1072 
1073         hpa_offset = (uint64_t)host_addr - decoder_base;
1074 
1075         hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc);
1076         iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW);
1077         ig = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IG);
1078         if (!FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) {
1079             return false;
1080         }
1081         if (((uint64_t)host_addr < decoder_base) ||
1082             (hpa_offset >= decoder_size)) {
1083             int decoded_iw = cxl_interleave_ways_dec(iw, &error_fatal);
1084 
1085             if (decoded_iw == 0) {
1086                 return false;
1087             }
1088 
1089             dpa_base += decoder_size / decoded_iw;
1090             continue;
1091         }
1092 
1093         *dpa = dpa_base +
1094             ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
1095              ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset)
1096               >> iw));
1097 
1098         return true;
1099     }
1100     return false;
1101 }
1102 
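/*
 * Resolve a host address to the backing AddressSpace (volatile, persistent or
 * dynamic capacity, in that DPA order) and to the offset within that backend.
 * Accesses to DC blocks that are not backed by an accepted extent fail with
 * -ENODEV.
 */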
1103 static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
1104                                        hwaddr host_addr,
1105                                        unsigned int size,
1106                                        AddressSpace **as,
1107                                        uint64_t *dpa_offset)
1108 {
1109     MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1110     uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1111 
1112     if (ct3d->hostvmem) {
1113         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1114         vmr_size = memory_region_size(vmr);
1115     }
1116     if (ct3d->hostpmem) {
1117         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1118         pmr_size = memory_region_size(pmr);
1119     }
1120     if (ct3d->dc.host_dc) {
1121         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1122         dc_size = memory_region_size(dc_mr);
1123     }
1124 
1125     if (!vmr && !pmr && !dc_mr) {
1126         return -ENODEV;
1127     }
1128 
1129     if (!cxl_type3_dpa(ct3d, host_addr, dpa_offset)) {
1130         return -EINVAL;
1131     }
1132 
1133     if (*dpa_offset >= vmr_size + pmr_size + dc_size) {
1134         return -EINVAL;
1135     }
1136 
1137     if (*dpa_offset < vmr_size) {
1138         *as = &ct3d->hostvmem_as;
1139     } else if (*dpa_offset < vmr_size + pmr_size) {
1140         *as = &ct3d->hostpmem_as;
1141         *dpa_offset -= vmr_size;
1142     } else {
1143         if (!ct3_test_region_block_backed(ct3d, *dpa_offset, size)) {
1144             return -ENODEV;
1145         }
1146 
1147         *as = &ct3d->dc.host_dc_as;
1148         *dpa_offset -= (vmr_size + pmr_size);
1149     }
1150 
1151     return 0;
1152 }
1153 
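/*
 * CXL.mem accesses routed to this device: while the media is disabled, reads
 * return random data and writes are silently dropped; otherwise the access is
 * forwarded to the resolved backend address space at the translated DPA.
 */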
1154 MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
1155                            unsigned size, MemTxAttrs attrs)
1156 {
1157     CXLType3Dev *ct3d = CXL_TYPE3(d);
1158     uint64_t dpa_offset = 0;
1159     AddressSpace *as = NULL;
1160     int res;
1161 
1162     res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1163                                       &as, &dpa_offset);
1164     if (res) {
1165         return MEMTX_ERROR;
1166     }
1167 
1168     if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
1169         qemu_guest_getrandom_nofail(data, size);
1170         return MEMTX_OK;
1171     }
1172 
1173     return address_space_read(as, dpa_offset, attrs, data, size);
1174 }
1175 
1176 MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
1177                             unsigned size, MemTxAttrs attrs)
1178 {
1179     CXLType3Dev *ct3d = CXL_TYPE3(d);
1180     uint64_t dpa_offset = 0;
1181     AddressSpace *as = NULL;
1182     int res;
1183 
1184     res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1185                                       &as, &dpa_offset);
1186     if (res) {
1187         return MEMTX_ERROR;
1188     }
1189 
1190     if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
1191         return MEMTX_OK;
1192     }
1193 
1194     return address_space_write(as, dpa_offset, attrs, &data, size);
1195 }
1196 
1197 static void ct3d_reset(DeviceState *dev)
1198 {
1199     CXLType3Dev *ct3d = CXL_TYPE3(dev);
1200     uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1201     uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
1202 
1203     pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed);
1204     cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
1205     cxl_device_register_init_t3(ct3d);
1206 
1207     /*
1208      * Bring up an endpoint to target with MCTP over VDM.
1209      * This device is emulating an MLD with a single LD for now.
1210      */
1211     cxl_initialize_t3_fm_owned_ld_mctpcci(&ct3d->vdm_fm_owned_ld_mctp_cci,
1212                                           DEVICE(ct3d), DEVICE(ct3d),
1213                                           512); /* Max payload made up */
1214     cxl_initialize_t3_ld_cci(&ct3d->ld0_cci, DEVICE(ct3d), DEVICE(ct3d),
1215                              512); /* Max payload made up */
1216 
1217 }
1218 
1219 static Property ct3_props[] = {
1220     DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
1221                      HostMemoryBackend *), /* for backward compatibility */
1222     DEFINE_PROP_LINK("persistent-memdev", CXLType3Dev, hostpmem,
1223                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1224     DEFINE_PROP_LINK("volatile-memdev", CXLType3Dev, hostvmem,
1225                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1226     DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
1227                      HostMemoryBackend *),
1228     DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
1229     DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
1230     DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
1231     DEFINE_PROP_LINK("volatile-dc-memdev", CXLType3Dev, dc.host_dc,
1232                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1233     DEFINE_PROP_PCIE_LINK_SPEED("x-speed", CXLType3Dev,
1234                                 speed, PCIE_LINK_SPEED_32),
1235     DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
1236                                 width, PCIE_LINK_WIDTH_16),
1237     DEFINE_PROP_END_OF_LIST(),
1238 };
1239 
1240 static uint64_t get_lsa_size(CXLType3Dev *ct3d)
1241 {
1242     MemoryRegion *mr;
1243 
1244     if (!ct3d->lsa) {
1245         return 0;
1246     }
1247 
1248     mr = host_memory_backend_get_memory(ct3d->lsa);
1249     return memory_region_size(mr);
1250 }
1251 
1252 static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
1253                                 uint64_t offset)
1254 {
1255     assert(offset + size <= memory_region_size(mr));
1256     assert(offset + size > offset);
1257 }
1258 
1259 static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
1260                     uint64_t offset)
1261 {
1262     MemoryRegion *mr;
1263     void *lsa;
1264 
1265     if (!ct3d->lsa) {
1266         return 0;
1267     }
1268 
1269     mr = host_memory_backend_get_memory(ct3d->lsa);
1270     validate_lsa_access(mr, size, offset);
1271 
1272     lsa = memory_region_get_ram_ptr(mr) + offset;
1273     memcpy(buf, lsa, size);
1274 
1275     return size;
1276 }
1277 
1278 static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
1279                     uint64_t offset)
1280 {
1281     MemoryRegion *mr;
1282     void *lsa;
1283 
1284     if (!ct3d->lsa) {
1285         return;
1286     }
1287 
1288     mr = host_memory_backend_get_memory(ct3d->lsa);
1289     validate_lsa_access(mr, size, offset);
1290 
1291     lsa = memory_region_get_ram_ptr(mr) + offset;
1292     memcpy(lsa, buf, size);
1293     memory_region_set_dirty(mr, offset, size);
1294 
1295     /*
1296      * As with the PMEM, if the guest is not shut down gracefully, label
1297      * updates will be lost.
1298      */
1299 }
1300 
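/*
 * Write one CXL_CACHE_LINE_SIZE chunk of data at the given DPA, routing it to
 * the volatile, persistent or dynamic capacity address space as appropriate.
 */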
1301 static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
1302 {
1303     MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1304     AddressSpace *as;
1305     uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1306 
1307     if (ct3d->hostvmem) {
1308         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1309         vmr_size = memory_region_size(vmr);
1310     }
1311     if (ct3d->hostpmem) {
1312         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1313         pmr_size = memory_region_size(pmr);
1314     }
1315     if (ct3d->dc.host_dc) {
1316         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1317         dc_size = memory_region_size(dc_mr);
1318     }
1319 
1320     if (!vmr && !pmr && !dc_mr) {
1321         return false;
1322     }
1323 
1324     if (dpa_offset + CXL_CACHE_LINE_SIZE > vmr_size + pmr_size + dc_size) {
1325         return false;
1326     }
1327 
1328     if (dpa_offset < vmr_size) {
1329         as = &ct3d->hostvmem_as;
1330     } else if (dpa_offset < vmr_size + pmr_size) {
1331         as = &ct3d->hostpmem_as;
1332         dpa_offset -= vmr_size;
1333     } else {
1334         as = &ct3d->dc.host_dc_as;
1335         dpa_offset -= (vmr_size + pmr_size);
1336     }
1337 
1338     address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, data,
1339                         CXL_CACHE_LINE_SIZE);
1340     return true;
1341 }
1342 
1343 void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
1344 {
1345     ct3d->poison_list_overflowed = true;
1346     ct3d->poison_list_overflow_ts =
1347         cxl_device_get_timestamp(&ct3d->cxl_dstate);
1348 }
1349 
1350 void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d)
1351 {
1352     ct3d->poison_list_overflowed = false;
1353     ct3d->poison_list_overflow_ts = 0;
1354 }
1355 
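/*
 * QMP poison injection: the range must be 64 byte aligned, a multiple of 64
 * bytes in length, and must not overlap an existing poisoned region. Once the
 * poison list limit is reached, further records are kept on the backup list
 * and the overflow condition is latched.
 */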
1356 void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
1357                            Error **errp)
1358 {
1359     Object *obj = object_resolve_path(path, NULL);
1360     CXLType3Dev *ct3d;
1361     CXLPoison *p;
1362 
1363     if (length % 64) {
1364         error_setg(errp, "Poison injection must be in multiples of 64 bytes");
1365         return;
1366     }
1367     if (start % 64) {
1368         error_setg(errp, "Poison start address must be 64 byte aligned");
1369         return;
1370     }
1371     if (!obj) {
1372         error_setg(errp, "Unable to resolve path");
1373         return;
1374     }
1375     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1376         error_setg(errp, "Path does not point to a CXL type 3 device");
1377         return;
1378     }
1379 
1380     ct3d = CXL_TYPE3(obj);
1381 
1382     QLIST_FOREACH(p, &ct3d->poison_list, node) {
1383         if ((start < p->start + p->length) && (start + length > p->start)) {
1384             error_setg(errp,
1385                        "Overlap with existing poisoned region not supported");
1386             return;
1387         }
1388     }
1389 
1390     p = g_new0(CXLPoison, 1);
1391     p->length = length;
1392     p->start = start;
1393     /* Distinct from poison injected via the mailbox */
1394     p->type = CXL_POISON_TYPE_INTERNAL;
1395 
1396     if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
1397         QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
1398         ct3d->poison_list_cnt++;
1399     } else {
1400         if (!ct3d->poison_list_overflowed) {
1401             cxl_set_poison_list_overflowed(ct3d);
1402         }
1403         QLIST_INSERT_HEAD(&ct3d->poison_list_bkp, p, node);
1404     }
1405 }
1406 
1407 /* For uncorrectable errors include support for multiple header recording */
1408 void qmp_cxl_inject_uncorrectable_errors(const char *path,
1409                                          CXLUncorErrorRecordList *errors,
1410                                          Error **errp)
1411 {
1412     Object *obj = object_resolve_path(path, NULL);
1413     static PCIEAERErr err = {};
1414     CXLType3Dev *ct3d;
1415     CXLError *cxl_err;
1416     uint32_t *reg_state;
1417     uint32_t unc_err;
1418     bool first;
1419 
1420     if (!obj) {
1421         error_setg(errp, "Unable to resolve path");
1422         return;
1423     }
1424 
1425     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1426         error_setg(errp, "Path does not point to a CXL type 3 device");
1427         return;
1428     }
1429 
1430     err.status = PCI_ERR_UNC_INTN;
1431     err.source_id = pci_requester_id(PCI_DEVICE(obj));
1432     err.flags = 0;
1433 
1434     ct3d = CXL_TYPE3(obj);
1435 
1436     first = QTAILQ_EMPTY(&ct3d->error_list);
1437     reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1438     while (errors) {
1439         uint32List *header = errors->value->header;
1440         uint8_t header_count = 0;
1441         int cxl_err_code;
1442 
1443         cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type);
1444         if (cxl_err_code < 0) {
1445             error_setg(errp, "Unknown error code");
1446             return;
1447         }
1448 
1449         /* If the error is masked, nothing to do here */
1450         if (!((1 << cxl_err_code) &
1451               ~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) {
1452             errors = errors->next;
1453             continue;
1454         }
1455 
1456         cxl_err = g_malloc0(sizeof(*cxl_err));
1457 
1458         cxl_err->type = cxl_err_code;
1459         while (header && header_count < 32) {
1460             cxl_err->header[header_count++] = header->value;
1461             header = header->next;
1462         }
1463         if (header) { /* more than 32 header DWORDs were supplied */
1464             error_setg(errp, "Header must be 32 DWORD or less");
1465             return;
1466         }
1467         QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node);
1468 
1469         errors = errors->next;
1470     }
1471 
1472     if (first && !QTAILQ_EMPTY(&ct3d->error_list)) {
1473         uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1474         uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
1475         uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
1476         int i;
1477 
1478         cxl_err = QTAILQ_FIRST(&ct3d->error_list);
1479         for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
1480             stl_le_p(header_log + i, cxl_err->header[i]);
1481         }
1482 
1483         capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
1484                              FIRST_ERROR_POINTER, cxl_err->type);
1485         stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl);
1486     }
1487 
1488     unc_err = 0;
1489     QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
1490         unc_err |= (1 << cxl_err->type);
1491     }
1492     if (!unc_err) {
1493         return;
1494     }
1495 
1496     stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err);
1497     pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1498 
1499     return;
1500 }
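/*
 * Illustrative QMP invocation for the command above (a sketch).  "type" must
 * be one of the CxlUncorErrorType enum names from qapi/cxl.json; the value
 * shown is assumed for illustration, and "header" carries at most 32 DWORDs:
 *
 * { "execute": "cxl-inject-uncorrectable-errors",
 *   "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                  "errors": [ { "type": "cache-data-parity",
 *                                "header": [ 3, 4 ] } ] } }
 */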
1501 
1502 void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
1503                                       Error **errp)
1504 {
1505     static PCIEAERErr err = {};
1506     Object *obj = object_resolve_path(path, NULL);
1507     CXLType3Dev *ct3d;
1508     uint32_t *reg_state;
1509     uint32_t cor_err;
1510     int cxl_err_type;
1511 
1512     if (!obj) {
1513         error_setg(errp, "Unable to resolve path");
1514         return;
1515     }
1516     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1517         error_setg(errp, "Path does not point to a CXL type 3 device");
1518         return;
1519     }
1520 
1521     err.status = PCI_ERR_COR_INTERNAL;
1522     err.source_id = pci_requester_id(PCI_DEVICE(obj));
1523     err.flags = PCIE_AER_ERR_IS_CORRECTABLE;
1524 
1525     ct3d = CXL_TYPE3(obj);
1526     reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1527     cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS);
1528 
1529     cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
1530     if (cxl_err_type < 0) {
1531         error_setg(errp, "Invalid COR error");
1532         return;
1533     }
1534     /* If the error is masked, nothing to do here */
1535     if (!((1 << cxl_err_type) &
1536           ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) {
1537         return;
1538     }
1539 
1540     cor_err |= (1 << cxl_err_type);
1541     stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err);
1542 
1543     pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1544 }
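/*
 * Illustrative QMP invocation for the command above (a sketch).  "type" is
 * one of the CxlCorErrorType enum names from qapi/cxl.json; the value and
 * device path shown are assumed for illustration:
 *
 * { "execute": "cxl-inject-correctable-error",
 *   "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                  "type": "mem-data-ecc" } }
 */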
1545 
1546 static void cxl_assign_event_header(CXLEventRecordHdr *hdr,
1547                                     const QemuUUID *uuid, uint32_t flags,
1548                                     uint8_t length, uint64_t timestamp)
1549 {
1550     st24_le_p(&hdr->flags, flags);
1551     hdr->length = length;
1552     memcpy(&hdr->id, uuid, sizeof(hdr->id));
1553     stq_le_p(&hdr->timestamp, timestamp);
1554 }
1555 
1556 static const QemuUUID gen_media_uuid = {
1557     .data = UUID(0xfbcd0a77, 0xc260, 0x417f,
1558                  0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6),
1559 };
1560 
1561 static const QemuUUID dram_uuid = {
1562     .data = UUID(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf,
1563                  0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
1564 };
1565 
1566 static const QemuUUID memory_module_uuid = {
1567     .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86,
1568                  0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
1569 };
1570 
1571 #define CXL_GMER_VALID_CHANNEL                          BIT(0)
1572 #define CXL_GMER_VALID_RANK                             BIT(1)
1573 #define CXL_GMER_VALID_DEVICE                           BIT(2)
1574 #define CXL_GMER_VALID_COMPONENT                        BIT(3)
1575 
1576 static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
1577 {
1578     switch (log) {
1579     case CXL_EVENT_LOG_INFORMATIONAL:
1580         return CXL_EVENT_TYPE_INFO;
1581     case CXL_EVENT_LOG_WARNING:
1582         return CXL_EVENT_TYPE_WARN;
1583     case CXL_EVENT_LOG_FAILURE:
1584         return CXL_EVENT_TYPE_FAIL;
1585     case CXL_EVENT_LOG_FATAL:
1586         return CXL_EVENT_TYPE_FATAL;
1587     default:
1588         return -EINVAL;
1589     }
1590 }
1591 /* Component ID is device specific.  Define this as a string. */
1592 void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
1593                                         uint8_t flags, uint64_t dpa,
1594                                         uint8_t descriptor, uint8_t type,
1595                                         uint8_t transaction_type,
1596                                         bool has_channel, uint8_t channel,
1597                                         bool has_rank, uint8_t rank,
1598                                         bool has_device, uint32_t device,
1599                                         const char *component_id,
1600                                         Error **errp)
1601 {
1602     Object *obj = object_resolve_path(path, NULL);
1603     CXLEventGenMedia gem;
1604     CXLEventRecordHdr *hdr = &gem.hdr;
1605     CXLDeviceState *cxlds;
1606     CXLType3Dev *ct3d;
1607     uint16_t valid_flags = 0;
1608     uint8_t enc_log;
1609     int rc;
1610 
1611     if (!obj) {
1612         error_setg(errp, "Unable to resolve path");
1613         return;
1614     }
1615     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1616         error_setg(errp, "Path does not point to a CXL type 3 device");
1617         return;
1618     }
1619     ct3d = CXL_TYPE3(obj);
1620     cxlds = &ct3d->cxl_dstate;
1621 
1622     rc = ct3d_qmp_cxl_event_log_enc(log);
1623     if (rc < 0) {
1624         error_setg(errp, "Unhandled error log type");
1625         return;
1626     }
1627     enc_log = rc;
1628 
1629     memset(&gem, 0, sizeof(gem));
1630     cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem),
1631                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1632 
1633     stq_le_p(&gem.phys_addr, dpa);
1634     gem.descriptor = descriptor;
1635     gem.type = type;
1636     gem.transaction_type = transaction_type;
1637 
1638     if (has_channel) {
1639         gem.channel = channel;
1640         valid_flags |= CXL_GMER_VALID_CHANNEL;
1641     }
1642 
1643     if (has_rank) {
1644         gem.rank = rank;
1645         valid_flags |= CXL_GMER_VALID_RANK;
1646     }
1647 
1648     if (has_device) {
1649         st24_le_p(gem.device, device);
1650         valid_flags |= CXL_GMER_VALID_DEVICE;
1651     }
1652 
1653     if (component_id) {
1654         strncpy((char *)gem.component_id, component_id,
1655                 sizeof(gem.component_id) - 1);
1656         valid_flags |= CXL_GMER_VALID_COMPONENT;
1657     }
1658 
1659     stw_le_p(&gem.validity_flags, valid_flags);
1660 
1661     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) {
1662         cxl_event_irq_assert(ct3d);
1663     }
1664 }
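/*
 * Illustrative QMP invocation for the command above (a sketch).  Member
 * names are shown as the hyphenated form of the C parameters and should be
 * checked against qapi/cxl.json; "log" takes a CxlEventLog name (the handler
 * maps CXL_EVENT_LOG_INFORMATIONAL et al.), the optional "channel", "rank",
 * "device" and "component-id" fields may be omitted, and all values are
 * examples only:
 *
 * { "execute": "cxl-inject-general-media-event",
 *   "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                  "log": "informational", "flags": 1, "dpa": 4096,
 *                  "descriptor": 3, "type": 3, "transaction-type": 192,
 *                  "channel": 1, "rank": 0, "component-id": "cid0" } }
 */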
1665 
1666 #define CXL_DRAM_VALID_CHANNEL                          BIT(0)
1667 #define CXL_DRAM_VALID_RANK                             BIT(1)
1668 #define CXL_DRAM_VALID_NIBBLE_MASK                      BIT(2)
1669 #define CXL_DRAM_VALID_BANK_GROUP                       BIT(3)
1670 #define CXL_DRAM_VALID_BANK                             BIT(4)
1671 #define CXL_DRAM_VALID_ROW                              BIT(5)
1672 #define CXL_DRAM_VALID_COLUMN                           BIT(6)
1673 #define CXL_DRAM_VALID_CORRECTION_MASK                  BIT(7)
1674 
1675 void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
1676                                uint64_t dpa, uint8_t descriptor,
1677                                uint8_t type, uint8_t transaction_type,
1678                                bool has_channel, uint8_t channel,
1679                                bool has_rank, uint8_t rank,
1680                                bool has_nibble_mask, uint32_t nibble_mask,
1681                                bool has_bank_group, uint8_t bank_group,
1682                                bool has_bank, uint8_t bank,
1683                                bool has_row, uint32_t row,
1684                                bool has_column, uint16_t column,
1685                                bool has_correction_mask,
1686                                uint64List *correction_mask,
1687                                Error **errp)
1688 {
1689     Object *obj = object_resolve_path(path, NULL);
1690     CXLEventDram dram;
1691     CXLEventRecordHdr *hdr = &dram.hdr;
1692     CXLDeviceState *cxlds;
1693     CXLType3Dev *ct3d;
1694     uint16_t valid_flags = 0;
1695     uint8_t enc_log;
1696     int rc;
1697 
1698     if (!obj) {
1699         error_setg(errp, "Unable to resolve path");
1700         return;
1701     }
1702     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1703         error_setg(errp, "Path does not point to a CXL type 3 device");
1704         return;
1705     }
1706     ct3d = CXL_TYPE3(obj);
1707     cxlds = &ct3d->cxl_dstate;
1708 
1709     rc = ct3d_qmp_cxl_event_log_enc(log);
1710     if (rc < 0) {
1711         error_setg(errp, "Unhandled error log type");
1712         return;
1713     }
1714     enc_log = rc;
1715 
1716     memset(&dram, 0, sizeof(dram));
1717     cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram),
1718                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1719     stq_le_p(&dram.phys_addr, dpa);
1720     dram.descriptor = descriptor;
1721     dram.type = type;
1722     dram.transaction_type = transaction_type;
1723 
1724     if (has_channel) {
1725         dram.channel = channel;
1726         valid_flags |= CXL_DRAM_VALID_CHANNEL;
1727     }
1728 
1729     if (has_rank) {
1730         dram.rank = rank;
1731         valid_flags |= CXL_DRAM_VALID_RANK;
1732     }
1733 
1734     if (has_nibble_mask) {
1735         st24_le_p(dram.nibble_mask, nibble_mask);
1736         valid_flags |= CXL_DRAM_VALID_NIBBLE_MASK;
1737     }
1738 
1739     if (has_bank_group) {
1740         dram.bank_group = bank_group;
1741         valid_flags |= CXL_DRAM_VALID_BANK_GROUP;
1742     }
1743 
1744     if (has_bank) {
1745         dram.bank = bank;
1746         valid_flags |= CXL_DRAM_VALID_BANK;
1747     }
1748 
1749     if (has_row) {
1750         st24_le_p(dram.row, row);
1751         valid_flags |= CXL_DRAM_VALID_ROW;
1752     }
1753 
1754     if (has_column) {
1755         stw_le_p(&dram.column, column);
1756         valid_flags |= CXL_DRAM_VALID_COLUMN;
1757     }
1758 
1759     if (has_correction_mask) {
1760         int count = 0;
1761         while (correction_mask && count < 4) {
1762             stq_le_p(&dram.correction_mask[count],
1763                      correction_mask->value);
1764             count++;
1765             correction_mask = correction_mask->next;
1766         }
1767         valid_flags |= CXL_DRAM_VALID_CORRECTION_MASK;
1768     }
1769 
1770     stw_le_p(&dram.validity_flags, valid_flags);
1771 
1772     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) {
1773         cxl_event_irq_assert(ct3d);
1774     }
1775     return;
1776 }
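/*
 * Illustrative QMP invocation for the command above, analogous to the
 * general media event (a sketch; member names mirror the C parameters and
 * qapi/cxl.json is authoritative).  Optional fields may be omitted and the
 * values are examples only:
 *
 * { "execute": "cxl-inject-dram-event",
 *   "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                  "log": "informational", "flags": 1, "dpa": 4096,
 *                  "descriptor": 3, "type": 3, "transaction-type": 192,
 *                  "bank-group": 1, "bank": 2, "row": 7, "column": 8 } }
 */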
1777 
1778 void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
1779                                         uint8_t flags, uint8_t type,
1780                                         uint8_t health_status,
1781                                         uint8_t media_status,
1782                                         uint8_t additional_status,
1783                                         uint8_t life_used,
1784                                         int16_t temperature,
1785                                         uint32_t dirty_shutdown_count,
1786                                         uint32_t corrected_volatile_error_count,
1787                                         uint32_t corrected_persist_error_count,
1788                                         Error **errp)
1789 {
1790     Object *obj = object_resolve_path(path, NULL);
1791     CXLEventMemoryModule module;
1792     CXLEventRecordHdr *hdr = &module.hdr;
1793     CXLDeviceState *cxlds;
1794     CXLType3Dev *ct3d;
1795     uint8_t enc_log;
1796     int rc;
1797 
1798     if (!obj) {
1799         error_setg(errp, "Unable to resolve path");
1800         return;
1801     }
1802     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1803         error_setg(errp, "Path does not point to a CXL type 3 device");
1804         return;
1805     }
1806     ct3d = CXL_TYPE3(obj);
1807     cxlds = &ct3d->cxl_dstate;
1808 
1809     rc = ct3d_qmp_cxl_event_log_enc(log);
1810     if (rc < 0) {
1811         error_setg(errp, "Unhandled error log type");
1812         return;
1813     }
1814     enc_log = rc;
1815 
1816     memset(&module, 0, sizeof(module));
1817     cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
1818                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1819 
1820     module.type = type;
1821     module.health_status = health_status;
1822     module.media_status = media_status;
1823     module.additional_status = additional_status;
1824     module.life_used = life_used;
1825     stw_le_p(&module.temperature, temperature);
1826     stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count);
1827     stl_le_p(&module.corrected_volatile_error_count,
1828              corrected_volatile_error_count);
1829     stl_le_p(&module.corrected_persistent_error_count,
1830              corrected_persist_error_count);
1831 
1832     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) {
1833         cxl_event_irq_assert(ct3d);
1834     }
1835 }
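/*
 * Illustrative QMP invocation for the command above (a sketch).  Member
 * names are given in their hyphenated QAPI form and the exact spellings
 * (notably of the error-count fields) should be verified against
 * qapi/cxl.json; all values are examples only:
 *
 * { "execute": "cxl-inject-memory-module-event",
 *   "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                  "log": "informational", "flags": 1, "type": 1,
 *                  "health-status": 1, "media-status": 0,
 *                  "additional-status": 0, "life-used": 20,
 *                  "temperature": 25, "dirty-shutdown-count": 0,
 *                  "corrected-volatile-error-count": 0,
 *                  "corrected-persistent-error-count": 0 } }
 */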
1836 
1837 /* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */
1838 static const QemuUUID dynamic_capacity_uuid = {
1839     .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
1840                  0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
1841 };
1842 
1843 typedef enum CXLDCEventType {
1844     DC_EVENT_ADD_CAPACITY = 0x0,
1845     DC_EVENT_RELEASE_CAPACITY = 0x1,
1846     DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2,
1847     DC_EVENT_REGION_CONFIG_UPDATED = 0x3,
1848     DC_EVENT_ADD_CAPACITY_RSP = 0x4,
1849     DC_EVENT_CAPACITY_RELEASED = 0x5,
1850 } CXLDCEventType;
1851 
1852 /*
1853  * Check whether the range [dpa, dpa + len - 1] overlaps with any extent in
1854  * the list.
1855  * Return value: true if there is an overlap; otherwise, false
1856  */
1857 static bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
1858                                            uint64_t dpa, uint64_t len)
1859 {
1860     CXLDCExtent *ent;
1861     Range range1, range2;
1862 
1863     if (!list) {
1864         return false;
1865     }
1866 
1867     range_init_nofail(&range1, dpa, len);
1868     QTAILQ_FOREACH(ent, list, node) {
1869         range_init_nofail(&range2, ent->start_dpa, ent->len);
1870         if (range_overlaps_range(&range1, &range2)) {
1871             return true;
1872         }
1873     }
1874     return false;
1875 }
1876 
1877 /*
1878  * Check whether the range [dpa, dpa + len - 1] is contained by an extent in
1879  * the list.
1880  * Containment spanning multiple extents will be checked once superset release is added.
1881  * Return value: true if the range is contained; otherwise, false
1882  */
1883 bool cxl_extents_contains_dpa_range(CXLDCExtentList *list,
1884                                     uint64_t dpa, uint64_t len)
1885 {
1886     CXLDCExtent *ent;
1887     Range range1, range2;
1888 
1889     if (!list) {
1890         return false;
1891     }
1892 
1893     range_init_nofail(&range1, dpa, len);
1894     QTAILQ_FOREACH(ent, list, node) {
1895         range_init_nofail(&range2, ent->start_dpa, ent->len);
1896         if (range_contains_range(&range2, &range1)) {
1897             return true;
1898         }
1899     }
1900     return false;
1901 }
1902 
1903 static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
1904                                                  uint64_t dpa, uint64_t len)
1905 {
1906     CXLDCExtentGroup *group;
1907 
1908     if (!list) {
1909         return false;
1910     }
1911 
1912     QTAILQ_FOREACH(group, list, node) {
1913         if (cxl_extents_overlaps_dpa_range(&group->list, dpa, len)) {
1914             return true;
1915         }
1916     }
1917     return false;
1918 }
1919 
1920 /*
1921  * The main function for processing a dynamic capacity event with an extent list.
1922  * Currently, DC extent add/release requests are processed.
1923  */
1924 static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
1925         uint16_t hid, CXLDCEventType type, uint8_t rid,
1926         CxlDynamicCapacityExtentList *records, Error **errp)
1927 {
1928     Object *obj;
1929     CXLEventDynamicCapacity dCap = {};
1930     CXLEventRecordHdr *hdr = &dCap.hdr;
1931     CXLType3Dev *dcd;
1932     uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
1933     uint32_t num_extents = 0;
1934     CxlDynamicCapacityExtentList *list;
1935     CXLDCExtentGroup *group = NULL;
1936     g_autofree CXLDCExtentRaw *extents = NULL;
1937     uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP;
1938     uint64_t dpa, offset, len, block_size;
1939     g_autofree unsigned long *blk_bitmap = NULL;
1940     int i;
1941 
1942     obj = object_resolve_path_type(path, TYPE_CXL_TYPE3, NULL);
1943     if (!obj) {
1944         error_setg(errp, "Unable to resolve CXL type 3 device");
1945         return;
1946     }
1947 
1948     dcd = CXL_TYPE3(obj);
1949     if (!dcd->dc.num_regions) {
1950         error_setg(errp, "No dynamic capacity support from the device");
1951         return;
1952     }
1953 
1954 
1955     if (rid >= dcd->dc.num_regions) {
1956         error_setg(errp, "region id is too large");
1957         return;
1958     }
1959     block_size = dcd->dc.regions[rid].block_size;
1960     blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
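    /*
     * Worked example with hypothetical numbers: for a 2 GiB region with a
     * 64 MiB block size the bitmap tracks 2 GiB / 64 MiB = 32 blocks, and an
     * extent at offset 128 MiB with length 256 MiB marks bits 2 through 5.
     */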
1961 
1962     /* Sanity check and count the extents */
1963     list = records;
1964     while (list) {
1965         offset = list->value->offset;
1966         len = list->value->len;
1967         dpa = offset + dcd->dc.regions[rid].base;
1968 
1969         if (len == 0) {
1970             error_setg(errp, "extent with 0 length is not allowed");
1971             return;
1972         }
1973 
1974         if (offset % block_size || len % block_size) {
1975             error_setg(errp, "dpa or len is not aligned to region block size");
1976             return;
1977         }
1978 
1979         if (offset + len > dcd->dc.regions[rid].len) {
1980             error_setg(errp, "extent range is beyond the region end");
1981             return;
1982         }
1983 
1984         /* No duplicate or overlapped extents are allowed */
1985         if (test_any_bits_set(blk_bitmap, offset / block_size,
1986                               len / block_size)) {
1987             error_setg(errp, "duplicate or overlapped extents are detected");
1988             return;
1989         }
1990         bitmap_set(blk_bitmap, offset / block_size, len / block_size);
1991 
1992         if (type == DC_EVENT_RELEASE_CAPACITY) {
1993             if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
1994                                                      dpa, len)) {
1995                 error_setg(errp,
1996                            "cannot release extent with pending DPA range");
1997                 return;
1998             }
1999             if (!ct3_test_region_block_backed(dcd, dpa, len)) {
2000                 error_setg(errp,
2001                            "cannot release extent with non-existing DPA range");
2002                 return;
2003             }
2004         } else if (type == DC_EVENT_ADD_CAPACITY) {
2005             if (cxl_extents_overlaps_dpa_range(&dcd->dc.extents, dpa, len)) {
2006                 error_setg(errp,
2007                            "cannot add DPA already accessible to the same LD");
2008                 return;
2009             }
2010             if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
2011                                                      dpa, len)) {
2012                 error_setg(errp,
2013                            "cannot add DPA again while still pending");
2014                 return;
2015             }
2016         }
2017         list = list->next;
2018         num_extents++;
2019     }
2020 
2021     /* Create extent list for event being passed to host */
2022     i = 0;
2023     list = records;
2024     extents = g_new0(CXLDCExtentRaw, num_extents);
2025     while (list) {
2026         offset = list->value->offset;
2027         len = list->value->len;
2028         dpa = dcd->dc.regions[rid].base + offset;
2029 
2030         extents[i].start_dpa = dpa;
2031         extents[i].len = len;
2032         memset(extents[i].tag, 0, 0x10);
2033         extents[i].shared_seq = 0;
2034         if (type == DC_EVENT_ADD_CAPACITY) {
2035             group = cxl_insert_extent_to_extent_group(group,
2036                                                       extents[i].start_dpa,
2037                                                       extents[i].len,
2038                                                       extents[i].tag,
2039                                                       extents[i].shared_seq);
2040         }
2041 
2042         list = list->next;
2043         i++;
2044     }
2045     if (group) {
2046         cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group);
2047     }
2048 
2049     /*
2050      * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
2051      *
2052      * All Dynamic Capacity event records shall set the Event Record Severity
2053      * field in the Common Event Record Format to Informational Event. All
2054      * Dynamic Capacity related events shall be logged in the Dynamic Capacity
2055      * Event Log.
2056      */
2057     cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
2058                             cxl_device_get_timestamp(&dcd->cxl_dstate));
2059 
2060     dCap.type = type;
2061     /* FIXME: for now, validity flag is cleared */
2062     dCap.validity_flags = 0;
2063     stw_le_p(&dCap.host_id, hid);
2064     /* only valid for DC_REGION_CONFIG_UPDATED event */
2065     dCap.updated_region_id = 0;
2066     for (i = 0; i < num_extents; i++) {
2067         memcpy(&dCap.dynamic_capacity_extent, &extents[i],
2068                sizeof(CXLDCExtentRaw));
2069 
2070         dCap.flags = 0;
2071         if (i < num_extents - 1) {
2072             /* Set "More" flag */
2073             dCap.flags |= BIT(0);
2074         }
2075 
2076         if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
2077                              (CXLEventRecordRaw *)&dCap)) {
2078             cxl_event_irq_assert(dcd);
2079         }
2080     }
2081 }
2082 
2083 void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id,
2084                                   CxlExtentSelectionPolicy sel_policy,
2085                                   uint8_t region, const char *tag,
2086                                   CxlDynamicCapacityExtentList *extents,
2087                                   Error **errp)
2088 {
2089     switch (sel_policy) {
2090     case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE:
2091         qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id,
2092                                                       DC_EVENT_ADD_CAPACITY,
2093                                                       region, extents, errp);
2094         return;
2095     default:
2096         error_setg(errp, "Selection policy not supported");
2097         return;
2098     }
2099 }
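/*
 * Illustrative QMP invocation for the command above (a sketch).  Member
 * names are assumed from the C parameters and qapi/cxl.json is
 * authoritative; "prescriptive" is the only selection policy handled here,
 * and extent offsets/lengths must be multiples of the region block size:
 *
 * { "execute": "cxl-add-dynamic-capacity",
 *   "arguments": { "path": "/machine/peripheral/cxl-dcd0",
 *                  "host-id": 0, "selection-policy": "prescriptive",
 *                  "region": 0,
 *                  "extents": [ { "offset": 0, "len": 134217728 } ] } }
 */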
2100 
2101 void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id,
2102                                       CxlExtentRemovalPolicy removal_policy,
2103                                       bool has_forced_removal,
2104                                       bool forced_removal,
2105                                       bool has_sanitize_on_release,
2106                                       bool sanitize_on_release,
2107                                       uint8_t region,
2108                                       const char *tag,
2109                                       CxlDynamicCapacityExtentList *extents,
2110                                       Error **errp)
2111 {
2112     CXLDCEventType type = DC_EVENT_RELEASE_CAPACITY;
2113 
2114     if (has_forced_removal && forced_removal) {
2115         /* TODO: enable forced removal in the future */
2116         type = DC_EVENT_FORCED_RELEASE_CAPACITY;
2117         error_setg(errp, "Forced removal not supported yet");
2118         return;
2119     }
2120 
2121     switch (removal_policy) {
2122     case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE:
2123         qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, type,
2124                                                       region, extents, errp);
2125         return;
2126     default:
2127         error_setg(errp, "Removal policy not supported");
2128         return;
2129     }
2130 }
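/*
 * Illustrative QMP invocation for the command above (a sketch; member names
 * assumed as for cxl-add-dynamic-capacity, with qapi/cxl.json authoritative).
 * Forced removal is rejected by this implementation, so the optional
 * "forced-removal" flag is left out:
 *
 * { "execute": "cxl-release-dynamic-capacity",
 *   "arguments": { "path": "/machine/peripheral/cxl-dcd0",
 *                  "host-id": 0, "removal-policy": "prescriptive",
 *                  "region": 0,
 *                  "extents": [ { "offset": 0, "len": 134217728 } ] } }
 */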
2131 
2132 static void ct3_class_init(ObjectClass *oc, void *data)
2133 {
2134     DeviceClass *dc = DEVICE_CLASS(oc);
2135     PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
2136     CXLType3Class *cvc = CXL_TYPE3_CLASS(oc);
2137 
2138     pc->realize = ct3_realize;
2139     pc->exit = ct3_exit;
2140     pc->class_id = PCI_CLASS_MEMORY_CXL;
2141     pc->vendor_id = PCI_VENDOR_ID_INTEL;
2142     pc->device_id = 0xd93; /* LVF for now */
2143     pc->revision = 1;
2144 
2145     pc->config_write = ct3d_config_write;
2146     pc->config_read = ct3d_config_read;
2147 
2148     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
2149     dc->desc = "CXL Memory Device (Type 3)";
2150     device_class_set_legacy_reset(dc, ct3d_reset);
2151     device_class_set_props(dc, ct3_props);
2152 
2153     cvc->get_lsa_size = get_lsa_size;
2154     cvc->get_lsa = get_lsa;
2155     cvc->set_lsa = set_lsa;
2156     cvc->set_cacheline = set_cacheline;
2157 }
2158 
2159 static const TypeInfo ct3d_info = {
2160     .name = TYPE_CXL_TYPE3,
2161     .parent = TYPE_PCI_DEVICE,
2162     .class_size = sizeof(struct CXLType3Class),
2163     .class_init = ct3_class_init,
2164     .instance_size = sizeof(CXLType3Dev),
2165     .interfaces = (InterfaceInfo[]) {
2166         { INTERFACE_CXL_DEVICE },
2167         { INTERFACE_PCIE_DEVICE },
2168         {}
2169     },
2170 };
2171 
2172 static void ct3d_registers(void)
2173 {
2174     type_register_static(&ct3d_info);
2175 }
2176 
2177 type_init(ct3d_registers);
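/*
 * Illustrative command-line instantiation of this device (a sketch; the bus
 * name, backend ids, paths and sizes are examples and must match the rest of
 * the configured CXL topology):
 *
 *  -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest.raw,size=256M
 *  -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M
 *  -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0
 */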
2178