1 /*
2 * CXL Type 3 (memory expander) device
3 *
4 * Copyright(C) 2020 Intel Corporation.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2. See the
7 * COPYING file in the top-level directory.
8 *
 * SPDX-License-Identifier: GPL-2.0-only
10 */
#include "qemu/osdep.h"
#include <math.h>
14 #include "qemu/units.h"
15 #include "qemu/error-report.h"
16 #include "qapi/qapi-commands-cxl.h"
17 #include "hw/mem/memory-device.h"
18 #include "hw/mem/pc-dimm.h"
19 #include "hw/pci/pci.h"
20 #include "hw/qdev-properties.h"
21 #include "hw/qdev-properties-system.h"
22 #include "qapi/error.h"
23 #include "qemu/log.h"
24 #include "qemu/module.h"
25 #include "qemu/pmem.h"
26 #include "qemu/range.h"
27 #include "qemu/rcu.h"
28 #include "qemu/guest-random.h"
29 #include "system/hostmem.h"
30 #include "system/numa.h"
31 #include "hw/cxl/cxl.h"
32 #include "hw/pci/msix.h"
33
34 /* type3 device private */
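/*
 * MSI-X layout: vector 0 services DOE table access interrupts, vectors
 * starting at CXL_T3_MSIX_EVENT_START provide one interrupt per event log,
 * and the mailbox vector immediately follows the event vectors.
 */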
35 enum CXL_T3_MSIX_VECTOR {
36 CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS = 0,
37 CXL_T3_MSIX_EVENT_START = 2,
38 CXL_T3_MSIX_MBOX = CXL_T3_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
39 CXL_T3_MSIX_VECTOR_NR
40 };
41
42 #define DWORD_BYTE 4
43 #define CXL_CAPACITY_MULTIPLIER (256 * MiB)
44
45 /* Default CDAT entries for a memory region */
46 enum {
47 CT3_CDAT_DSMAS,
48 CT3_CDAT_DSLBIS0,
49 CT3_CDAT_DSLBIS1,
50 CT3_CDAT_DSLBIS2,
51 CT3_CDAT_DSLBIS3,
52 CT3_CDAT_DSEMTS,
53 CT3_CDAT_NUM_ENTRIES
54 };
55
static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
57 int dsmad_handle, uint64_t size,
58 bool is_pmem, bool is_dynamic,
59 uint64_t dpa_base)
60 {
61 CDATDsmas *dsmas;
62 CDATDslbis *dslbis0;
63 CDATDslbis *dslbis1;
64 CDATDslbis *dslbis2;
65 CDATDslbis *dslbis3;
66 CDATDsemts *dsemts;
67
68 dsmas = g_malloc(sizeof(*dsmas));
69 *dsmas = (CDATDsmas) {
70 .header = {
71 .type = CDAT_TYPE_DSMAS,
72 .length = sizeof(*dsmas),
73 },
74 .DSMADhandle = dsmad_handle,
75 .flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
76 (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
77 .DPA_base = dpa_base,
78 .DPA_length = size,
79 };
80
    /* For now, no memory side cache; roughly plausible numbers */
82 dslbis0 = g_malloc(sizeof(*dslbis0));
83 *dslbis0 = (CDATDslbis) {
84 .header = {
85 .type = CDAT_TYPE_DSLBIS,
86 .length = sizeof(*dslbis0),
87 },
88 .handle = dsmad_handle,
89 .flags = HMAT_LB_MEM_MEMORY,
90 .data_type = HMAT_LB_DATA_READ_LATENCY,
91 .entry_base_unit = 10000, /* 10ns base */
92 .entry[0] = 15, /* 150ns */
93 };
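    /*
     * The DSLBIS entries use the HMAT encoding: the reported value is
     * entry[0] * entry_base_unit, with latency base units in picoseconds
     * and bandwidth base units in MB/s. So 15 * 10000 ps = 150 ns here,
     * and the bandwidth entries below give 16 * 1000 MB/s = 16 GB/s.
     */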
94
95 dslbis1 = g_malloc(sizeof(*dslbis1));
96 *dslbis1 = (CDATDslbis) {
97 .header = {
98 .type = CDAT_TYPE_DSLBIS,
99 .length = sizeof(*dslbis1),
100 },
101 .handle = dsmad_handle,
102 .flags = HMAT_LB_MEM_MEMORY,
103 .data_type = HMAT_LB_DATA_WRITE_LATENCY,
104 .entry_base_unit = 10000,
105 .entry[0] = 25, /* 250ns */
106 };
107
108 dslbis2 = g_malloc(sizeof(*dslbis2));
109 *dslbis2 = (CDATDslbis) {
110 .header = {
111 .type = CDAT_TYPE_DSLBIS,
112 .length = sizeof(*dslbis2),
113 },
114 .handle = dsmad_handle,
115 .flags = HMAT_LB_MEM_MEMORY,
116 .data_type = HMAT_LB_DATA_READ_BANDWIDTH,
117 .entry_base_unit = 1000, /* GB/s */
118 .entry[0] = 16,
119 };
120
121 dslbis3 = g_malloc(sizeof(*dslbis3));
122 *dslbis3 = (CDATDslbis) {
123 .header = {
124 .type = CDAT_TYPE_DSLBIS,
125 .length = sizeof(*dslbis3),
126 },
127 .handle = dsmad_handle,
128 .flags = HMAT_LB_MEM_MEMORY,
129 .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
130 .entry_base_unit = 1000, /* GB/s */
131 .entry[0] = 16,
132 };
133
134 dsemts = g_malloc(sizeof(*dsemts));
135 *dsemts = (CDATDsemts) {
136 .header = {
137 .type = CDAT_TYPE_DSEMTS,
138 .length = sizeof(*dsemts),
139 },
140 .DSMAS_handle = dsmad_handle,
141 /*
         * NV: Reserved - the non-volatile flag from DSMAS is what matters
143 * V: EFI_MEMORY_SP
144 */
145 .EFI_memory_type_attr = is_pmem ? 2 : 1,
146 .DPA_offset = 0,
147 .DPA_length = size,
148 };
149
150 /* Header always at start of structure */
151 cdat_table[CT3_CDAT_DSMAS] = (CDATSubHeader *)dsmas;
152 cdat_table[CT3_CDAT_DSLBIS0] = (CDATSubHeader *)dslbis0;
153 cdat_table[CT3_CDAT_DSLBIS1] = (CDATSubHeader *)dslbis1;
154 cdat_table[CT3_CDAT_DSLBIS2] = (CDATSubHeader *)dslbis2;
155 cdat_table[CT3_CDAT_DSLBIS3] = (CDATSubHeader *)dslbis3;
156 cdat_table[CT3_CDAT_DSEMTS] = (CDATSubHeader *)dsemts;
157 }
158
static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
160 {
161 g_autofree CDATSubHeader **table = NULL;
162 CXLType3Dev *ct3d = priv;
163 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
164 MemoryRegion *dc_mr = NULL;
165 uint64_t vmr_size = 0, pmr_size = 0;
166 int dsmad_handle = 0;
167 int cur_ent = 0;
168 int len = 0;
169
170 if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
171 return 0;
172 }
173
174 if (ct3d->hostvmem) {
175 volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
176 if (!volatile_mr) {
177 return -EINVAL;
178 }
179 len += CT3_CDAT_NUM_ENTRIES;
180 vmr_size = memory_region_size(volatile_mr);
181 }
182
183 if (ct3d->hostpmem) {
184 nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostpmem);
185 if (!nonvolatile_mr) {
186 return -EINVAL;
187 }
188 len += CT3_CDAT_NUM_ENTRIES;
189 pmr_size = memory_region_size(nonvolatile_mr);
190 }
191
192 if (ct3d->dc.num_regions) {
193 if (!ct3d->dc.host_dc) {
194 return -EINVAL;
195 }
196 dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
197 if (!dc_mr) {
198 return -EINVAL;
199 }
200 len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
201 }
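    /*
     * Each address range (static volatile, static persistent, and every DC
     * region) contributes CT3_CDAT_NUM_ENTRIES entries, so, for example, a
     * device with one volatile backend and two DC regions ends up with
     * len = 6 * 3 = 18 sub-table entries.
     */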
202
203 table = g_malloc0(len * sizeof(*table));
204
205 /* Now fill them in */
206 if (volatile_mr) {
207 ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
208 false, false, 0);
209 cur_ent = CT3_CDAT_NUM_ENTRIES;
210 }
211
212 if (nonvolatile_mr) {
213 uint64_t base = vmr_size;
214 ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
215 pmr_size, true, false, base);
216 cur_ent += CT3_CDAT_NUM_ENTRIES;
217 }
218
219 if (dc_mr) {
220 int i;
221 uint64_t region_base = vmr_size + pmr_size;
222
223 /*
224 * We assume the dynamic capacity to be volatile for now.
225 * Non-volatile dynamic capacity will be added if needed in the
226 * future.
227 */
228 for (i = 0; i < ct3d->dc.num_regions; i++) {
229 ct3d->dc.regions[i].nonvolatile = false;
230 ct3d->dc.regions[i].sharable = false;
231 ct3d->dc.regions[i].hw_managed_coherency = false;
232 ct3d->dc.regions[i].ic_specific_dc_management = false;
233 ct3d->dc.regions[i].rdonly = false;
234 ct3_build_cdat_entries_for_mr(&(table[cur_ent]),
235 dsmad_handle++,
236 ct3d->dc.regions[i].len,
237 ct3d->dc.regions[i].nonvolatile,
238 true, region_base);
239 ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
240
241 cur_ent += CT3_CDAT_NUM_ENTRIES;
242 region_base += ct3d->dc.regions[i].len;
243 }
244 }
245
246 assert(len == cur_ent);
247
248 *cdat_table = g_steal_pointer(&table);
249
250 return len;
251 }
252
static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv)
254 {
255 int i;
256
257 for (i = 0; i < num; i++) {
258 g_free(cdat_table[i]);
259 }
260 g_free(cdat_table);
261 }
262
static bool cxl_doe_cdat_rsp(DOECap *doe_cap)
264 {
265 CDATObject *cdat = &CXL_TYPE3(doe_cap->pdev)->cxl_cstate.cdat;
266 uint16_t ent;
267 void *base;
268 uint32_t len;
269 CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
270 CDATRsp rsp;
271
272 assert(cdat->entry_len);
273
274 /* Discard if request length mismatched */
275 if (pcie_doe_get_obj_len(req) <
276 DIV_ROUND_UP(sizeof(CDATReq), DWORD_BYTE)) {
277 return false;
278 }
279
280 ent = req->entry_handle;
281 base = cdat->entry[ent].base;
282 len = cdat->entry[ent].length;
283
284 rsp = (CDATRsp) {
285 .header = {
286 .vendor_id = CXL_VENDOR_ID,
287 .data_obj_type = CXL_DOE_TABLE_ACCESS,
288 .reserved = 0x0,
289 .length = DIV_ROUND_UP((sizeof(rsp) + len), DWORD_BYTE),
290 },
291 .rsp_code = CXL_DOE_TAB_RSP,
292 .table_type = CXL_DOE_TAB_TYPE_CDAT,
293 .entry_handle = (ent < cdat->entry_len - 1) ?
294 ent + 1 : CXL_DOE_TAB_ENT_MAX,
295 };
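    /*
     * The read mailbox carries the table-access response header followed by
     * the raw CDAT entry; DOE object lengths are expressed in DWORDs.
     * entry_handle points at the next entry (or CXL_DOE_TAB_ENT_MAX at the
     * end) so the host can walk the whole table with successive requests.
     */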
296
297 memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp));
298 memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), DWORD_BYTE),
299 base, len);
300
301 doe_cap->read_mbox_len += rsp.header.length;
302
303 return true;
304 }
305
static uint32_t ct3d_config_read(PCIDevice *pci_dev, uint32_t addr, int size)
307 {
308 CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
309 uint32_t val;
310
311 if (pcie_doe_read_config(&ct3d->doe_cdat, addr, size, &val)) {
312 return val;
313 }
314
315 return pci_default_read_config(pci_dev, addr, size);
316 }
317
static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val,
319 int size)
320 {
321 CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
322
323 pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size);
324 pci_default_write_config(pci_dev, addr, val, size);
325 pcie_aer_write_config(pci_dev, addr, val, size);
326 }
327
328 /*
329 * Null value of all Fs suggested by IEEE RA guidelines for use of
 * EUI, OUI and CID
331 */
332 #define UI64_NULL ~(0ULL)
333
static void build_dvsecs(CXLType3Dev *ct3d)
335 {
336 CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
337 uint8_t *dvsec;
338 uint32_t range1_size_hi, range1_size_lo,
339 range1_base_hi = 0, range1_base_lo = 0,
340 range2_size_hi = 0, range2_size_lo = 0,
341 range2_base_hi = 0, range2_base_lo = 0;
342
343 /*
344 * Volatile memory is mapped as (0x0)
345 * Persistent memory is mapped at (volatile->size)
346 */
347 if (ct3d->hostvmem) {
348 range1_size_hi = ct3d->hostvmem->size >> 32;
349 range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
350 (ct3d->hostvmem->size & 0xF0000000);
351 if (ct3d->hostpmem) {
352 range2_size_hi = ct3d->hostpmem->size >> 32;
353 range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
354 (ct3d->hostpmem->size & 0xF0000000);
355 }
356 } else if (ct3d->hostpmem) {
357 range1_size_hi = ct3d->hostpmem->size >> 32;
358 range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
359 (ct3d->hostpmem->size & 0xF0000000);
360 } else {
361 /*
362 * For DCD with no static memory, set memory active, memory class bits.
363 * No range is set.
364 */
365 range1_size_hi = 0;
366 range1_size_lo = (2 << 5) | (2 << 2) | 0x3;
367 }
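    /*
     * Rough decode of the *_size_lo values above: bits [1:0] set Memory Info
     * Valid and Memory Active, the (2 << 2) and (2 << 5) fields select the
     * media type and memory class, and the top nibble carries size bits
     * [31:28] (DVSEC ranges are 256 MiB aligned, so the lower size bits are
     * implied zero).
     */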
368
369 dvsec = (uint8_t *)&(CXLDVSECDevice){
370 .cap = 0x1e,
371 .ctrl = 0x2,
372 .status2 = 0x2,
373 .range1_size_hi = range1_size_hi,
374 .range1_size_lo = range1_size_lo,
375 .range1_base_hi = range1_base_hi,
376 .range1_base_lo = range1_base_lo,
377 .range2_size_hi = range2_size_hi,
378 .range2_size_lo = range2_size_lo,
379 .range2_base_hi = range2_base_hi,
380 .range2_base_lo = range2_base_lo,
381 };
382 cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
383 PCIE_CXL_DEVICE_DVSEC_LENGTH,
384 PCIE_CXL_DEVICE_DVSEC,
385 PCIE_CXL31_DEVICE_DVSEC_REVID, dvsec);
386
387 dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
388 .rsvd = 0,
389 .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
390 .reg0_base_hi = 0,
391 .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX,
392 .reg1_base_hi = 0,
393 };
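    /*
     * Each register block pointer packs the target BAR index into the low
     * bits and the register block identifier (the RBI_* value) into the
     * identifier field of the low dword; the block offset within the BAR is
     * left at zero.
     */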
394 cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
395 REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
396 REG_LOC_DVSEC_REVID, dvsec);
397 dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){
398 .phase2_duration = 0x603, /* 3 seconds */
        .phase2_power = 0x33, /* 0x33 milliwatts */
400 };
401 cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
402 GPF_DEVICE_DVSEC_LENGTH, GPF_DEVICE_DVSEC,
403 GPF_DEVICE_DVSEC_REVID, dvsec);
404
405 dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
406 .cap = 0x26, /* 68B, IO, Mem, non-MLD */
407 .ctrl = 0x02, /* IO always enabled */
408 .status = 0x26, /* same as capabilities */
409 .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? */
410 };
411 cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
412 PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
413 PCIE_FLEXBUS_PORT_DVSEC,
414 PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec);
415 }
416
static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
418 {
419 int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
420 ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
421 uint32_t *cache_mem = cregs->cache_mem_registers;
422 uint32_t ctrl;
423
424 ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
425 /* TODO: Sanity checks that the decoder is possible */
426 ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
427 ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
428
429 stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
430 }
431
static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which)
433 {
434 int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
435 ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
436 uint32_t *cache_mem = cregs->cache_mem_registers;
437 uint32_t ctrl;
438
439 ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
440
441 ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
442 ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0);
443
444 stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
445 }
446
static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
448 {
449 switch (qmp_err) {
450 case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
451 return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
452 case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
453 return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
454 case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
455 return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
456 case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
457 return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
458 case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
459 return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
460 case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
461 return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
462 case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
463 return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
464 case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
465 return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
466 case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
467 return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
468 case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
469 return CXL_RAS_UNC_ERR_RSVD_ENCODING;
470 case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
471 return CXL_RAS_UNC_ERR_POISON_RECEIVED;
472 case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
473 return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
474 case CXL_UNCOR_ERROR_TYPE_INTERNAL:
475 return CXL_RAS_UNC_ERR_INTERNAL;
476 case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
477 return CXL_RAS_UNC_ERR_CXL_IDE_TX;
478 case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
479 return CXL_RAS_UNC_ERR_CXL_IDE_RX;
480 default:
481 return -EINVAL;
482 }
483 }
484
static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err)
486 {
487 switch (qmp_err) {
488 case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
489 return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
490 case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
491 return CXL_RAS_COR_ERR_MEM_DATA_ECC;
492 case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
493 return CXL_RAS_COR_ERR_CRC_THRESHOLD;
494 case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
495 return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
496 case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
497 return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
498 case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
499 return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
500 case CXL_COR_ERROR_TYPE_PHYSICAL:
501 return CXL_RAS_COR_ERR_PHYSICAL;
502 default:
503 return -EINVAL;
504 }
505 }
506
static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
508 unsigned size)
509 {
510 CXLComponentState *cxl_cstate = opaque;
511 ComponentRegisters *cregs = &cxl_cstate->crb;
512 CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate);
513 uint32_t *cache_mem = cregs->cache_mem_registers;
514 bool should_commit = false;
515 bool should_uncommit = false;
516 int which_hdm = -1;
517
518 assert(size == 4);
519 g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE);
520
521 switch (offset) {
522 case A_CXL_HDM_DECODER0_CTRL:
523 should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
524 should_uncommit = !should_commit;
525 which_hdm = 0;
526 break;
527 case A_CXL_HDM_DECODER1_CTRL:
528 should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
529 should_uncommit = !should_commit;
530 which_hdm = 1;
531 break;
532 case A_CXL_HDM_DECODER2_CTRL:
533 should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
534 should_uncommit = !should_commit;
535 which_hdm = 2;
536 break;
537 case A_CXL_HDM_DECODER3_CTRL:
538 should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
539 should_uncommit = !should_commit;
540 which_hdm = 3;
541 break;
542 case A_CXL_RAS_UNC_ERR_STATUS:
543 {
544 uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
545 uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL,
546 FIRST_ERROR_POINTER);
547 CXLError *cxl_err;
548 uint32_t unc_err;
549
550 /*
             * If a single bit is written that corresponds to clearing the
             * first error pointer, update the status and header log.
553 */
554 if (!QTAILQ_EMPTY(&ct3d->error_list)) {
555 if ((1 << fe) ^ value) {
556 CXLError *cxl_next;
557 /*
                 * Software is using the wrong flow for multiple header
                 * recording. Follow the behavior in PCIe r6.0 and assume
                 * multiple header support. Implementation-defined choice to
                 * clear all matching records if more than one bit is set,
                 * which corresponds most closely to the behavior of hardware
                 * not capable of multiple header recording.
564 */
565 QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node,
566 cxl_next) {
567 if ((1 << cxl_err->type) & value) {
568 QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
569 g_free(cxl_err);
570 }
571 }
572 } else {
573 /* Done with previous FE, so drop from list */
574 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
575 QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
576 g_free(cxl_err);
577 }
578
579 /*
580 * If there is another FE, then put that in place and update
581 * the header log
582 */
583 if (!QTAILQ_EMPTY(&ct3d->error_list)) {
584 uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
585 int i;
586
587 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
588 for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
589 stl_le_p(header_log + i, cxl_err->header[i]);
590 }
591 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
592 FIRST_ERROR_POINTER, cxl_err->type);
593 } else {
594 /*
595 * If no more errors, then follow recommendation of PCI spec
596 * r6.0 6.2.4.2 to set the first error pointer to a status
597 * bit that will never be used.
598 */
599 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
600 FIRST_ERROR_POINTER,
601 CXL_RAS_UNC_ERR_CXL_UNUSED);
602 }
603 stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl);
604 }
605 unc_err = 0;
606 QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
607 unc_err |= 1 << cxl_err->type;
608 }
609 stl_le_p((uint8_t *)cache_mem + offset, unc_err);
610
611 return;
612 }
613 case A_CXL_RAS_COR_ERR_STATUS:
614 {
615 uint32_t rw1c = value;
616 uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset);
617 temp &= ~rw1c;
618 stl_le_p((uint8_t *)cache_mem + offset, temp);
619 return;
620 }
621 default:
622 break;
623 }
624
625 stl_le_p((uint8_t *)cache_mem + offset, value);
626 if (should_commit) {
627 hdm_decoder_commit(ct3d, which_hdm);
628 } else if (should_uncommit) {
629 hdm_decoder_uncommit(ct3d, which_hdm);
630 }
631 }
632
633 /*
634 * TODO: dc region configuration will be updated once host backend and address
635 * space support is added for DCD.
636 */
static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
638 {
639 int i;
640 uint64_t region_base = 0;
641 uint64_t region_len;
642 uint64_t decode_len;
643 uint64_t blk_size = 2 * MiB;
644 /* Only 1 block size is supported for now. */
645 uint64_t supported_blk_size_bitmask = blk_size;
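    /*
     * The supported block size bitmask uses bit n to advertise support for a
     * 2^n byte block, so assigning blk_size (2 MiB == BIT(21)) advertises
     * exactly the single 2 MiB block size configured above.
     */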
646 CXLDCRegion *region;
647 MemoryRegion *mr;
648 uint64_t dc_size;
649
650 mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
651 dc_size = memory_region_size(mr);
652 region_len = DIV_ROUND_UP(dc_size, ct3d->dc.num_regions);
653
654 if (dc_size % (ct3d->dc.num_regions * CXL_CAPACITY_MULTIPLIER) != 0) {
655 error_setg(errp,
656 "backend size is not multiple of region len: 0x%" PRIx64,
657 region_len);
658 return false;
659 }
660 if (region_len % CXL_CAPACITY_MULTIPLIER != 0) {
661 error_setg(errp, "DC region size is unaligned to 0x%" PRIx64,
662 CXL_CAPACITY_MULTIPLIER);
663 return false;
664 }
665 decode_len = region_len;
666
667 if (ct3d->hostvmem) {
668 mr = host_memory_backend_get_memory(ct3d->hostvmem);
669 region_base += memory_region_size(mr);
670 }
671 if (ct3d->hostpmem) {
672 mr = host_memory_backend_get_memory(ct3d->hostpmem);
673 region_base += memory_region_size(mr);
674 }
675 if (region_base % CXL_CAPACITY_MULTIPLIER != 0) {
676 error_setg(errp, "DC region base not aligned to 0x%" PRIx64,
677 CXL_CAPACITY_MULTIPLIER);
678 return false;
679 }
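    /*
     * DC regions are laid out back to back in DPA space directly after any
     * static volatile and persistent capacity. For example, a 2 GiB
     * volatile-dc-memdev with num-dc-regions=2 produces two 1 GiB regions
     * whose bases start where the static capacity ends.
     */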
680
681 for (i = 0, region = &ct3d->dc.regions[0];
682 i < ct3d->dc.num_regions;
683 i++, region++, region_base += region_len) {
684 *region = (CXLDCRegion) {
685 .base = region_base,
686 .decode_len = decode_len,
687 .len = region_len,
688 .block_size = blk_size,
689 /* dsmad_handle set when creating CDAT table entries */
690 .flags = 0,
691 .supported_blk_size_bitmask = supported_blk_size_bitmask,
692 };
693 ct3d->dc.total_capacity += region->len;
694 region->blk_bitmap = bitmap_new(region->len / region->block_size);
        qemu_mutex_init(&region->bitmap_lock);
696 }
697 QTAILQ_INIT(&ct3d->dc.extents);
698 QTAILQ_INIT(&ct3d->dc.extents_pending);
699
700 return true;
701 }
702
static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
704 {
705 CXLDCExtent *ent, *ent_next;
706 CXLDCExtentGroup *group, *group_next;
707 int i;
708 CXLDCRegion *region;
709
710 QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
711 cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
712 }
713
714 QTAILQ_FOREACH_SAFE(group, &ct3d->dc.extents_pending, node, group_next) {
715 QTAILQ_REMOVE(&ct3d->dc.extents_pending, group, node);
716 QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) {
717 cxl_remove_extent_from_extent_list(&group->list, ent);
718 }
719 g_free(group);
720 }
721
722 for (i = 0; i < ct3d->dc.num_regions; i++) {
723 region = &ct3d->dc.regions[i];
724 g_free(region->blk_bitmap);
725 }
726 }
727
static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
729 {
730 DeviceState *ds = DEVICE(ct3d);
731
732 if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
733 && !ct3d->dc.num_regions) {
734 error_setg(errp, "at least one memdev property must be set");
735 return false;
736 } else if (ct3d->hostmem && ct3d->hostpmem) {
737 error_setg(errp, "[memdev] cannot be used with new "
738 "[persistent-memdev] property");
739 return false;
740 } else if (ct3d->hostmem) {
741 /* Use of hostmem property implies pmem */
742 ct3d->hostpmem = ct3d->hostmem;
743 ct3d->hostmem = NULL;
744 }
745
746 if (ct3d->hostpmem && !ct3d->lsa) {
747 error_setg(errp, "lsa property must be set for persistent devices");
748 return false;
749 }
750
751 if (ct3d->hostvmem) {
752 MemoryRegion *vmr;
753 char *v_name;
754
755 vmr = host_memory_backend_get_memory(ct3d->hostvmem);
756 if (!vmr) {
757 error_setg(errp, "volatile memdev must have backing device");
758 return false;
759 }
760 if (host_memory_backend_is_mapped(ct3d->hostvmem)) {
761 error_setg(errp, "memory backend %s can't be used multiple times.",
762 object_get_canonical_path_component(OBJECT(ct3d->hostvmem)));
763 return false;
764 }
765 memory_region_set_nonvolatile(vmr, false);
766 memory_region_set_enabled(vmr, true);
767 host_memory_backend_set_mapped(ct3d->hostvmem, true);
768 if (ds->id) {
769 v_name = g_strdup_printf("cxl-type3-dpa-vmem-space:%s", ds->id);
770 } else {
771 v_name = g_strdup("cxl-type3-dpa-vmem-space");
772 }
773 address_space_init(&ct3d->hostvmem_as, vmr, v_name);
774 ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
775 ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
776 g_free(v_name);
777 }
778
779 if (ct3d->hostpmem) {
780 MemoryRegion *pmr;
781 char *p_name;
782
783 pmr = host_memory_backend_get_memory(ct3d->hostpmem);
784 if (!pmr) {
785 error_setg(errp, "persistent memdev must have backing device");
786 return false;
787 }
788 if (host_memory_backend_is_mapped(ct3d->hostpmem)) {
789 error_setg(errp, "memory backend %s can't be used multiple times.",
790 object_get_canonical_path_component(OBJECT(ct3d->hostpmem)));
791 return false;
792 }
793 memory_region_set_nonvolatile(pmr, true);
794 memory_region_set_enabled(pmr, true);
795 host_memory_backend_set_mapped(ct3d->hostpmem, true);
796 if (ds->id) {
797 p_name = g_strdup_printf("cxl-type3-dpa-pmem-space:%s", ds->id);
798 } else {
799 p_name = g_strdup("cxl-type3-dpa-pmem-space");
800 }
801 address_space_init(&ct3d->hostpmem_as, pmr, p_name);
802 ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
803 ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
804 g_free(p_name);
805 }
806
807 ct3d->dc.total_capacity = 0;
808 if (ct3d->dc.num_regions > 0) {
809 MemoryRegion *dc_mr;
810 char *dc_name;
811
812 if (!ct3d->dc.host_dc) {
813 error_setg(errp, "dynamic capacity must have a backing device");
814 return false;
815 }
816
817 dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
818 if (!dc_mr) {
819 error_setg(errp, "dynamic capacity must have a backing device");
820 return false;
821 }
822
823 if (host_memory_backend_is_mapped(ct3d->dc.host_dc)) {
824 error_setg(errp, "memory backend %s can't be used multiple times.",
825 object_get_canonical_path_component(OBJECT(ct3d->dc.host_dc)));
826 return false;
827 }
828 /*
829 * Set DC regions as volatile for now, non-volatile support can
830 * be added in the future if needed.
831 */
832 memory_region_set_nonvolatile(dc_mr, false);
833 memory_region_set_enabled(dc_mr, true);
834 host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
835 if (ds->id) {
836 dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
837 } else {
838 dc_name = g_strdup("cxl-dcd-dpa-dc-space");
839 }
840 address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
841 g_free(dc_name);
842
843 if (!cxl_create_dc_regions(ct3d, errp)) {
            error_append_hint(errp, "Failed to set up DC regions.\n");
845 return false;
846 }
847 }
848
849 return true;
850 }
851
852 static DOEProtocol doe_cdat_prot[] = {
853 { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp },
854 { }
855 };
856
857 /* Initialize CXL device alerts with default threshold values. */
static void init_alert_config(CXLType3Dev *ct3d)
859 {
860 ct3d->alert_config = (CXLAlertConfig) {
861 .life_used_crit_alert_thresh = 75,
862 .life_used_warn_thresh = 40,
863 .over_temp_crit_alert_thresh = 35,
864 .under_temp_crit_alert_thresh = 10,
865 .over_temp_warn_thresh = 25,
866 .under_temp_warn_thresh = 20
867 };
868 }
869
static void ct3_realize(PCIDevice *pci_dev, Error **errp)
871 {
872 ERRP_GUARD();
873 CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
874 CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
875 ComponentRegisters *regs = &cxl_cstate->crb;
    MemoryRegion *mr = &regs->component_registers;
877 uint8_t *pci_conf = pci_dev->config;
878 int i, rc;
879 uint16_t count;
880
881 QTAILQ_INIT(&ct3d->error_list);
882
883 if (!cxl_setup_memory(ct3d, errp)) {
884 return;
885 }
886
887 pci_config_set_prog_interface(pci_conf, 0x10);
888
889 pcie_endpoint_cap_init(pci_dev, 0x80);
890 if (ct3d->sn != UI64_NULL) {
891 pcie_dev_ser_num_init(pci_dev, 0x100, ct3d->sn);
892 cxl_cstate->dvsec_offset = 0x100 + 0x0c;
893 } else {
894 cxl_cstate->dvsec_offset = 0x100;
895 }
896
897 ct3d->cxl_cstate.pdev = pci_dev;
898 build_dvsecs(ct3d);
899
900 regs->special_ops = g_new0(MemoryRegionOps, 1);
901 regs->special_ops->write = ct3d_reg_write;
902
903 cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
904 TYPE_CXL_TYPE3);
905
906 pci_register_bar(
907 pci_dev, CXL_COMPONENT_REG_BAR_IDX,
908 PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr);
909
910 cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate,
911 &ct3d->cci);
912 pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX,
913 PCI_BASE_ADDRESS_SPACE_MEMORY |
914 PCI_BASE_ADDRESS_MEM_TYPE_64,
915 &ct3d->cxl_dstate.device_registers);
916
917 /* MSI(-X) Initialization */
918 rc = msix_init_exclusive_bar(pci_dev, CXL_T3_MSIX_VECTOR_NR, 4, errp);
919 if (rc) {
920 goto err_free_special_ops;
921 }
922 for (i = 0; i < CXL_T3_MSIX_VECTOR_NR; i++) {
923 msix_vector_use(pci_dev, i);
924 }
925
926 /* DOE Initialization */
927 pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
928 CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS);
929
930 cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
931 cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
932 cxl_cstate->cdat.private = ct3d;
933 if (!cxl_doe_cdat_init(cxl_cstate, errp)) {
934 goto err_msix_uninit;
935 }
936
937 init_alert_config(ct3d);
938 pcie_cap_deverr_init(pci_dev);
939 /* Leave a bit of room for expansion */
940 rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, errp);
941 if (rc) {
942 goto err_release_cdat;
943 }
944 cxl_event_init(&ct3d->cxl_dstate, CXL_T3_MSIX_EVENT_START);
945
946 /* Set default value for patrol scrub attributes */
947 ct3d->patrol_scrub_attrs.scrub_cycle_cap =
948 CXL_MEMDEV_PS_SCRUB_CYCLE_CHANGE_CAP_DEFAULT |
949 CXL_MEMDEV_PS_SCRUB_REALTIME_REPORT_CAP_DEFAULT;
950 ct3d->patrol_scrub_attrs.scrub_cycle =
951 CXL_MEMDEV_PS_CUR_SCRUB_CYCLE_DEFAULT |
952 (CXL_MEMDEV_PS_MIN_SCRUB_CYCLE_DEFAULT << 8);
953 ct3d->patrol_scrub_attrs.scrub_flags = CXL_MEMDEV_PS_ENABLE_DEFAULT;
954
955 /* Set default value for DDR5 ECS read attributes */
956 ct3d->ecs_attrs.ecs_log_cap = CXL_ECS_LOG_ENTRY_TYPE_DEFAULT;
957 for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) {
958 ct3d->ecs_attrs.fru_attrs[count].ecs_cap =
959 CXL_ECS_REALTIME_REPORT_CAP_DEFAULT;
960 ct3d->ecs_attrs.fru_attrs[count].ecs_config =
961 CXL_ECS_THRESHOLD_COUNT_DEFAULT |
962 (CXL_ECS_MODE_DEFAULT << 3);
963 /* Reserved */
964 ct3d->ecs_attrs.fru_attrs[count].ecs_flags = 0;
965 }
966
967 return;
968
969 err_release_cdat:
970 cxl_doe_cdat_release(cxl_cstate);
971 err_msix_uninit:
972 msix_uninit_exclusive_bar(pci_dev);
973 err_free_special_ops:
974 g_free(regs->special_ops);
975 if (ct3d->dc.host_dc) {
976 cxl_destroy_dc_regions(ct3d);
977 address_space_destroy(&ct3d->dc.host_dc_as);
978 }
979 if (ct3d->hostpmem) {
980 address_space_destroy(&ct3d->hostpmem_as);
981 }
982 if (ct3d->hostvmem) {
983 address_space_destroy(&ct3d->hostvmem_as);
984 }
985 }
986
static void ct3_exit(PCIDevice *pci_dev)
988 {
989 CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
990 CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
991 ComponentRegisters *regs = &cxl_cstate->crb;
992
993 pcie_aer_exit(pci_dev);
994 cxl_doe_cdat_release(cxl_cstate);
995 msix_uninit_exclusive_bar(pci_dev);
996 g_free(regs->special_ops);
997 cxl_destroy_cci(&ct3d->cci);
998 if (ct3d->dc.host_dc) {
999 cxl_destroy_dc_regions(ct3d);
1000 address_space_destroy(&ct3d->dc.host_dc_as);
1001 }
1002 if (ct3d->hostpmem) {
1003 address_space_destroy(&ct3d->hostpmem_as);
1004 }
1005 if (ct3d->hostvmem) {
1006 address_space_destroy(&ct3d->hostvmem_as);
1007 }
1008 }
1009
1010 /*
 * Mark the DPA range [dpa, dpa + len - 1] to be backed and accessible. This
1012 * happens when a DC extent is added and accepted by the host.
1013 */
void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
1015 uint64_t len)
1016 {
1017 CXLDCRegion *region;
1018
1019 region = cxl_find_dc_region(ct3d, dpa, len);
1020 if (!region) {
1021 return;
1022 }
1023
    QEMU_LOCK_GUARD(&region->bitmap_lock);
1025 bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
1026 len / region->block_size);
1027 }
1028
1029 /*
1030 * Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents.
1031 * Used when validating read/write to dc regions
1032 */
bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
1034 uint64_t len)
1035 {
1036 CXLDCRegion *region;
1037 uint64_t nbits;
1038 long nr;
1039
1040 region = cxl_find_dc_region(ct3d, dpa, len);
1041 if (!region) {
1042 return false;
1043 }
1044
1045 nr = (dpa - region->base) / region->block_size;
1046 nbits = DIV_ROUND_UP(len, region->block_size);
1047 /*
     * If the bits covering [dpa, dpa + len) are all set, the DPA range is
     * fully backed with DC extents, so return true; otherwise return false.
1050 */
    QEMU_LOCK_GUARD(&region->bitmap_lock);
1052 return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits;
1053 }
1054
1055 /*
 * Mark the DPA range [dpa, dpa + len - 1] to be unbacked and inaccessible.
1057 * This happens when a dc extent is released by the host.
1058 */
void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
1060 uint64_t len)
1061 {
1062 CXLDCRegion *region;
1063 uint64_t nbits;
1064 long nr;
1065
1066 region = cxl_find_dc_region(ct3d, dpa, len);
1067 if (!region) {
1068 return;
1069 }
1070
1071 nr = (dpa - region->base) / region->block_size;
1072 nbits = len / region->block_size;
    QEMU_LOCK_GUARD(&region->bitmap_lock);
1074 bitmap_clear(region->blk_bitmap, nr, nbits);
1075 }
1076
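/*
 * Translate a host physical address to a device physical address by walking
 * the committed HDM decoders. The interleave granularity is 2^(ig + 8) bytes
 * and the encoded interleave ways (iw) determine how HPA bits above the
 * granularity map back to the DPA: for power-of-2 ways the upper bits are
 * simply shifted down by iw, while the 3/6/12-way encodings additionally
 * divide by three.
 */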
static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
1078 {
1079 int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
1080 uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1081 unsigned int hdm_count;
1082 uint32_t cap;
1083 uint64_t dpa_base = 0;
1084 int i;
1085
1086 cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY);
1087 hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap,
1088 CXL_HDM_DECODER_CAPABILITY,
1089 DECODER_COUNT));
1090
1091 for (i = 0; i < hdm_count; i++) {
1092 uint64_t decoder_base, decoder_size, hpa_offset, skip;
1093 uint32_t hdm_ctrl, low, high;
1094 int ig, iw;
1095
1096 low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc);
1097 high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc);
1098 decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000);
1099
1100 low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc);
1101 high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc);
1102 decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000);
1103
1104 low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO +
1105 i * hdm_inc);
1106 high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI +
1107 i * hdm_inc);
1108 skip = ((uint64_t)high << 32) | (low & 0xf0000000);
1109 dpa_base += skip;
1110
1111 hpa_offset = (uint64_t)host_addr - decoder_base;
1112
1113 hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc);
1114 iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW);
1115 ig = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IG);
1116 if (!FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) {
1117 return false;
1118 }
1119 if (((uint64_t)host_addr < decoder_base) ||
1120 (hpa_offset >= decoder_size)) {
1121 int decoded_iw = cxl_interleave_ways_dec(iw, &error_fatal);
1122
1123 if (decoded_iw == 0) {
1124 return false;
1125 }
1126
1127 dpa_base += decoder_size / decoded_iw;
1128 continue;
1129 }
1130
1131 if (iw < 8) {
1132 *dpa = dpa_base +
1133 ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
1134 ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset)
1135 >> iw));
1136 } else {
1137 *dpa = dpa_base +
1138 ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
1139 ((((MAKE_64BIT_MASK(ig + iw, 64 - ig - iw) & hpa_offset)
1140 >> (ig + iw)) / 3) << (ig + 8)));
1141 }
1142
1143 return true;
1144 }
1145 return false;
1146 }
1147
static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
1149 hwaddr host_addr,
1150 unsigned int size,
1151 AddressSpace **as,
1152 uint64_t *dpa_offset)
1153 {
1154 MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1155 uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1156
1157 if (ct3d->hostvmem) {
1158 vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1159 vmr_size = memory_region_size(vmr);
1160 }
1161 if (ct3d->hostpmem) {
1162 pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1163 pmr_size = memory_region_size(pmr);
1164 }
1165 if (ct3d->dc.host_dc) {
1166 dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1167 dc_size = memory_region_size(dc_mr);
1168 }
1169
1170 if (!vmr && !pmr && !dc_mr) {
1171 return -ENODEV;
1172 }
1173
1174 if (!cxl_type3_dpa(ct3d, host_addr, dpa_offset)) {
1175 return -EINVAL;
1176 }
1177
1178 if (*dpa_offset >= vmr_size + pmr_size + dc_size) {
1179 return -EINVAL;
1180 }
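    /*
     * DPA space is ordered volatile, then persistent, then dynamic capacity,
     * so the offset selects the backing address space in that order and is
     * rebased to the start of the chosen backend.
     */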
1181
1182 if (*dpa_offset < vmr_size) {
1183 *as = &ct3d->hostvmem_as;
1184 } else if (*dpa_offset < vmr_size + pmr_size) {
1185 *as = &ct3d->hostpmem_as;
1186 *dpa_offset -= vmr_size;
1187 } else {
1188 if (!ct3_test_region_block_backed(ct3d, *dpa_offset, size)) {
1189 return -ENODEV;
1190 }
1191
1192 *as = &ct3d->dc.host_dc_as;
1193 *dpa_offset -= (vmr_size + pmr_size);
1194 }
1195
1196 return 0;
1197 }
1198
MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
1200 unsigned size, MemTxAttrs attrs)
1201 {
1202 CXLType3Dev *ct3d = CXL_TYPE3(d);
1203 uint64_t dpa_offset = 0;
1204 AddressSpace *as = NULL;
1205 int res;
1206
1207 res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1208 &as, &dpa_offset);
1209 if (res) {
1210 return MEMTX_ERROR;
1211 }
1212
1213 if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
1214 qemu_guest_getrandom_nofail(data, size);
1215 return MEMTX_OK;
1216 }
1217
1218 return address_space_read(as, dpa_offset, attrs, data, size);
1219 }
1220
MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
1222 unsigned size, MemTxAttrs attrs)
1223 {
1224 CXLType3Dev *ct3d = CXL_TYPE3(d);
1225 uint64_t dpa_offset = 0;
1226 AddressSpace *as = NULL;
1227 int res;
1228
1229 res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1230 &as, &dpa_offset);
1231 if (res) {
1232 return MEMTX_ERROR;
1233 }
1234
1235 if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
1236 return MEMTX_OK;
1237 }
1238
1239 return address_space_write(as, dpa_offset, attrs, &data, size);
1240 }
1241
static void ct3d_reset(DeviceState *dev)
1243 {
1244 CXLType3Dev *ct3d = CXL_TYPE3(dev);
1245 uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1246 uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
1247
1248 pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed);
1249 cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
1250 cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
1251
1252 /*
1253 * Bring up an endpoint to target with MCTP over VDM.
1254 * This device is emulating an MLD with single LD for now.
1255 */
1256 if (ct3d->vdm_fm_owned_ld_mctp_cci.initialized) {
1257 cxl_destroy_cci(&ct3d->vdm_fm_owned_ld_mctp_cci);
1258 }
1259 cxl_initialize_t3_fm_owned_ld_mctpcci(&ct3d->vdm_fm_owned_ld_mctp_cci,
1260 DEVICE(ct3d), DEVICE(ct3d),
1261 512); /* Max payload made up */
1262 if (ct3d->ld0_cci.initialized) {
1263 cxl_destroy_cci(&ct3d->ld0_cci);
1264 }
1265 cxl_initialize_t3_ld_cci(&ct3d->ld0_cci, DEVICE(ct3d), DEVICE(ct3d),
1266 512); /* Max payload made up */
1267 }
1268
1269 static const Property ct3_props[] = {
1270 DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
1271 HostMemoryBackend *), /* for backward compatibility */
1272 DEFINE_PROP_LINK("persistent-memdev", CXLType3Dev, hostpmem,
1273 TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1274 DEFINE_PROP_LINK("volatile-memdev", CXLType3Dev, hostvmem,
1275 TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1276 DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
1277 HostMemoryBackend *),
1278 DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
1279 DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
1280 DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
1281 DEFINE_PROP_LINK("volatile-dc-memdev", CXLType3Dev, dc.host_dc,
1282 TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1283 DEFINE_PROP_PCIE_LINK_SPEED("x-speed", CXLType3Dev,
1284 speed, PCIE_LINK_SPEED_32),
1285 DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
1286 width, PCIE_LINK_WIDTH_16),
1287 };
1288
static uint64_t get_lsa_size(CXLType3Dev *ct3d)
1290 {
1291 MemoryRegion *mr;
1292
1293 if (!ct3d->lsa) {
1294 return 0;
1295 }
1296
1297 mr = host_memory_backend_get_memory(ct3d->lsa);
1298 return memory_region_size(mr);
1299 }
1300
static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
1302 uint64_t offset)
1303 {
1304 assert(offset + size <= memory_region_size(mr));
1305 assert(offset + size > offset);
1306 }
1307
static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
1309 uint64_t offset)
1310 {
1311 MemoryRegion *mr;
1312 void *lsa;
1313
1314 if (!ct3d->lsa) {
1315 return 0;
1316 }
1317
1318 mr = host_memory_backend_get_memory(ct3d->lsa);
1319 validate_lsa_access(mr, size, offset);
1320
1321 lsa = memory_region_get_ram_ptr(mr) + offset;
1322 memcpy(buf, lsa, size);
1323
1324 return size;
1325 }
1326
static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
1328 uint64_t offset)
1329 {
1330 MemoryRegion *mr;
1331 void *lsa;
1332
1333 if (!ct3d->lsa) {
1334 return;
1335 }
1336
1337 mr = host_memory_backend_get_memory(ct3d->lsa);
1338 validate_lsa_access(mr, size, offset);
1339
1340 lsa = memory_region_get_ram_ptr(mr) + offset;
1341 memcpy(lsa, buf, size);
1342 memory_region_set_dirty(mr, offset, size);
1343
1344 /*
     * Just as with the PMEM, if the guest does not exit gracefully, label
     * updates will be lost.
1347 */
1348 }
1349
static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
1351 {
1352 MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1353 AddressSpace *as;
1354 uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1355
1356 if (ct3d->hostvmem) {
1357 vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1358 vmr_size = memory_region_size(vmr);
1359 }
1360 if (ct3d->hostpmem) {
1361 pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1362 pmr_size = memory_region_size(pmr);
1363 }
1364 if (ct3d->dc.host_dc) {
1365 dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1366 dc_size = memory_region_size(dc_mr);
1367 }
1368
1369 if (!vmr && !pmr && !dc_mr) {
1370 return false;
1371 }
1372
1373 if (dpa_offset + CXL_CACHE_LINE_SIZE > vmr_size + pmr_size + dc_size) {
1374 return false;
1375 }
1376
1377 if (dpa_offset < vmr_size) {
1378 as = &ct3d->hostvmem_as;
1379 } else if (dpa_offset < vmr_size + pmr_size) {
1380 as = &ct3d->hostpmem_as;
1381 dpa_offset -= vmr_size;
1382 } else {
1383 as = &ct3d->dc.host_dc_as;
1384 dpa_offset -= (vmr_size + pmr_size);
1385 }
1386
1387 address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, data,
1388 CXL_CACHE_LINE_SIZE);
1389 return true;
1390 }
1391
void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
1393 {
1394 ct3d->poison_list_overflowed = true;
1395 ct3d->poison_list_overflow_ts =
1396 cxl_device_get_timestamp(&ct3d->cxl_dstate);
1397 }
1398
void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d)
1400 {
1401 ct3d->poison_list_overflowed = false;
1402 ct3d->poison_list_overflow_ts = 0;
1403 }
1404
void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
1406 Error **errp)
1407 {
1408 Object *obj = object_resolve_path(path, NULL);
1409 CXLType3Dev *ct3d;
1410 CXLPoison *p;
1411
1412 if (length % 64) {
1413 error_setg(errp, "Poison injection must be in multiples of 64 bytes");
1414 return;
1415 }
1416 if (start % 64) {
1417 error_setg(errp, "Poison start address must be 64 byte aligned");
1418 return;
1419 }
1420 if (!obj) {
1421 error_setg(errp, "Unable to resolve path");
1422 return;
1423 }
1424 if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1425 error_setg(errp, "Path does not point to a CXL type 3 device");
1426 return;
1427 }
1428
1429 ct3d = CXL_TYPE3(obj);
1430
1431 QLIST_FOREACH(p, &ct3d->poison_list, node) {
1432 if ((start < p->start + p->length) && (start + length > p->start)) {
1433 error_setg(errp,
1434 "Overlap with existing poisoned region not supported");
1435 return;
1436 }
1437 }
1438
1439 p = g_new0(CXLPoison, 1);
1440 p->length = length;
1441 p->start = start;
1442 /* Different from injected via the mbox */
1443 p->type = CXL_POISON_TYPE_INTERNAL;
1444
1445 if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
1446 QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
1447 ct3d->poison_list_cnt++;
1448 } else {
1449 if (!ct3d->poison_list_overflowed) {
1450 cxl_set_poison_list_overflowed(ct3d);
1451 }
1452 QLIST_INSERT_HEAD(&ct3d->poison_list_bkp, p, node);
1453 }
1454 }
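/*
 * Hypothetical monitor usage (device id and addresses are examples only):
 *   { "execute": "cxl-inject-poison",
 *     "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                    "start": 2048, "length": 256 } }
 */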
1455
1456 /* For uncorrectable errors include support for multiple header recording */
void qmp_cxl_inject_uncorrectable_errors(const char *path,
1458 CXLUncorErrorRecordList *errors,
1459 Error **errp)
1460 {
1461 Object *obj = object_resolve_path(path, NULL);
1462 static PCIEAERErr err = {};
1463 CXLType3Dev *ct3d;
1464 CXLError *cxl_err;
1465 uint32_t *reg_state;
1466 uint32_t unc_err;
1467 bool first;
1468
1469 if (!obj) {
1470 error_setg(errp, "Unable to resolve path");
1471 return;
1472 }
1473
1474 if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1475 error_setg(errp, "Path does not point to a CXL type 3 device");
1476 return;
1477 }
1478
1479 err.status = PCI_ERR_UNC_INTN;
1480 err.source_id = pci_requester_id(PCI_DEVICE(obj));
1481 err.flags = 0;
1482
1483 ct3d = CXL_TYPE3(obj);
1484
1485 first = QTAILQ_EMPTY(&ct3d->error_list);
1486 reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1487 while (errors) {
1488 uint32List *header = errors->value->header;
1489 uint8_t header_count = 0;
1490 int cxl_err_code;
1491
1492 cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type);
1493 if (cxl_err_code < 0) {
1494 error_setg(errp, "Unknown error code");
1495 return;
1496 }
1497
1498 /* If the error is masked, nothing to do here */
1499 if (!((1 << cxl_err_code) &
1500 ~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) {
1501 errors = errors->next;
1502 continue;
1503 }
1504
1505 cxl_err = g_malloc0(sizeof(*cxl_err));
1506
1507 cxl_err->type = cxl_err_code;
1508 while (header && header_count < 32) {
1509 cxl_err->header[header_count++] = header->value;
1510 header = header->next;
1511 }
        if (header) {
            /* The caller supplied more than 32 header DWORDs */
            error_setg(errp, "Header must be 32 DWORD or less");
            g_free(cxl_err);
            return;
        }
1516 QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node);
1517
1518 errors = errors->next;
1519 }
1520
1521 if (first && !QTAILQ_EMPTY(&ct3d->error_list)) {
1522 uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1523 uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
1524 uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
1525 int i;
1526
1527 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
1528 for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
1529 stl_le_p(header_log + i, cxl_err->header[i]);
1530 }
1531
1532 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
1533 FIRST_ERROR_POINTER, cxl_err->type);
1534 stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl);
1535 }
1536
1537 unc_err = 0;
1538 QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
1539 unc_err |= (1 << cxl_err->type);
1540 }
1541 if (!unc_err) {
1542 return;
1543 }
1544
1545 stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err);
1546 pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1547 }
1548
void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
1550 Error **errp)
1551 {
1552 static PCIEAERErr err = {};
1553 Object *obj = object_resolve_path(path, NULL);
1554 CXLType3Dev *ct3d;
1555 uint32_t *reg_state;
1556 uint32_t cor_err;
1557 int cxl_err_type;
1558
1559 if (!obj) {
1560 error_setg(errp, "Unable to resolve path");
1561 return;
1562 }
1563 if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1564 error_setg(errp, "Path does not point to a CXL type 3 device");
1565 return;
1566 }
1567
1568 err.status = PCI_ERR_COR_INTERNAL;
1569 err.source_id = pci_requester_id(PCI_DEVICE(obj));
1570 err.flags = PCIE_AER_ERR_IS_CORRECTABLE;
1571
1572 ct3d = CXL_TYPE3(obj);
1573 reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1574 cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS);
1575
1576 cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
1577 if (cxl_err_type < 0) {
1578 error_setg(errp, "Invalid COR error");
1579 return;
1580 }
    /* If the error is masked, nothing to do here */
1582 if (!((1 << cxl_err_type) &
1583 ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) {
1584 return;
1585 }
1586
1587 cor_err |= (1 << cxl_err_type);
1588 stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err);
1589
1590 pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1591 }
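/*
 * Hypothetical monitor usage (device id is an example only):
 *   { "execute": "cxl-inject-correctable-error",
 *     "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                    "type": "mem-data-ecc" } }
 */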
1592
void cxl_assign_event_header(CXLEventRecordHdr *hdr,
1594 const QemuUUID *uuid, uint32_t flags,
1595 uint8_t length, uint64_t timestamp)
1596 {
1597 st24_le_p(&hdr->flags, flags);
1598 hdr->length = length;
1599 memcpy(&hdr->id, uuid, sizeof(hdr->id));
1600 stq_le_p(&hdr->timestamp, timestamp);
1601 }
1602
1603 static const QemuUUID gen_media_uuid = {
1604 .data = UUID(0xfbcd0a77, 0xc260, 0x417f,
1605 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6),
1606 };
1607
1608 static const QemuUUID dram_uuid = {
1609 .data = UUID(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf,
1610 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
1611 };
1612
1613 static const QemuUUID memory_module_uuid = {
1614 .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86,
1615 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
1616 };
1617
1618 #define CXL_GMER_VALID_CHANNEL BIT(0)
1619 #define CXL_GMER_VALID_RANK BIT(1)
1620 #define CXL_GMER_VALID_DEVICE BIT(2)
1621 #define CXL_GMER_VALID_COMPONENT BIT(3)
1622
static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
1624 {
1625 switch (log) {
1626 case CXL_EVENT_LOG_INFORMATIONAL:
1627 return CXL_EVENT_TYPE_INFO;
1628 case CXL_EVENT_LOG_WARNING:
1629 return CXL_EVENT_TYPE_WARN;
1630 case CXL_EVENT_LOG_FAILURE:
1631 return CXL_EVENT_TYPE_FAIL;
1632 case CXL_EVENT_LOG_FATAL:
1633 return CXL_EVENT_TYPE_FATAL;
1634 default:
1635 return -EINVAL;
1636 }
1637 }
1638 /* Component ID is device specific. Define this as a string. */
void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
1640 uint8_t flags, uint64_t dpa,
1641 uint8_t descriptor, uint8_t type,
1642 uint8_t transaction_type,
1643 bool has_channel, uint8_t channel,
1644 bool has_rank, uint8_t rank,
1645 bool has_device, uint32_t device,
1646 const char *component_id,
1647 Error **errp)
1648 {
1649 Object *obj = object_resolve_path(path, NULL);
1650 CXLEventGenMedia gem;
1651 CXLEventRecordHdr *hdr = &gem.hdr;
1652 CXLDeviceState *cxlds;
1653 CXLType3Dev *ct3d;
1654 uint16_t valid_flags = 0;
1655 uint8_t enc_log;
1656 int rc;
1657
1658 if (!obj) {
1659 error_setg(errp, "Unable to resolve path");
1660 return;
1661 }
1662 if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1663 error_setg(errp, "Path does not point to a CXL type 3 device");
1664 return;
1665 }
1666 ct3d = CXL_TYPE3(obj);
1667 cxlds = &ct3d->cxl_dstate;
1668
1669 rc = ct3d_qmp_cxl_event_log_enc(log);
1670 if (rc < 0) {
1671 error_setg(errp, "Unhandled error log type");
1672 return;
1673 }
1674 enc_log = rc;
1675
1676 memset(&gem, 0, sizeof(gem));
1677 cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem),
1678 cxl_device_get_timestamp(&ct3d->cxl_dstate));
1679
1680 stq_le_p(&gem.phys_addr, dpa);
1681 gem.descriptor = descriptor;
1682 gem.type = type;
1683 gem.transaction_type = transaction_type;
1684
1685 if (has_channel) {
1686 gem.channel = channel;
1687 valid_flags |= CXL_GMER_VALID_CHANNEL;
1688 }
1689
1690 if (has_rank) {
1691 gem.rank = rank;
1692 valid_flags |= CXL_GMER_VALID_RANK;
1693 }
1694
1695 if (has_device) {
1696 st24_le_p(gem.device, device);
1697 valid_flags |= CXL_GMER_VALID_DEVICE;
1698 }
1699
1700 if (component_id) {
1701 strncpy((char *)gem.component_id, component_id,
1702 sizeof(gem.component_id) - 1);
1703 valid_flags |= CXL_GMER_VALID_COMPONENT;
1704 }
1705
1706 stw_le_p(&gem.validity_flags, valid_flags);
1707
1708 if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) {
1709 cxl_event_irq_assert(ct3d);
1710 }
1711 }
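/*
 * Hypothetical monitor usage (values are examples only):
 *   { "execute": "cxl-inject-general-media-event",
 *     "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                    "log": "informational", "flags": 1, "dpa": 4096,
 *                    "descriptor": 3, "type": 3, "transaction-type": 192 } }
 */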
1712
1713 #define CXL_DRAM_VALID_CHANNEL BIT(0)
1714 #define CXL_DRAM_VALID_RANK BIT(1)
1715 #define CXL_DRAM_VALID_NIBBLE_MASK BIT(2)
1716 #define CXL_DRAM_VALID_BANK_GROUP BIT(3)
1717 #define CXL_DRAM_VALID_BANK BIT(4)
1718 #define CXL_DRAM_VALID_ROW BIT(5)
1719 #define CXL_DRAM_VALID_COLUMN BIT(6)
1720 #define CXL_DRAM_VALID_CORRECTION_MASK BIT(7)
1721
void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
1723 uint64_t dpa, uint8_t descriptor,
1724 uint8_t type, uint8_t transaction_type,
1725 bool has_channel, uint8_t channel,
1726 bool has_rank, uint8_t rank,
1727 bool has_nibble_mask, uint32_t nibble_mask,
1728 bool has_bank_group, uint8_t bank_group,
1729 bool has_bank, uint8_t bank,
1730 bool has_row, uint32_t row,
1731 bool has_column, uint16_t column,
1732 bool has_correction_mask,
1733 uint64List *correction_mask,
1734 Error **errp)
1735 {
1736 Object *obj = object_resolve_path(path, NULL);
1737 CXLEventDram dram;
1738 CXLEventRecordHdr *hdr = &dram.hdr;
1739 CXLDeviceState *cxlds;
1740 CXLType3Dev *ct3d;
1741 uint16_t valid_flags = 0;
1742 uint8_t enc_log;
1743 int rc;
1744
1745 if (!obj) {
1746 error_setg(errp, "Unable to resolve path");
1747 return;
1748 }
1749 if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1750 error_setg(errp, "Path does not point to a CXL type 3 device");
1751 return;
1752 }
1753 ct3d = CXL_TYPE3(obj);
1754 cxlds = &ct3d->cxl_dstate;
1755
1756 rc = ct3d_qmp_cxl_event_log_enc(log);
1757 if (rc < 0) {
1758 error_setg(errp, "Unhandled error log type");
1759 return;
1760 }
1761 enc_log = rc;
1762
1763 memset(&dram, 0, sizeof(dram));
1764 cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram),
1765 cxl_device_get_timestamp(&ct3d->cxl_dstate));
1766 stq_le_p(&dram.phys_addr, dpa);
1767 dram.descriptor = descriptor;
1768 dram.type = type;
1769 dram.transaction_type = transaction_type;
1770
1771 if (has_channel) {
1772 dram.channel = channel;
1773 valid_flags |= CXL_DRAM_VALID_CHANNEL;
1774 }
1775
1776 if (has_rank) {
1777 dram.rank = rank;
1778 valid_flags |= CXL_DRAM_VALID_RANK;
1779 }
1780
1781 if (has_nibble_mask) {
1782 st24_le_p(dram.nibble_mask, nibble_mask);
1783 valid_flags |= CXL_DRAM_VALID_NIBBLE_MASK;
1784 }
1785
1786 if (has_bank_group) {
1787 dram.bank_group = bank_group;
1788 valid_flags |= CXL_DRAM_VALID_BANK_GROUP;
1789 }
1790
1791 if (has_bank) {
1792 dram.bank = bank;
1793 valid_flags |= CXL_DRAM_VALID_BANK;
1794 }
1795
1796 if (has_row) {
1797 st24_le_p(dram.row, row);
1798 valid_flags |= CXL_DRAM_VALID_ROW;
1799 }
1800
1801 if (has_column) {
1802 stw_le_p(&dram.column, column);
1803 valid_flags |= CXL_DRAM_VALID_COLUMN;
1804 }
1805
1806 if (has_correction_mask) {
1807 int count = 0;
1808 while (correction_mask && count < 4) {
1809 stq_le_p(&dram.correction_mask[count],
1810 correction_mask->value);
1811 count++;
1812 correction_mask = correction_mask->next;
1813 }
1814 valid_flags |= CXL_DRAM_VALID_CORRECTION_MASK;
1815 }
1816
1817 stw_le_p(&dram.validity_flags, valid_flags);
1818
1819 if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) {
1820 cxl_event_irq_assert(ct3d);
1821 }
1822 }
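/*
 * Example (sketch, not authoritative): a matching QMP invocation for the
 * DRAM event handler above, with argument names assumed to follow the C
 * parameters (dashes for underscores; see qapi/cxl.json) and a device with
 * the hypothetical id "cxl-mem0":
 *
 *   { "execute": "cxl-inject-dram-event",
 *     "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                    "log": "informational", "flags": 1, "dpa": 4096,
 *                    "descriptor": 3, "type": 3, "transaction-type": 192,
 *                    "bank-group": 1, "bank": 2, "row": 128, "column": 4,
 *                    "correction-mask": [ 0, 0, 0, 0 ] } }
 *
 * Note that nibble_mask and row are stored as 24-bit little-endian fields
 * (st24_le_p), and the correction mask is truncated to the first four
 * 64-bit words by the loop above.
 */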
1823
1824 void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
1825 uint8_t flags, uint8_t type,
1826 uint8_t health_status,
1827 uint8_t media_status,
1828 uint8_t additional_status,
1829 uint8_t life_used,
1830 int16_t temperature,
1831 uint32_t dirty_shutdown_count,
1832 uint32_t corrected_volatile_error_count,
1833 uint32_t corrected_persist_error_count,
1834 Error **errp)
1835 {
1836 Object *obj = object_resolve_path(path, NULL);
1837 CXLEventMemoryModule module;
1838 CXLEventRecordHdr *hdr = &module.hdr;
1839 CXLDeviceState *cxlds;
1840 CXLType3Dev *ct3d;
1841 uint8_t enc_log;
1842 int rc;
1843
1844 if (!obj) {
1845 error_setg(errp, "Unable to resolve path");
1846 return;
1847 }
1848 if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1849 error_setg(errp, "Path does not point to a CXL type 3 device");
1850 return;
1851 }
1852 ct3d = CXL_TYPE3(obj);
1853 cxlds = &ct3d->cxl_dstate;
1854
1855 rc = ct3d_qmp_cxl_event_log_enc(log);
1856 if (rc < 0) {
1857 error_setg(errp, "Unhandled error log type");
1858 return;
1859 }
1860 enc_log = rc;
1861
1862 memset(&module, 0, sizeof(module));
1863 cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
1864 cxl_device_get_timestamp(&ct3d->cxl_dstate));
1865
1866 module.type = type;
1867 module.health_status = health_status;
1868 module.media_status = media_status;
1869 module.additional_status = additional_status;
1870 module.life_used = life_used;
1871 stw_le_p(&module.temperature, temperature);
1872 stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count);
1873 stl_le_p(&module.corrected_volatile_error_count,
1874 corrected_volatile_error_count);
1875 stl_le_p(&module.corrected_persistent_error_count,
1876 corrected_persist_error_count);
1877
1878 if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) {
1879 cxl_event_irq_assert(ct3d);
1880 }
1881 }
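/*
 * Example (sketch, not authoritative): a possible QMP invocation for the
 * memory module event handler above, with illustrative argument names (the
 * authoritative names are in qapi/cxl.json):
 *
 *   { "execute": "cxl-inject-memory-module-event",
 *     "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                    "log": "informational", "flags": 1, "type": 0,
 *                    "health-status": 0, "media-status": 0,
 *                    "additional-status": 0, "life-used": 33,
 *                    "temperature": 25, "dirty-shutdown-count": 10,
 *                    "corrected-volatile-error-count": 5,
 *                    "corrected-persistent-error-count": 2 } }
 *
 * All fields are mandatory in the C signature, so no validity flags are
 * involved; the multi-byte values are stored little-endian into the record.
 */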
1882
1883 /*
1884 * Check whether the range [dpa, dpa + len - 1] overlaps any extent in
1885 * the list.
1886 * Return value: true if there is an overlap; otherwise false.
1887 */
1888 bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
1889 uint64_t dpa, uint64_t len)
1890 {
1891 CXLDCExtent *ent;
1892 Range range1, range2;
1893
1894 if (!list) {
1895 return false;
1896 }
1897
1898 range_init_nofail(&range1, dpa, len);
1899 QTAILQ_FOREACH(ent, list, node) {
1900 range_init_nofail(&range2, ent->start_dpa, ent->len);
1901 if (range_overlaps_range(&range1, &range2)) {
1902 return true;
1903 }
1904 }
1905 return false;
1906 }
1907
1908 /*
1909 * Check whether the range [dpa, dpa + len - 1] is fully contained within a
1910 * single extent in the list.  Containment across multiple extents will be
1911 * checked once superset release support is added.
1912 * Return value: true if the range is contained; otherwise false.
1913 */
1914 bool cxl_extents_contains_dpa_range(CXLDCExtentList *list,
1915 uint64_t dpa, uint64_t len)
1916 {
1917 CXLDCExtent *ent;
1918 Range range1, range2;
1919
1920 if (!list) {
1921 return false;
1922 }
1923
1924 range_init_nofail(&range1, dpa, len);
1925 QTAILQ_FOREACH(ent, list, node) {
1926 range_init_nofail(&range2, ent->start_dpa, ent->len);
1927 if (range_contains_range(&range2, &range1)) {
1928 return true;
1929 }
1930 }
1931 return false;
1932 }
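/*
 * Worked illustration (not from the spec): with a list holding a single
 * extent starting at DPA 0x10000 with length 0x8000 (covering
 * [0x10000, 0x17fff]):
 *   - [0x14000, len 0x2000] overlaps and is contained;
 *   - [0x17000, len 0x2000] overlaps but is not contained (crosses the end);
 *   - [0x20000, len 0x1000] neither overlaps nor is contained.
 */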
1933
1934 bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
1935 uint64_t dpa, uint64_t len)
1936 {
1937 CXLDCExtentGroup *group;
1938
1939 if (!list) {
1940 return false;
1941 }
1942
1943 QTAILQ_FOREACH(group, list, node) {
1944 if (cxl_extents_overlaps_dpa_range(&group->list, dpa, len)) {
1945 return true;
1946 }
1947 }
1948 return false;
1949 }
1950
1951 /*
1952 * Main function for processing a dynamic capacity event with an extent
1953 * list.  Currently only DC extent add and release requests are handled.
1954 */
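/*
 * The processing below is two-pass: a first walk over @records validates
 * alignment, bounds and non-overlap (tracked in blk_bitmap) and counts the
 * extents; a second walk builds the raw extent array, queues ADD extents on
 * dc.extents_pending as one extent group, and finally emits the DC event
 * records for the host.
 */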
1955 static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
1956 uint16_t hid, CXLDCEventType type, uint8_t rid,
1957 CxlDynamicCapacityExtentList *records, Error **errp)
1958 {
1959 Object *obj;
1960 CXLType3Dev *dcd;
1961 uint32_t num_extents = 0;
1962 CxlDynamicCapacityExtentList *list;
1963 CXLDCExtentGroup *group = NULL;
1964 g_autofree CXLDCExtentRaw *extents = NULL;
1965 uint64_t dpa, offset, len, block_size;
1966 g_autofree unsigned long *blk_bitmap = NULL;
1967 int i;
1968
1969 obj = object_resolve_path_type(path, TYPE_CXL_TYPE3, NULL);
1970 if (!obj) {
1971 error_setg(errp, "Unable to resolve CXL type 3 device");
1972 return;
1973 }
1974
1975 dcd = CXL_TYPE3(obj);
1976 if (!dcd->dc.num_regions) {
1977 error_setg(errp, "No dynamic capacity support from the device");
1978 return;
1979 }
1980
1981
1982 if (rid >= dcd->dc.num_regions) {
1983 error_setg(errp, "region id is too large");
1984 return;
1985 }
1986 block_size = dcd->dc.regions[rid].block_size;
1987 blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
1988
1989 /* Sanity check and count the extents */
1990 list = records;
1991 while (list) {
1992 offset = list->value->offset;
1993 len = list->value->len;
1994 dpa = offset + dcd->dc.regions[rid].base;
1995
1996 if (len == 0) {
1997 error_setg(errp, "extent with 0 length is not allowed");
1998 return;
1999 }
2000
2001 if (offset % block_size || len % block_size) {
2002 error_setg(errp, "dpa or len is not aligned to region block size");
2003 return;
2004 }
2005
2006 if (offset + len > dcd->dc.regions[rid].len) {
2007 error_setg(errp, "extent range is beyond the region end");
2008 return;
2009 }
2010
2011 /* No duplicate or overlapping extents are allowed */
2012 if (test_any_bits_set(blk_bitmap, offset / block_size,
2013 len / block_size)) {
2014 error_setg(errp, "duplicate or overlapped extents are detected");
2015 return;
2016 }
2017 bitmap_set(blk_bitmap, offset / block_size, len / block_size);
2018
2019 if (type == DC_EVENT_RELEASE_CAPACITY) {
2020 if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
2021 dpa, len)) {
2022 error_setg(errp,
2023 "cannot release extent with pending DPA range");
2024 return;
2025 }
2026 if (!ct3_test_region_block_backed(dcd, dpa, len)) {
2027 error_setg(errp,
2028 "cannot release extent with non-existing DPA range");
2029 return;
2030 }
2031 } else if (type == DC_EVENT_ADD_CAPACITY) {
2032 if (cxl_extents_overlaps_dpa_range(&dcd->dc.extents, dpa, len)) {
2033 error_setg(errp,
2034 "cannot add DPA already accessible to the same LD");
2035 return;
2036 }
2037 if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
2038 dpa, len)) {
2039 error_setg(errp,
2040 "cannot add DPA again while still pending");
2041 return;
2042 }
2043 }
2044 list = list->next;
2045 num_extents++;
2046 }
2047
2048 /* Create extent list for event being passed to host */
2049 i = 0;
2050 list = records;
2051 extents = g_new0(CXLDCExtentRaw, num_extents);
2052 while (list) {
2053 offset = list->value->offset;
2054 len = list->value->len;
2055 dpa = dcd->dc.regions[rid].base + offset;
2056
2057 extents[i].start_dpa = dpa;
2058 extents[i].len = len;
2059 memset(extents[i].tag, 0, 0x10);
2060 extents[i].shared_seq = 0;
2061 if (type == DC_EVENT_ADD_CAPACITY) {
2062 group = cxl_insert_extent_to_extent_group(group,
2063 extents[i].start_dpa,
2064 extents[i].len,
2065 extents[i].tag,
2066 extents[i].shared_seq);
2067 }
2068
2069 list = list->next;
2070 i++;
2071 }
2072 if (group) {
2073 cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group);
2074 dcd->dc.total_extent_count += num_extents;
2075 }
2076
2077 cxl_create_dc_event_records_for_extents(dcd, type, extents, num_extents);
2078 }
2079
2080 void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id,
2081 CxlExtentSelectionPolicy sel_policy,
2082 uint8_t region, const char *tag,
2083 CxlDynamicCapacityExtentList *extents,
2084 Error **errp)
2085 {
2086 switch (sel_policy) {
2087 case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE:
2088 qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id,
2089 DC_EVENT_ADD_CAPACITY,
2090 region, extents, errp);
2091 return;
2092 default:
2093 error_setg(errp, "Selection policy not supported");
2094 return;
2095 }
2096 }
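/*
 * Example (sketch, not authoritative): assuming the QMP command is named
 * cxl-add-dynamic-capacity with arguments mirroring the C parameters above
 * (see qapi/cxl.json), adding one 128 MiB extent at offset 0 of DC region 0
 * of a device with the hypothetical id "cxl-dcd0" might look like:
 *
 *   { "execute": "cxl-add-dynamic-capacity",
 *     "arguments": { "path": "/machine/peripheral/cxl-dcd0",
 *                    "host-id": 0, "selection-policy": "prescriptive",
 *                    "region": 0,
 *                    "extents": [ { "offset": 0, "len": 134217728 } ] } }
 *
 * Only the prescriptive selection policy is accepted; anything else fails
 * with "Selection policy not supported".
 */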
2097
2098 void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id,
2099 CxlExtentRemovalPolicy removal_policy,
2100 bool has_forced_removal,
2101 bool forced_removal,
2102 bool has_sanitize_on_release,
2103 bool sanitize_on_release,
2104 uint8_t region,
2105 const char *tag,
2106 CxlDynamicCapacityExtentList *extents,
2107 Error **errp)
2108 {
2109 CXLDCEventType type = DC_EVENT_RELEASE_CAPACITY;
2110
2111 if (has_forced_removal && forced_removal) {
2112 /* TODO: enable forced removal in the future */
2113 type = DC_EVENT_FORCED_RELEASE_CAPACITY;
2114 error_setg(errp, "Forced removal not supported yet");
2115 return;
2116 }
2117
2118 switch (removal_policy) {
2119 case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE:
2120 qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, type,
2121 region, extents, errp);
2122 return;
2123 default:
2124 error_setg(errp, "Removal policy not supported");
2125 return;
2126 }
2127 }
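/*
 * Example (sketch, not authoritative): the matching release command, again
 * with argument names assumed to mirror the C parameters:
 *
 *   { "execute": "cxl-release-dynamic-capacity",
 *     "arguments": { "path": "/machine/peripheral/cxl-dcd0",
 *                    "host-id": 0, "removal-policy": "prescriptive",
 *                    "region": 0,
 *                    "extents": [ { "offset": 0, "len": 134217728 } ] } }
 *
 * Requesting forced removal currently returns "Forced removal not supported
 * yet", as handled above.
 */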
2128
2129 static void ct3_class_init(ObjectClass *oc, const void *data)
2130 {
2131 DeviceClass *dc = DEVICE_CLASS(oc);
2132 PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
2133 CXLType3Class *cvc = CXL_TYPE3_CLASS(oc);
2134
2135 pc->realize = ct3_realize;
2136 pc->exit = ct3_exit;
2137 pc->class_id = PCI_CLASS_MEMORY_CXL;
2138 pc->vendor_id = PCI_VENDOR_ID_INTEL;
2139 pc->device_id = 0xd93; /* LVF for now */
2140 pc->revision = 1;
2141
2142 pc->config_write = ct3d_config_write;
2143 pc->config_read = ct3d_config_read;
2144
2145 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
2146 dc->desc = "CXL Memory Device (Type 3)";
2147 device_class_set_legacy_reset(dc, ct3d_reset);
2148 device_class_set_props(dc, ct3_props);
2149
2150 cvc->get_lsa_size = get_lsa_size;
2151 cvc->get_lsa = get_lsa;
2152 cvc->set_lsa = set_lsa;
2153 cvc->set_cacheline = set_cacheline;
2154 }
2155
2156 static const TypeInfo ct3d_info = {
2157 .name = TYPE_CXL_TYPE3,
2158 .parent = TYPE_PCI_DEVICE,
2159 .class_size = sizeof(struct CXLType3Class),
2160 .class_init = ct3_class_init,
2161 .instance_size = sizeof(CXLType3Dev),
2162 .interfaces = (const InterfaceInfo[]) {
2163 { INTERFACE_CXL_DEVICE },
2164 { INTERFACE_PCIE_DEVICE },
2165 {}
2166 },
2167 };
2168
2169 static void ct3d_registers(void)
2170 {
2171 type_register_static(&ct3d_info);
2172 }
2173
2174 type_init(ct3d_registers);
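/*
 * Example (sketch, not authoritative): once registered, the device can be
 * instantiated from the command line.  Assuming a CXL root port with the
 * hypothetical id "cxl-rp0" and a RAM backend "cxl-vmem0" have already been
 * defined, and that the volatile-memdev property from ct3_props is used:
 *
 *   -object memory-backend-ram,id=cxl-vmem0,size=256M \
 *   -device cxl-type3,bus=cxl-rp0,volatile-memdev=cxl-vmem0,id=cxl-mem0
 */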
2175