xref: /openbmc/qemu/hw/nvme/dif.c (revision 1d76437b45ab9982307b95d325d627f7b6f06088)
1 /*
2  * QEMU NVM Express End-to-End Data Protection support
3  *
4  * Copyright (c) 2021 Samsung Electronics Co., Ltd.
5  *
6  * Authors:
7  *   Klaus Jensen           <k.jensen@samsung.com>
8  *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
9  */
10 
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "sysemu/block-backend.h"
14 
15 #include "nvme.h"
16 #include "trace.h"
17 
18 uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
19                            uint32_t reftag)
20 {
21     if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
22         (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
23         return NVME_INVALID_PROT_INFO | NVME_DNR;
24     }
25 
26     return NVME_SUCCESS;
27 }
28 
29 /* from Linux kernel (crypto/crct10dif_common.c) */
30 static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
31                            size_t len)
32 {
33     unsigned int i;
34 
35     for (i = 0; i < len; i++) {
36         crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
37     }
38 
39     return crc;
40 }
41 
42 void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
43                                  uint8_t *mbuf, size_t mlen, uint16_t apptag,
44                                  uint32_t *reftag)
45 {
46     uint8_t *end = buf + len;
47     int16_t pil = 0;
48 
49     if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
50         pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
51     }
52 
53     trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
54                                           apptag, *reftag);
55 
56     for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
57         NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
58         uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
59 
60         if (pil) {
61             crc = crc_t10dif(crc, mbuf, pil);
62         }
63 
64         dif->guard = cpu_to_be16(crc);
65         dif->apptag = cpu_to_be16(apptag);
66         dif->reftag = cpu_to_be32(*reftag);
67 
68         if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
69             (*reftag)++;
70         }
71     }
72 }
73 
74 static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
75                                uint8_t *buf, uint8_t *mbuf, size_t pil,
76                                uint8_t prinfo, uint16_t apptag,
77                                uint16_t appmask, uint32_t reftag)
78 {
79     switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
80     case NVME_ID_NS_DPS_TYPE_3:
81         if (be32_to_cpu(dif->reftag) != 0xffffffff) {
82             break;
83         }
84 
85         /* fallthrough */
86     case NVME_ID_NS_DPS_TYPE_1:
87     case NVME_ID_NS_DPS_TYPE_2:
88         if (be16_to_cpu(dif->apptag) != 0xffff) {
89             break;
90         }
91 
92         trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
93                                           be32_to_cpu(dif->reftag));
94 
95         return NVME_SUCCESS;
96     }
97 
98     if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
99         uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
100 
101         if (pil) {
102             crc = crc_t10dif(crc, mbuf, pil);
103         }
104 
105         trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);
106 
107         if (be16_to_cpu(dif->guard) != crc) {
108             return NVME_E2E_GUARD_ERROR;
109         }
110     }
111 
112     if (prinfo & NVME_PRINFO_PRCHK_APP) {
113         trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
114                                         appmask);
115 
116         if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
117             return NVME_E2E_APP_ERROR;
118         }
119     }
120 
121     if (prinfo & NVME_PRINFO_PRCHK_REF) {
122         trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);
123 
124         if (be32_to_cpu(dif->reftag) != reftag) {
125             return NVME_E2E_REF_ERROR;
126         }
127     }
128 
129     return NVME_SUCCESS;
130 }
131 
132 uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
133                         uint8_t *mbuf, size_t mlen, uint8_t prinfo,
134                         uint64_t slba, uint16_t apptag,
135                         uint16_t appmask, uint32_t *reftag)
136 {
137     uint8_t *end = buf + len;
138     int16_t pil = 0;
139     uint16_t status;
140 
141     status = nvme_check_prinfo(ns, prinfo, slba, *reftag);
142     if (status) {
143         return status;
144     }
145 
146     if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
147         pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
148     }
149 
150     trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);
151 
152     for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
153         NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
154 
155         status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag,
156                                 appmask, *reftag);
157         if (status) {
158             return status;
159         }
160 
161         if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
162             (*reftag)++;
163         }
164     }
165 
166     return NVME_SUCCESS;
167 }
168 
169 uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
170                                uint64_t slba)
171 {
172     BlockBackend *blk = ns->blkconf.blk;
173     BlockDriverState *bs = blk_bs(blk);
174 
175     int64_t moffset = 0, offset = nvme_l2b(ns, slba);
176     uint8_t *mbufp, *end;
177     bool zeroed;
178     int16_t pil = 0;
179     int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds;
180     int64_t pnum = 0;
181 
182     Error *err = NULL;
183 
184 
185     if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
186         pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
187     }
188 
189     do {
190         int ret;
191 
192         bytes -= pnum;
193 
194         ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
195         if (ret < 0) {
196             error_setg_errno(&err, -ret, "unable to get block status");
197             error_report_err(err);
198 
199             return NVME_INTERNAL_DEV_ERROR;
200         }
201 
202         zeroed = !!(ret & BDRV_BLOCK_ZERO);
203 
204         trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);
205 
206         if (zeroed) {
207             mbufp = mbuf + moffset;
208             mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
209             end = mbufp + mlen;
210 
211             for (; mbufp < end; mbufp += ns->lbaf.ms) {
212                 memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
213             }
214         }
215 
216         moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
217         offset += pnum;
218     } while (pnum != bytes);
219 
220     return NVME_SUCCESS;
221 }
222 
223 static void nvme_dif_rw_cb(void *opaque, int ret)
224 {
225     NvmeBounceContext *ctx = opaque;
226     NvmeRequest *req = ctx->req;
227     NvmeNamespace *ns = req->ns;
228     BlockBackend *blk = ns->blkconf.blk;
229 
230     trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));
231 
232     qemu_iovec_destroy(&ctx->data.iov);
233     g_free(ctx->data.bounce);
234 
235     qemu_iovec_destroy(&ctx->mdata.iov);
236     g_free(ctx->mdata.bounce);
237 
238     g_free(ctx);
239 
240     nvme_rw_complete_cb(req, ret);
241 }
242 
243 static void nvme_dif_rw_check_cb(void *opaque, int ret)
244 {
245     NvmeBounceContext *ctx = opaque;
246     NvmeRequest *req = ctx->req;
247     NvmeNamespace *ns = req->ns;
248     NvmeCtrl *n = nvme_ctrl(req);
249     NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
250     uint64_t slba = le64_to_cpu(rw->slba);
251     uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
252     uint16_t apptag = le16_to_cpu(rw->apptag);
253     uint16_t appmask = le16_to_cpu(rw->appmask);
254     uint32_t reftag = le32_to_cpu(rw->reftag);
255     uint16_t status;
256 
257     trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask,
258                                    reftag);
259 
260     if (ret) {
261         goto out;
262     }
263 
264     status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
265                                    slba);
266     if (status) {
267         req->status = status;
268         goto out;
269     }
270 
271     status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
272                             ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
273                             slba, apptag, appmask, &reftag);
274     if (status) {
275         req->status = status;
276         goto out;
277     }
278 
279     status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
280                               NVME_TX_DIRECTION_FROM_DEVICE, req);
281     if (status) {
282         req->status = status;
283         goto out;
284     }
285 
286     if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) {
287         goto out;
288     }
289 
290     status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
291                                NVME_TX_DIRECTION_FROM_DEVICE, req);
292     if (status) {
293         req->status = status;
294     }
295 
296 out:
297     nvme_dif_rw_cb(ctx, ret);
298 }
299 
300 static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
301 {
302     NvmeBounceContext *ctx = opaque;
303     NvmeRequest *req = ctx->req;
304     NvmeNamespace *ns = req->ns;
305     NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
306     uint64_t slba = le64_to_cpu(rw->slba);
307     uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
308     size_t mlen = nvme_m2b(ns, nlb);
309     uint64_t offset = nvme_moff(ns, slba);
310     BlockBackend *blk = ns->blkconf.blk;
311 
312     trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));
313 
314     if (ret) {
315         goto out;
316     }
317 
318     ctx->mdata.bounce = g_malloc(mlen);
319 
320     qemu_iovec_reset(&ctx->mdata.iov);
321     qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
322 
323     req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
324                                 nvme_dif_rw_check_cb, ctx);
325     return;
326 
327 out:
328     nvme_dif_rw_cb(ctx, ret);
329 }
330 
331 static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
332 {
333     NvmeBounceContext *ctx = opaque;
334     NvmeRequest *req = ctx->req;
335     NvmeNamespace *ns = req->ns;
336     NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
337     uint64_t slba = le64_to_cpu(rw->slba);
338     uint64_t offset = nvme_moff(ns, slba);
339     BlockBackend *blk = ns->blkconf.blk;
340 
341     trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));
342 
343     if (ret) {
344         goto out;
345     }
346 
347     req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
348                                  nvme_dif_rw_cb, ctx);
349     return;
350 
351 out:
352     nvme_dif_rw_cb(ctx, ret);
353 }
354 
355 uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
356 {
357     NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
358     NvmeNamespace *ns = req->ns;
359     BlockBackend *blk = ns->blkconf.blk;
360     bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
361     uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
362     uint64_t slba = le64_to_cpu(rw->slba);
363     size_t len = nvme_l2b(ns, nlb);
364     size_t mlen = nvme_m2b(ns, nlb);
365     size_t mapped_len = len;
366     int64_t offset = nvme_l2b(ns, slba);
367     uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
368     uint16_t apptag = le16_to_cpu(rw->apptag);
369     uint16_t appmask = le16_to_cpu(rw->appmask);
370     uint32_t reftag = le32_to_cpu(rw->reftag);
371     bool pract = !!(prinfo & NVME_PRINFO_PRACT);
372     NvmeBounceContext *ctx;
373     uint16_t status;
374 
375     trace_pci_nvme_dif_rw(pract, prinfo);
376 
377     ctx = g_new0(NvmeBounceContext, 1);
378     ctx->req = req;
379 
380     if (wrz) {
381         BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;
382 
383         if (prinfo & NVME_PRINFO_PRCHK_MASK) {
384             status = NVME_INVALID_PROT_INFO | NVME_DNR;
385             goto err;
386         }
387 
388         if (pract) {
389             uint8_t *mbuf, *end;
390             int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
391 
392             status = nvme_check_prinfo(ns, prinfo, slba, reftag);
393             if (status) {
394                 goto err;
395             }
396 
397             flags = 0;
398 
399             ctx->mdata.bounce = g_malloc0(mlen);
400 
401             qemu_iovec_init(&ctx->mdata.iov, 1);
402             qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
403 
404             mbuf = ctx->mdata.bounce;
405             end = mbuf + mlen;
406 
407             if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
408                 pil = 0;
409             }
410 
411             for (; mbuf < end; mbuf += ns->lbaf.ms) {
412                 NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
413 
414                 dif->apptag = cpu_to_be16(apptag);
415                 dif->reftag = cpu_to_be32(reftag);
416 
417                 switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
418                 case NVME_ID_NS_DPS_TYPE_1:
419                 case NVME_ID_NS_DPS_TYPE_2:
420                     reftag++;
421                 }
422             }
423         }
424 
425         req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
426                                            nvme_dif_rw_mdata_out_cb, ctx);
427         return NVME_NO_COMPLETE;
428     }
429 
430     if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) {
431         mapped_len += mlen;
432     }
433 
434     status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
435     if (status) {
436         goto err;
437     }
438 
439     ctx->data.bounce = g_malloc(len);
440 
441     qemu_iovec_init(&ctx->data.iov, 1);
442     qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
443 
444     if (req->cmd.opcode == NVME_CMD_READ) {
445         block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
446                          BLOCK_ACCT_READ);
447 
448         req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
449                                     nvme_dif_rw_mdata_in_cb, ctx);
450         return NVME_NO_COMPLETE;
451     }
452 
453     status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
454                               NVME_TX_DIRECTION_TO_DEVICE, req);
455     if (status) {
456         goto err;
457     }
458 
459     ctx->mdata.bounce = g_malloc(mlen);
460 
461     qemu_iovec_init(&ctx->mdata.iov, 1);
462     qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
463 
464     if (!(pract && ns->lbaf.ms == 8)) {
465         status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
466                                    NVME_TX_DIRECTION_TO_DEVICE, req);
467         if (status) {
468             goto err;
469         }
470     }
471 
472     status = nvme_check_prinfo(ns, prinfo, slba, reftag);
473     if (status) {
474         goto err;
475     }
476 
477     if (pract) {
478         /* splice generated protection information into the buffer */
479         nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
480                                     ctx->mdata.bounce, ctx->mdata.iov.size,
481                                     apptag, &reftag);
482     } else {
483         status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
484                                 ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
485                                 slba, apptag, appmask, &reftag);
486         if (status) {
487             goto err;
488         }
489     }
490 
491     block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
492                      BLOCK_ACCT_WRITE);
493 
494     req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
495                                  nvme_dif_rw_mdata_out_cb, ctx);
496 
497     return NVME_NO_COMPLETE;
498 
499 err:
500     qemu_iovec_destroy(&ctx->data.iov);
501     g_free(ctx->data.bounce);
502 
503     qemu_iovec_destroy(&ctx->mdata.iov);
504     g_free(ctx->mdata.bounce);
505 
506     g_free(ctx);
507 
508     return status;
509 }
510