175411d23SStefan Hajnoczi /* 275411d23SStefan Hajnoczi * QEMU Enhanced Disk Format 375411d23SStefan Hajnoczi * 475411d23SStefan Hajnoczi * Copyright IBM, Corp. 2010 575411d23SStefan Hajnoczi * 675411d23SStefan Hajnoczi * Authors: 775411d23SStefan Hajnoczi * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> 875411d23SStefan Hajnoczi * Anthony Liguori <aliguori@us.ibm.com> 975411d23SStefan Hajnoczi * 1075411d23SStefan Hajnoczi * This work is licensed under the terms of the GNU LGPL, version 2 or later. 1175411d23SStefan Hajnoczi * See the COPYING.LIB file in the top-level directory. 1275411d23SStefan Hajnoczi * 1375411d23SStefan Hajnoczi */ 1475411d23SStefan Hajnoczi 1575411d23SStefan Hajnoczi #include "qed.h" 1675411d23SStefan Hajnoczi 1775411d23SStefan Hajnoczi static int bdrv_qed_probe(const uint8_t *buf, int buf_size, 1875411d23SStefan Hajnoczi const char *filename) 1975411d23SStefan Hajnoczi { 2075411d23SStefan Hajnoczi const QEDHeader *header = (const QEDHeader *)buf; 2175411d23SStefan Hajnoczi 2275411d23SStefan Hajnoczi if (buf_size < sizeof(*header)) { 2375411d23SStefan Hajnoczi return 0; 2475411d23SStefan Hajnoczi } 2575411d23SStefan Hajnoczi if (le32_to_cpu(header->magic) != QED_MAGIC) { 2675411d23SStefan Hajnoczi return 0; 2775411d23SStefan Hajnoczi } 2875411d23SStefan Hajnoczi return 100; 2975411d23SStefan Hajnoczi } 3075411d23SStefan Hajnoczi 3175411d23SStefan Hajnoczi /** 3275411d23SStefan Hajnoczi * Check whether an image format is raw 3375411d23SStefan Hajnoczi * 3475411d23SStefan Hajnoczi * @fmt: Backing file format, may be NULL 3575411d23SStefan Hajnoczi */ 3675411d23SStefan Hajnoczi static bool qed_fmt_is_raw(const char *fmt) 3775411d23SStefan Hajnoczi { 3875411d23SStefan Hajnoczi return fmt && strcmp(fmt, "raw") == 0; 3975411d23SStefan Hajnoczi } 4075411d23SStefan Hajnoczi 4175411d23SStefan Hajnoczi static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu) 4275411d23SStefan Hajnoczi { 4375411d23SStefan Hajnoczi cpu->magic = le32_to_cpu(le->magic); 4475411d23SStefan Hajnoczi cpu->cluster_size = le32_to_cpu(le->cluster_size); 4575411d23SStefan Hajnoczi cpu->table_size = le32_to_cpu(le->table_size); 4675411d23SStefan Hajnoczi cpu->header_size = le32_to_cpu(le->header_size); 4775411d23SStefan Hajnoczi cpu->features = le64_to_cpu(le->features); 4875411d23SStefan Hajnoczi cpu->compat_features = le64_to_cpu(le->compat_features); 4975411d23SStefan Hajnoczi cpu->autoclear_features = le64_to_cpu(le->autoclear_features); 5075411d23SStefan Hajnoczi cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset); 5175411d23SStefan Hajnoczi cpu->image_size = le64_to_cpu(le->image_size); 5275411d23SStefan Hajnoczi cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset); 5375411d23SStefan Hajnoczi cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size); 5475411d23SStefan Hajnoczi } 5575411d23SStefan Hajnoczi 5675411d23SStefan Hajnoczi static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le) 5775411d23SStefan Hajnoczi { 5875411d23SStefan Hajnoczi le->magic = cpu_to_le32(cpu->magic); 5975411d23SStefan Hajnoczi le->cluster_size = cpu_to_le32(cpu->cluster_size); 6075411d23SStefan Hajnoczi le->table_size = cpu_to_le32(cpu->table_size); 6175411d23SStefan Hajnoczi le->header_size = cpu_to_le32(cpu->header_size); 6275411d23SStefan Hajnoczi le->features = cpu_to_le64(cpu->features); 6375411d23SStefan Hajnoczi le->compat_features = cpu_to_le64(cpu->compat_features); 6475411d23SStefan Hajnoczi le->autoclear_features = cpu_to_le64(cpu->autoclear_features); 6575411d23SStefan Hajnoczi le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset); 6675411d23SStefan Hajnoczi le->image_size = cpu_to_le64(cpu->image_size); 6775411d23SStefan Hajnoczi le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset); 6875411d23SStefan Hajnoczi le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size); 6975411d23SStefan Hajnoczi } 7075411d23SStefan Hajnoczi 7175411d23SStefan Hajnoczi static int qed_write_header_sync(BDRVQEDState *s) 7275411d23SStefan Hajnoczi { 7375411d23SStefan Hajnoczi QEDHeader le; 7475411d23SStefan Hajnoczi int ret; 7575411d23SStefan Hajnoczi 7675411d23SStefan Hajnoczi qed_header_cpu_to_le(&s->header, &le); 7775411d23SStefan Hajnoczi ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le)); 7875411d23SStefan Hajnoczi if (ret != sizeof(le)) { 7975411d23SStefan Hajnoczi return ret; 8075411d23SStefan Hajnoczi } 8175411d23SStefan Hajnoczi return 0; 8275411d23SStefan Hajnoczi } 8375411d23SStefan Hajnoczi 8475411d23SStefan Hajnoczi static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size) 8575411d23SStefan Hajnoczi { 8675411d23SStefan Hajnoczi uint64_t table_entries; 8775411d23SStefan Hajnoczi uint64_t l2_size; 8875411d23SStefan Hajnoczi 8975411d23SStefan Hajnoczi table_entries = (table_size * cluster_size) / sizeof(uint64_t); 9075411d23SStefan Hajnoczi l2_size = table_entries * cluster_size; 9175411d23SStefan Hajnoczi 9275411d23SStefan Hajnoczi return l2_size * table_entries; 9375411d23SStefan Hajnoczi } 9475411d23SStefan Hajnoczi 9575411d23SStefan Hajnoczi static bool qed_is_cluster_size_valid(uint32_t cluster_size) 9675411d23SStefan Hajnoczi { 9775411d23SStefan Hajnoczi if (cluster_size < QED_MIN_CLUSTER_SIZE || 9875411d23SStefan Hajnoczi cluster_size > QED_MAX_CLUSTER_SIZE) { 9975411d23SStefan Hajnoczi return false; 10075411d23SStefan Hajnoczi } 10175411d23SStefan Hajnoczi if (cluster_size & (cluster_size - 1)) { 10275411d23SStefan Hajnoczi return false; /* not power of 2 */ 10375411d23SStefan Hajnoczi } 10475411d23SStefan Hajnoczi return true; 10575411d23SStefan Hajnoczi } 10675411d23SStefan Hajnoczi 10775411d23SStefan Hajnoczi static bool qed_is_table_size_valid(uint32_t table_size) 10875411d23SStefan Hajnoczi { 10975411d23SStefan Hajnoczi if (table_size < QED_MIN_TABLE_SIZE || 11075411d23SStefan Hajnoczi table_size > QED_MAX_TABLE_SIZE) { 11175411d23SStefan Hajnoczi return false; 11275411d23SStefan Hajnoczi } 11375411d23SStefan Hajnoczi if (table_size & (table_size - 1)) { 11475411d23SStefan Hajnoczi return false; /* not power of 2 */ 11575411d23SStefan Hajnoczi } 11675411d23SStefan Hajnoczi return true; 11775411d23SStefan Hajnoczi } 11875411d23SStefan Hajnoczi 11975411d23SStefan Hajnoczi static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size, 12075411d23SStefan Hajnoczi uint32_t table_size) 12175411d23SStefan Hajnoczi { 12275411d23SStefan Hajnoczi if (image_size % BDRV_SECTOR_SIZE != 0) { 12375411d23SStefan Hajnoczi return false; /* not multiple of sector size */ 12475411d23SStefan Hajnoczi } 12575411d23SStefan Hajnoczi if (image_size > qed_max_image_size(cluster_size, table_size)) { 12675411d23SStefan Hajnoczi return false; /* image is too large */ 12775411d23SStefan Hajnoczi } 12875411d23SStefan Hajnoczi return true; 12975411d23SStefan Hajnoczi } 13075411d23SStefan Hajnoczi 13175411d23SStefan Hajnoczi /** 13275411d23SStefan Hajnoczi * Read a string of known length from the image file 13375411d23SStefan Hajnoczi * 13475411d23SStefan Hajnoczi * @file: Image file 13575411d23SStefan Hajnoczi * @offset: File offset to start of string, in bytes 13675411d23SStefan Hajnoczi * @n: String length in bytes 13775411d23SStefan Hajnoczi * @buf: Destination buffer 13875411d23SStefan Hajnoczi * @buflen: Destination buffer length in bytes 13975411d23SStefan Hajnoczi * @ret: 0 on success, -errno on failure 14075411d23SStefan Hajnoczi * 14175411d23SStefan Hajnoczi * The string is NUL-terminated. 14275411d23SStefan Hajnoczi */ 14375411d23SStefan Hajnoczi static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n, 14475411d23SStefan Hajnoczi char *buf, size_t buflen) 14575411d23SStefan Hajnoczi { 14675411d23SStefan Hajnoczi int ret; 14775411d23SStefan Hajnoczi if (n >= buflen) { 14875411d23SStefan Hajnoczi return -EINVAL; 14975411d23SStefan Hajnoczi } 15075411d23SStefan Hajnoczi ret = bdrv_pread(file, offset, buf, n); 15175411d23SStefan Hajnoczi if (ret < 0) { 15275411d23SStefan Hajnoczi return ret; 15375411d23SStefan Hajnoczi } 15475411d23SStefan Hajnoczi buf[n] = '\0'; 15575411d23SStefan Hajnoczi return 0; 15675411d23SStefan Hajnoczi } 15775411d23SStefan Hajnoczi 158*298800caSStefan Hajnoczi QEDTable *qed_alloc_table(BDRVQEDState *s) 159*298800caSStefan Hajnoczi { 160*298800caSStefan Hajnoczi /* Honor O_DIRECT memory alignment requirements */ 161*298800caSStefan Hajnoczi return qemu_blockalign(s->bs, 162*298800caSStefan Hajnoczi s->header.cluster_size * s->header.table_size); 163*298800caSStefan Hajnoczi } 164*298800caSStefan Hajnoczi 16575411d23SStefan Hajnoczi static int bdrv_qed_open(BlockDriverState *bs, int flags) 16675411d23SStefan Hajnoczi { 16775411d23SStefan Hajnoczi BDRVQEDState *s = bs->opaque; 16875411d23SStefan Hajnoczi QEDHeader le_header; 16975411d23SStefan Hajnoczi int64_t file_size; 17075411d23SStefan Hajnoczi int ret; 17175411d23SStefan Hajnoczi 17275411d23SStefan Hajnoczi s->bs = bs; 17375411d23SStefan Hajnoczi 17475411d23SStefan Hajnoczi ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header)); 17575411d23SStefan Hajnoczi if (ret < 0) { 17675411d23SStefan Hajnoczi return ret; 17775411d23SStefan Hajnoczi } 17875411d23SStefan Hajnoczi ret = 0; /* ret should always be 0 or -errno */ 17975411d23SStefan Hajnoczi qed_header_le_to_cpu(&le_header, &s->header); 18075411d23SStefan Hajnoczi 18175411d23SStefan Hajnoczi if (s->header.magic != QED_MAGIC) { 18275411d23SStefan Hajnoczi return -EINVAL; 18375411d23SStefan Hajnoczi } 18475411d23SStefan Hajnoczi if (s->header.features & ~QED_FEATURE_MASK) { 18575411d23SStefan Hajnoczi return -ENOTSUP; /* image uses unsupported feature bits */ 18675411d23SStefan Hajnoczi } 18775411d23SStefan Hajnoczi if (!qed_is_cluster_size_valid(s->header.cluster_size)) { 18875411d23SStefan Hajnoczi return -EINVAL; 18975411d23SStefan Hajnoczi } 19075411d23SStefan Hajnoczi 19175411d23SStefan Hajnoczi /* Round down file size to the last cluster */ 19275411d23SStefan Hajnoczi file_size = bdrv_getlength(bs->file); 19375411d23SStefan Hajnoczi if (file_size < 0) { 19475411d23SStefan Hajnoczi return file_size; 19575411d23SStefan Hajnoczi } 19675411d23SStefan Hajnoczi s->file_size = qed_start_of_cluster(s, file_size); 19775411d23SStefan Hajnoczi 19875411d23SStefan Hajnoczi if (!qed_is_table_size_valid(s->header.table_size)) { 19975411d23SStefan Hajnoczi return -EINVAL; 20075411d23SStefan Hajnoczi } 20175411d23SStefan Hajnoczi if (!qed_is_image_size_valid(s->header.image_size, 20275411d23SStefan Hajnoczi s->header.cluster_size, 20375411d23SStefan Hajnoczi s->header.table_size)) { 20475411d23SStefan Hajnoczi return -EINVAL; 20575411d23SStefan Hajnoczi } 20675411d23SStefan Hajnoczi if (!qed_check_table_offset(s, s->header.l1_table_offset)) { 20775411d23SStefan Hajnoczi return -EINVAL; 20875411d23SStefan Hajnoczi } 20975411d23SStefan Hajnoczi 21075411d23SStefan Hajnoczi s->table_nelems = (s->header.cluster_size * s->header.table_size) / 21175411d23SStefan Hajnoczi sizeof(uint64_t); 21275411d23SStefan Hajnoczi s->l2_shift = ffs(s->header.cluster_size) - 1; 21375411d23SStefan Hajnoczi s->l2_mask = s->table_nelems - 1; 21475411d23SStefan Hajnoczi s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1; 21575411d23SStefan Hajnoczi 21675411d23SStefan Hajnoczi if ((s->header.features & QED_F_BACKING_FILE)) { 21775411d23SStefan Hajnoczi if ((uint64_t)s->header.backing_filename_offset + 21875411d23SStefan Hajnoczi s->header.backing_filename_size > 21975411d23SStefan Hajnoczi s->header.cluster_size * s->header.header_size) { 22075411d23SStefan Hajnoczi return -EINVAL; 22175411d23SStefan Hajnoczi } 22275411d23SStefan Hajnoczi 22375411d23SStefan Hajnoczi ret = qed_read_string(bs->file, s->header.backing_filename_offset, 22475411d23SStefan Hajnoczi s->header.backing_filename_size, bs->backing_file, 22575411d23SStefan Hajnoczi sizeof(bs->backing_file)); 22675411d23SStefan Hajnoczi if (ret < 0) { 22775411d23SStefan Hajnoczi return ret; 22875411d23SStefan Hajnoczi } 22975411d23SStefan Hajnoczi 23075411d23SStefan Hajnoczi if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) { 23175411d23SStefan Hajnoczi pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw"); 23275411d23SStefan Hajnoczi } 23375411d23SStefan Hajnoczi } 23475411d23SStefan Hajnoczi 23575411d23SStefan Hajnoczi /* Reset unknown autoclear feature bits. This is a backwards 23675411d23SStefan Hajnoczi * compatibility mechanism that allows images to be opened by older 23775411d23SStefan Hajnoczi * programs, which "knock out" unknown feature bits. When an image is 23875411d23SStefan Hajnoczi * opened by a newer program again it can detect that the autoclear 23975411d23SStefan Hajnoczi * feature is no longer valid. 24075411d23SStefan Hajnoczi */ 24175411d23SStefan Hajnoczi if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 && 24275411d23SStefan Hajnoczi !bdrv_is_read_only(bs->file)) { 24375411d23SStefan Hajnoczi s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK; 24475411d23SStefan Hajnoczi 24575411d23SStefan Hajnoczi ret = qed_write_header_sync(s); 24675411d23SStefan Hajnoczi if (ret) { 24775411d23SStefan Hajnoczi return ret; 24875411d23SStefan Hajnoczi } 24975411d23SStefan Hajnoczi 25075411d23SStefan Hajnoczi /* From here on only known autoclear feature bits are valid */ 25175411d23SStefan Hajnoczi bdrv_flush(bs->file); 25275411d23SStefan Hajnoczi } 25375411d23SStefan Hajnoczi 254*298800caSStefan Hajnoczi s->l1_table = qed_alloc_table(s); 255*298800caSStefan Hajnoczi qed_init_l2_cache(&s->l2_cache); 256*298800caSStefan Hajnoczi 257*298800caSStefan Hajnoczi ret = qed_read_l1_table_sync(s); 258*298800caSStefan Hajnoczi if (ret) { 259*298800caSStefan Hajnoczi qed_free_l2_cache(&s->l2_cache); 260*298800caSStefan Hajnoczi qemu_vfree(s->l1_table); 261*298800caSStefan Hajnoczi } 26275411d23SStefan Hajnoczi return ret; 26375411d23SStefan Hajnoczi } 26475411d23SStefan Hajnoczi 26575411d23SStefan Hajnoczi static void bdrv_qed_close(BlockDriverState *bs) 26675411d23SStefan Hajnoczi { 267*298800caSStefan Hajnoczi BDRVQEDState *s = bs->opaque; 268*298800caSStefan Hajnoczi 269*298800caSStefan Hajnoczi qed_free_l2_cache(&s->l2_cache); 270*298800caSStefan Hajnoczi qemu_vfree(s->l1_table); 27175411d23SStefan Hajnoczi } 27275411d23SStefan Hajnoczi 27375411d23SStefan Hajnoczi static int bdrv_qed_flush(BlockDriverState *bs) 27475411d23SStefan Hajnoczi { 27575411d23SStefan Hajnoczi return bdrv_flush(bs->file); 27675411d23SStefan Hajnoczi } 27775411d23SStefan Hajnoczi 27875411d23SStefan Hajnoczi static int qed_create(const char *filename, uint32_t cluster_size, 27975411d23SStefan Hajnoczi uint64_t image_size, uint32_t table_size, 28075411d23SStefan Hajnoczi const char *backing_file, const char *backing_fmt) 28175411d23SStefan Hajnoczi { 28275411d23SStefan Hajnoczi QEDHeader header = { 28375411d23SStefan Hajnoczi .magic = QED_MAGIC, 28475411d23SStefan Hajnoczi .cluster_size = cluster_size, 28575411d23SStefan Hajnoczi .table_size = table_size, 28675411d23SStefan Hajnoczi .header_size = 1, 28775411d23SStefan Hajnoczi .features = 0, 28875411d23SStefan Hajnoczi .compat_features = 0, 28975411d23SStefan Hajnoczi .l1_table_offset = cluster_size, 29075411d23SStefan Hajnoczi .image_size = image_size, 29175411d23SStefan Hajnoczi }; 29275411d23SStefan Hajnoczi QEDHeader le_header; 29375411d23SStefan Hajnoczi uint8_t *l1_table = NULL; 29475411d23SStefan Hajnoczi size_t l1_size = header.cluster_size * header.table_size; 29575411d23SStefan Hajnoczi int ret = 0; 29675411d23SStefan Hajnoczi BlockDriverState *bs = NULL; 29775411d23SStefan Hajnoczi 29875411d23SStefan Hajnoczi ret = bdrv_create_file(filename, NULL); 29975411d23SStefan Hajnoczi if (ret < 0) { 30075411d23SStefan Hajnoczi return ret; 30175411d23SStefan Hajnoczi } 30275411d23SStefan Hajnoczi 30375411d23SStefan Hajnoczi ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR | BDRV_O_CACHE_WB); 30475411d23SStefan Hajnoczi if (ret < 0) { 30575411d23SStefan Hajnoczi return ret; 30675411d23SStefan Hajnoczi } 30775411d23SStefan Hajnoczi 30875411d23SStefan Hajnoczi if (backing_file) { 30975411d23SStefan Hajnoczi header.features |= QED_F_BACKING_FILE; 31075411d23SStefan Hajnoczi header.backing_filename_offset = sizeof(le_header); 31175411d23SStefan Hajnoczi header.backing_filename_size = strlen(backing_file); 31275411d23SStefan Hajnoczi 31375411d23SStefan Hajnoczi if (qed_fmt_is_raw(backing_fmt)) { 31475411d23SStefan Hajnoczi header.features |= QED_F_BACKING_FORMAT_NO_PROBE; 31575411d23SStefan Hajnoczi } 31675411d23SStefan Hajnoczi } 31775411d23SStefan Hajnoczi 31875411d23SStefan Hajnoczi qed_header_cpu_to_le(&header, &le_header); 31975411d23SStefan Hajnoczi ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header)); 32075411d23SStefan Hajnoczi if (ret < 0) { 32175411d23SStefan Hajnoczi goto out; 32275411d23SStefan Hajnoczi } 32375411d23SStefan Hajnoczi ret = bdrv_pwrite(bs, sizeof(le_header), backing_file, 32475411d23SStefan Hajnoczi header.backing_filename_size); 32575411d23SStefan Hajnoczi if (ret < 0) { 32675411d23SStefan Hajnoczi goto out; 32775411d23SStefan Hajnoczi } 32875411d23SStefan Hajnoczi 32975411d23SStefan Hajnoczi l1_table = qemu_mallocz(l1_size); 33075411d23SStefan Hajnoczi ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size); 33175411d23SStefan Hajnoczi if (ret < 0) { 33275411d23SStefan Hajnoczi goto out; 33375411d23SStefan Hajnoczi } 33475411d23SStefan Hajnoczi 33575411d23SStefan Hajnoczi ret = 0; /* success */ 33675411d23SStefan Hajnoczi out: 33775411d23SStefan Hajnoczi qemu_free(l1_table); 33875411d23SStefan Hajnoczi bdrv_delete(bs); 33975411d23SStefan Hajnoczi return ret; 34075411d23SStefan Hajnoczi } 34175411d23SStefan Hajnoczi 34275411d23SStefan Hajnoczi static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options) 34375411d23SStefan Hajnoczi { 34475411d23SStefan Hajnoczi uint64_t image_size = 0; 34575411d23SStefan Hajnoczi uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE; 34675411d23SStefan Hajnoczi uint32_t table_size = QED_DEFAULT_TABLE_SIZE; 34775411d23SStefan Hajnoczi const char *backing_file = NULL; 34875411d23SStefan Hajnoczi const char *backing_fmt = NULL; 34975411d23SStefan Hajnoczi 35075411d23SStefan Hajnoczi while (options && options->name) { 35175411d23SStefan Hajnoczi if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 35275411d23SStefan Hajnoczi image_size = options->value.n; 35375411d23SStefan Hajnoczi } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 35475411d23SStefan Hajnoczi backing_file = options->value.s; 35575411d23SStefan Hajnoczi } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) { 35675411d23SStefan Hajnoczi backing_fmt = options->value.s; 35775411d23SStefan Hajnoczi } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { 35875411d23SStefan Hajnoczi if (options->value.n) { 35975411d23SStefan Hajnoczi cluster_size = options->value.n; 36075411d23SStefan Hajnoczi } 36175411d23SStefan Hajnoczi } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) { 36275411d23SStefan Hajnoczi if (options->value.n) { 36375411d23SStefan Hajnoczi table_size = options->value.n; 36475411d23SStefan Hajnoczi } 36575411d23SStefan Hajnoczi } 36675411d23SStefan Hajnoczi options++; 36775411d23SStefan Hajnoczi } 36875411d23SStefan Hajnoczi 36975411d23SStefan Hajnoczi if (!qed_is_cluster_size_valid(cluster_size)) { 37075411d23SStefan Hajnoczi fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n", 37175411d23SStefan Hajnoczi QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE); 37275411d23SStefan Hajnoczi return -EINVAL; 37375411d23SStefan Hajnoczi } 37475411d23SStefan Hajnoczi if (!qed_is_table_size_valid(table_size)) { 37575411d23SStefan Hajnoczi fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n", 37675411d23SStefan Hajnoczi QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE); 37775411d23SStefan Hajnoczi return -EINVAL; 37875411d23SStefan Hajnoczi } 37975411d23SStefan Hajnoczi if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) { 38075411d23SStefan Hajnoczi fprintf(stderr, "QED image size must be a non-zero multiple of " 38175411d23SStefan Hajnoczi "cluster size and less than %" PRIu64 " bytes\n", 38275411d23SStefan Hajnoczi qed_max_image_size(cluster_size, table_size)); 38375411d23SStefan Hajnoczi return -EINVAL; 38475411d23SStefan Hajnoczi } 38575411d23SStefan Hajnoczi 38675411d23SStefan Hajnoczi return qed_create(filename, cluster_size, image_size, table_size, 38775411d23SStefan Hajnoczi backing_file, backing_fmt); 38875411d23SStefan Hajnoczi } 38975411d23SStefan Hajnoczi 390*298800caSStefan Hajnoczi typedef struct { 391*298800caSStefan Hajnoczi int is_allocated; 392*298800caSStefan Hajnoczi int *pnum; 393*298800caSStefan Hajnoczi } QEDIsAllocatedCB; 394*298800caSStefan Hajnoczi 395*298800caSStefan Hajnoczi static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len) 396*298800caSStefan Hajnoczi { 397*298800caSStefan Hajnoczi QEDIsAllocatedCB *cb = opaque; 398*298800caSStefan Hajnoczi *cb->pnum = len / BDRV_SECTOR_SIZE; 399*298800caSStefan Hajnoczi cb->is_allocated = ret == QED_CLUSTER_FOUND; 400*298800caSStefan Hajnoczi } 401*298800caSStefan Hajnoczi 40275411d23SStefan Hajnoczi static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num, 40375411d23SStefan Hajnoczi int nb_sectors, int *pnum) 40475411d23SStefan Hajnoczi { 405*298800caSStefan Hajnoczi BDRVQEDState *s = bs->opaque; 406*298800caSStefan Hajnoczi uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; 407*298800caSStefan Hajnoczi size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE; 408*298800caSStefan Hajnoczi QEDIsAllocatedCB cb = { 409*298800caSStefan Hajnoczi .is_allocated = -1, 410*298800caSStefan Hajnoczi .pnum = pnum, 411*298800caSStefan Hajnoczi }; 412*298800caSStefan Hajnoczi QEDRequest request = { .l2_table = NULL }; 413*298800caSStefan Hajnoczi 414*298800caSStefan Hajnoczi async_context_push(); 415*298800caSStefan Hajnoczi 416*298800caSStefan Hajnoczi qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb); 417*298800caSStefan Hajnoczi 418*298800caSStefan Hajnoczi while (cb.is_allocated == -1) { 419*298800caSStefan Hajnoczi qemu_aio_wait(); 420*298800caSStefan Hajnoczi } 421*298800caSStefan Hajnoczi 422*298800caSStefan Hajnoczi async_context_pop(); 423*298800caSStefan Hajnoczi 424*298800caSStefan Hajnoczi qed_unref_l2_cache_entry(request.l2_table); 425*298800caSStefan Hajnoczi 426*298800caSStefan Hajnoczi return cb.is_allocated; 42775411d23SStefan Hajnoczi } 42875411d23SStefan Hajnoczi 42975411d23SStefan Hajnoczi static int bdrv_qed_make_empty(BlockDriverState *bs) 43075411d23SStefan Hajnoczi { 43175411d23SStefan Hajnoczi return -ENOTSUP; 43275411d23SStefan Hajnoczi } 43375411d23SStefan Hajnoczi 43475411d23SStefan Hajnoczi static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs, 43575411d23SStefan Hajnoczi int64_t sector_num, 43675411d23SStefan Hajnoczi QEMUIOVector *qiov, int nb_sectors, 43775411d23SStefan Hajnoczi BlockDriverCompletionFunc *cb, 43875411d23SStefan Hajnoczi void *opaque) 43975411d23SStefan Hajnoczi { 44075411d23SStefan Hajnoczi return NULL; 44175411d23SStefan Hajnoczi } 44275411d23SStefan Hajnoczi 44375411d23SStefan Hajnoczi static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, 44475411d23SStefan Hajnoczi int64_t sector_num, 44575411d23SStefan Hajnoczi QEMUIOVector *qiov, int nb_sectors, 44675411d23SStefan Hajnoczi BlockDriverCompletionFunc *cb, 44775411d23SStefan Hajnoczi void *opaque) 44875411d23SStefan Hajnoczi { 44975411d23SStefan Hajnoczi return NULL; 45075411d23SStefan Hajnoczi } 45175411d23SStefan Hajnoczi 45275411d23SStefan Hajnoczi static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs, 45375411d23SStefan Hajnoczi BlockDriverCompletionFunc *cb, 45475411d23SStefan Hajnoczi void *opaque) 45575411d23SStefan Hajnoczi { 45675411d23SStefan Hajnoczi return bdrv_aio_flush(bs->file, cb, opaque); 45775411d23SStefan Hajnoczi } 45875411d23SStefan Hajnoczi 45975411d23SStefan Hajnoczi static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset) 46075411d23SStefan Hajnoczi { 46175411d23SStefan Hajnoczi return -ENOTSUP; 46275411d23SStefan Hajnoczi } 46375411d23SStefan Hajnoczi 46475411d23SStefan Hajnoczi static int64_t bdrv_qed_getlength(BlockDriverState *bs) 46575411d23SStefan Hajnoczi { 46675411d23SStefan Hajnoczi BDRVQEDState *s = bs->opaque; 46775411d23SStefan Hajnoczi return s->header.image_size; 46875411d23SStefan Hajnoczi } 46975411d23SStefan Hajnoczi 47075411d23SStefan Hajnoczi static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 47175411d23SStefan Hajnoczi { 47275411d23SStefan Hajnoczi BDRVQEDState *s = bs->opaque; 47375411d23SStefan Hajnoczi 47475411d23SStefan Hajnoczi memset(bdi, 0, sizeof(*bdi)); 47575411d23SStefan Hajnoczi bdi->cluster_size = s->header.cluster_size; 47675411d23SStefan Hajnoczi return 0; 47775411d23SStefan Hajnoczi } 47875411d23SStefan Hajnoczi 47975411d23SStefan Hajnoczi static int bdrv_qed_change_backing_file(BlockDriverState *bs, 48075411d23SStefan Hajnoczi const char *backing_file, 48175411d23SStefan Hajnoczi const char *backing_fmt) 48275411d23SStefan Hajnoczi { 48375411d23SStefan Hajnoczi BDRVQEDState *s = bs->opaque; 48475411d23SStefan Hajnoczi QEDHeader new_header, le_header; 48575411d23SStefan Hajnoczi void *buffer; 48675411d23SStefan Hajnoczi size_t buffer_len, backing_file_len; 48775411d23SStefan Hajnoczi int ret; 48875411d23SStefan Hajnoczi 48975411d23SStefan Hajnoczi /* Refuse to set backing filename if unknown compat feature bits are 49075411d23SStefan Hajnoczi * active. If the image uses an unknown compat feature then we may not 49175411d23SStefan Hajnoczi * know the layout of data following the header structure and cannot safely 49275411d23SStefan Hajnoczi * add a new string. 49375411d23SStefan Hajnoczi */ 49475411d23SStefan Hajnoczi if (backing_file && (s->header.compat_features & 49575411d23SStefan Hajnoczi ~QED_COMPAT_FEATURE_MASK)) { 49675411d23SStefan Hajnoczi return -ENOTSUP; 49775411d23SStefan Hajnoczi } 49875411d23SStefan Hajnoczi 49975411d23SStefan Hajnoczi memcpy(&new_header, &s->header, sizeof(new_header)); 50075411d23SStefan Hajnoczi 50175411d23SStefan Hajnoczi new_header.features &= ~(QED_F_BACKING_FILE | 50275411d23SStefan Hajnoczi QED_F_BACKING_FORMAT_NO_PROBE); 50375411d23SStefan Hajnoczi 50475411d23SStefan Hajnoczi /* Adjust feature flags */ 50575411d23SStefan Hajnoczi if (backing_file) { 50675411d23SStefan Hajnoczi new_header.features |= QED_F_BACKING_FILE; 50775411d23SStefan Hajnoczi 50875411d23SStefan Hajnoczi if (qed_fmt_is_raw(backing_fmt)) { 50975411d23SStefan Hajnoczi new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE; 51075411d23SStefan Hajnoczi } 51175411d23SStefan Hajnoczi } 51275411d23SStefan Hajnoczi 51375411d23SStefan Hajnoczi /* Calculate new header size */ 51475411d23SStefan Hajnoczi backing_file_len = 0; 51575411d23SStefan Hajnoczi 51675411d23SStefan Hajnoczi if (backing_file) { 51775411d23SStefan Hajnoczi backing_file_len = strlen(backing_file); 51875411d23SStefan Hajnoczi } 51975411d23SStefan Hajnoczi 52075411d23SStefan Hajnoczi buffer_len = sizeof(new_header); 52175411d23SStefan Hajnoczi new_header.backing_filename_offset = buffer_len; 52275411d23SStefan Hajnoczi new_header.backing_filename_size = backing_file_len; 52375411d23SStefan Hajnoczi buffer_len += backing_file_len; 52475411d23SStefan Hajnoczi 52575411d23SStefan Hajnoczi /* Make sure we can rewrite header without failing */ 52675411d23SStefan Hajnoczi if (buffer_len > new_header.header_size * new_header.cluster_size) { 52775411d23SStefan Hajnoczi return -ENOSPC; 52875411d23SStefan Hajnoczi } 52975411d23SStefan Hajnoczi 53075411d23SStefan Hajnoczi /* Prepare new header */ 53175411d23SStefan Hajnoczi buffer = qemu_malloc(buffer_len); 53275411d23SStefan Hajnoczi 53375411d23SStefan Hajnoczi qed_header_cpu_to_le(&new_header, &le_header); 53475411d23SStefan Hajnoczi memcpy(buffer, &le_header, sizeof(le_header)); 53575411d23SStefan Hajnoczi buffer_len = sizeof(le_header); 53675411d23SStefan Hajnoczi 53775411d23SStefan Hajnoczi memcpy(buffer + buffer_len, backing_file, backing_file_len); 53875411d23SStefan Hajnoczi buffer_len += backing_file_len; 53975411d23SStefan Hajnoczi 54075411d23SStefan Hajnoczi /* Write new header */ 54175411d23SStefan Hajnoczi ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len); 54275411d23SStefan Hajnoczi qemu_free(buffer); 54375411d23SStefan Hajnoczi if (ret == 0) { 54475411d23SStefan Hajnoczi memcpy(&s->header, &new_header, sizeof(new_header)); 54575411d23SStefan Hajnoczi } 54675411d23SStefan Hajnoczi return ret; 54775411d23SStefan Hajnoczi } 54875411d23SStefan Hajnoczi 54975411d23SStefan Hajnoczi static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result) 55075411d23SStefan Hajnoczi { 55175411d23SStefan Hajnoczi return -ENOTSUP; 55275411d23SStefan Hajnoczi } 55375411d23SStefan Hajnoczi 55475411d23SStefan Hajnoczi static QEMUOptionParameter qed_create_options[] = { 55575411d23SStefan Hajnoczi { 55675411d23SStefan Hajnoczi .name = BLOCK_OPT_SIZE, 55775411d23SStefan Hajnoczi .type = OPT_SIZE, 55875411d23SStefan Hajnoczi .help = "Virtual disk size (in bytes)" 55975411d23SStefan Hajnoczi }, { 56075411d23SStefan Hajnoczi .name = BLOCK_OPT_BACKING_FILE, 56175411d23SStefan Hajnoczi .type = OPT_STRING, 56275411d23SStefan Hajnoczi .help = "File name of a base image" 56375411d23SStefan Hajnoczi }, { 56475411d23SStefan Hajnoczi .name = BLOCK_OPT_BACKING_FMT, 56575411d23SStefan Hajnoczi .type = OPT_STRING, 56675411d23SStefan Hajnoczi .help = "Image format of the base image" 56775411d23SStefan Hajnoczi }, { 56875411d23SStefan Hajnoczi .name = BLOCK_OPT_CLUSTER_SIZE, 56975411d23SStefan Hajnoczi .type = OPT_SIZE, 57075411d23SStefan Hajnoczi .help = "Cluster size (in bytes)" 57175411d23SStefan Hajnoczi }, { 57275411d23SStefan Hajnoczi .name = BLOCK_OPT_TABLE_SIZE, 57375411d23SStefan Hajnoczi .type = OPT_SIZE, 57475411d23SStefan Hajnoczi .help = "L1/L2 table size (in clusters)" 57575411d23SStefan Hajnoczi }, 57675411d23SStefan Hajnoczi { /* end of list */ } 57775411d23SStefan Hajnoczi }; 57875411d23SStefan Hajnoczi 57975411d23SStefan Hajnoczi static BlockDriver bdrv_qed = { 58075411d23SStefan Hajnoczi .format_name = "qed", 58175411d23SStefan Hajnoczi .instance_size = sizeof(BDRVQEDState), 58275411d23SStefan Hajnoczi .create_options = qed_create_options, 58375411d23SStefan Hajnoczi 58475411d23SStefan Hajnoczi .bdrv_probe = bdrv_qed_probe, 58575411d23SStefan Hajnoczi .bdrv_open = bdrv_qed_open, 58675411d23SStefan Hajnoczi .bdrv_close = bdrv_qed_close, 58775411d23SStefan Hajnoczi .bdrv_create = bdrv_qed_create, 58875411d23SStefan Hajnoczi .bdrv_flush = bdrv_qed_flush, 58975411d23SStefan Hajnoczi .bdrv_is_allocated = bdrv_qed_is_allocated, 59075411d23SStefan Hajnoczi .bdrv_make_empty = bdrv_qed_make_empty, 59175411d23SStefan Hajnoczi .bdrv_aio_readv = bdrv_qed_aio_readv, 59275411d23SStefan Hajnoczi .bdrv_aio_writev = bdrv_qed_aio_writev, 59375411d23SStefan Hajnoczi .bdrv_aio_flush = bdrv_qed_aio_flush, 59475411d23SStefan Hajnoczi .bdrv_truncate = bdrv_qed_truncate, 59575411d23SStefan Hajnoczi .bdrv_getlength = bdrv_qed_getlength, 59675411d23SStefan Hajnoczi .bdrv_get_info = bdrv_qed_get_info, 59775411d23SStefan Hajnoczi .bdrv_change_backing_file = bdrv_qed_change_backing_file, 59875411d23SStefan Hajnoczi .bdrv_check = bdrv_qed_check, 59975411d23SStefan Hajnoczi }; 60075411d23SStefan Hajnoczi 60175411d23SStefan Hajnoczi static void bdrv_qed_init(void) 60275411d23SStefan Hajnoczi { 60375411d23SStefan Hajnoczi bdrv_register(&bdrv_qed); 60475411d23SStefan Hajnoczi } 60575411d23SStefan Hajnoczi 60675411d23SStefan Hajnoczi block_init(bdrv_qed_init); 607