mr.c: comparison of commits f22c30aa6d27597a6da665db1a5f099e4903ecc7 (old) and 8010d74b9965b33182651767e9788ed84cf8e5f9 (new); removed lines are prefixed with "-", added lines with "+".
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:

--- 31 unchanged lines hidden (view full) ---

#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

/*
 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
 * work on kernel modules memory
 */
-unsigned long xlt_emergency_page;
+void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048
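xlt_emergency_page is a single pre-allocated page used as a last-resort XLT buffer; the allocation itself lives in the driver's module init path, outside this file. A minimal sketch of that side under the new void * type, using hypothetical helper names (the real init/exit code may differ):

	static int mlx5_ib_alloc_xlt_emergency_page(void)	/* hypothetical name */
	{
		/* __get_free_page() returns an unsigned long, hence the cast */
		xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL);
		if (!xlt_emergency_page)
			return -ENOMEM;
		return 0;
	}

	static void mlx5_ib_free_xlt_emergency_page(void)	/* hypothetical name */
	{
		free_page((unsigned long)xlt_emergency_page);
	}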

--- 937 unchanged lines hidden (view full) ---

	return mr;
}

#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
			    MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000

-static unsigned long mlx5_ib_get_xlt_emergency_page(void)
-{
-	mutex_lock(&xlt_emergency_page_mutex);
-	return xlt_emergency_page;
-}
-
+/*
+ * Allocate a temporary buffer to hold the per-page information to transfer to
+ * HW. For efficiency this should be as large as it can be, but buffer
+ * allocation failure is not allowed, so try smaller sizes.
+ */
+static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
+{
+	const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size;
+	size_t size;
+	void *res = NULL;
+
+	static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
+
+	/*
+	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context, just that
+	 * the allocation can't trigger any kind of reclaim.
+	 */
+	might_sleep();
+
+	gfp_mask |= __GFP_ZERO;
+
+	/*
+	 * If the system already has a suitable high order page then just use
+	 * that, but don't try hard to create one. This max is about 1M, so a
+	 * free x86 huge page will satisfy it.
+	 */
+	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
+		     MLX5_MAX_UMR_CHUNK);
+	*nents = size / ent_size;
+	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+				       get_order(size));
+	if (res)
+		return res;
+
+	if (size > MLX5_SPARE_UMR_CHUNK) {
+		size = MLX5_SPARE_UMR_CHUNK;
+		*nents = size / ent_size;
+		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+					       get_order(size));
+		if (res)
+			return res;
+	}
+
+	*nents = PAGE_SIZE / ent_size;
+	res = (void *)__get_free_page(gfp_mask);
+	if (res)
+		return res;
+
+	mutex_lock(&xlt_emergency_page_mutex);
+	memset(xlt_emergency_page, 0, PAGE_SIZE);
+	return xlt_emergency_page;
+}
+
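To make the fallback ladder concrete, a worked example, assuming 4 KiB pages and the usual values MLX5_MAX_UMR_SHIFT == 16, MLX5_UMR_MTT_ALIGNMENT == 0x40 and an 8-byte struct mlx5_mtt (all defined elsewhere in the driver), so MLX5_MAX_UMR_CHUNK is (1 << 20) - 64 = 1048512 bytes and MLX5_SPARE_UMR_CHUNK is 65536 bytes:

	size_t nents = 500000;		/* illustrative request size */
	void *xlt = mlx5_ib_alloc_xlt(&nents, sizeof(struct mlx5_mtt),
				      GFP_KERNEL);

	/*
	 * 1st attempt: size = min(500000 * 8, 1048512) = 1048512, nents is
	 *              clamped to 131064, order-8 (1 MiB) allocation.
	 * 2nd attempt: size = 65536, nents = 8192, order-4 allocation.
	 * 3rd attempt: a single page, nents = 512 (with 4 KiB pages).
	 * Last resort: the global xlt_emergency_page, taken under
	 *              xlt_emergency_page_mutex and zeroed; nents stays 512.
	 */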
-static void mlx5_ib_put_xlt_emergency_page(void)
-{
-	mutex_unlock(&xlt_emergency_page_mutex);
-}
-
+static void mlx5_ib_free_xlt(void *xlt, size_t length)
+{
+	if (xlt == xlt_emergency_page) {
+		mutex_unlock(&xlt_emergency_page_mutex);
+		return;
+	}
+
+	free_pages((unsigned long)xlt, get_order(length));
+}
+
+/*
+ * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
+ * submission.
+ */
+static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
+				   struct mlx5_umr_wr *wr, struct ib_sge *sg,
+				   size_t nents, size_t ent_size,
+				   unsigned int flags)
+{
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct device *ddev = dev->ib_dev.dev.parent;
+	dma_addr_t dma;
+	void *xlt;
+
+	xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
+				flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
+								 GFP_KERNEL);
+	sg->length = nents * ent_size;
+	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, dma)) {
+		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
+		mlx5_ib_free_xlt(xlt, sg->length);
+		return NULL;
+	}
+	sg->addr = dma;
+	sg->lkey = dev->umrc.pd->local_dma_lkey;
+
+	memset(wr, 0, sizeof(*wr));
+	wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
+	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+		wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+	wr->wr.sg_list = sg;
+	wr->wr.num_sge = 1;
+	wr->wr.opcode = MLX5_IB_WR_UMR;
+	wr->pd = mr->ibmr.pd;
+	wr->mkey = mr->mmkey.key;
+	wr->length = mr->mmkey.size;
+	wr->virt_addr = mr->mmkey.iova;
+	wr->access_flags = mr->access_flags;
+	wr->page_shift = mr->page_shift;
+	wr->xlt_size = sg->length;
+	return xlt;
+}
+
+static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+				   struct ib_sge *sg)
+{
+	struct device *ddev = dev->ib_dev.dev.parent;
+
+	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
+	mlx5_ib_free_xlt(xlt, sg->length);
+}
+
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
		       int page_shift, int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dev.parent;
-	int size;
	void *xlt;
-	dma_addr_t dma;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			       ? sizeof(struct mlx5_klm)
			       : sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	const int page_mask = page_align - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
-	size_t pages_iter = 0;
+	size_t pages_iter;
	size_t size_to_map = 0;
-	gfp_t gfp;
-	bool use_emergency_page = false;
+	size_t orig_sg_length;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}
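	/*
	 * Worked example, assuming MLX5_UMR_MTT_ALIGNMENT is 0x40 (64 bytes)
	 * and 8-byte MTT descriptors: page_align = 64 / 8 = 8, page_mask = 7,
	 * so a request for npages = 20 starting at idx = 13 becomes
	 * npages = 25 at idx = 8, and pages_to_map below is then rounded up
	 * to 32 so each UMR covers whole 64-byte XLT units.
	 */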
-
-	gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
-	gfp |= __GFP_ZERO | __GFP_NOWARN;
-
	pages_to_map = ALIGN(npages, page_align);
-	size = desc_size * pages_to_map;
-	size = min_t(int, size, MLX5_MAX_UMR_CHUNK);

-	xlt = (void *)__get_free_pages(gfp, get_order(size));
-	if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
-		mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
-			    size, get_order(size), MLX5_SPARE_UMR_CHUNK);
-
-		size = MLX5_SPARE_UMR_CHUNK;
-		xlt = (void *)__get_free_pages(gfp, get_order(size));
-	}
-
-	if (!xlt) {
-		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
-		xlt = (void *)mlx5_ib_get_xlt_emergency_page();
-		size = PAGE_SIZE;
-		memset(xlt, 0, size);
-		use_emergency_page = true;
-	}
-	pages_iter = size / desc_size;
-	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
-	if (dma_mapping_error(ddev, dma)) {
-		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
-		err = -ENOMEM;
-		goto free_xlt;
-	}
-
+	xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
+	if (!xlt)
+		return -ENOMEM;
+	pages_iter = sg.length / desc_size;
+	orig_sg_length = sg.length;
+
	if (mr->umem->is_odp) {
		if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
			struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
			size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

			pages_to_map = min_t(size_t, pages_to_map, max_pages);
		}
	}

-	sg.addr = dma;
-	sg.lkey = dev->umrc.pd->local_dma_lkey;
-
-	memset(&wr, 0, sizeof(wr));
-	wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
-	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
-		wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
-	wr.wr.sg_list = &sg;
-	wr.wr.num_sge = 1;
-	wr.wr.opcode = MLX5_IB_WR_UMR;
-
-	wr.pd = mr->ibmr.pd;
-	wr.mkey = mr->mmkey.key;
-	wr.length = mr->mmkey.size;
-	wr.virt_addr = mr->mmkey.iova;
-	wr.access_flags = mr->access_flags;
	wr.page_shift = page_shift;

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;
-		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+					DMA_TO_DEVICE);
		if (mr->umem->is_odp) {
			mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		} else {
			__mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
					       npages, xlt,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages
			 * brought from the umem.
			 */
-			memset(xlt + size_to_map, 0, size - size_to_map);
+			memset(xlt + size_to_map, 0, sg.length - size_to_map);
		}
-		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+		dma_sync_single_for_device(ddev, sg.addr, sg.length,
+					   DMA_TO_DEVICE);

		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map) {
			if (flags & MLX5_IB_UPD_XLT_ENABLE)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_ENABLE_MR |
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |

--- 7 unchanged lines hidden (view full) ---

					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
		}

		wr.offset = idx * desc_size;
		wr.xlt_size = sg.length;

		err = mlx5_ib_post_send_wait(dev, &wr);
	}
-	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
-
-free_xlt:
-	if (use_emergency_page)
-		mlx5_ib_put_xlt_emergency_page();
-	else
-		free_pages((unsigned long)xlt, get_order(size));
-
+	sg.length = orig_sg_length;
+	mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
	return err;
}
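For context, a rough sketch of how this function is typically driven from the rest of the driver; the variable names (mr, npages, start_idx, page_shift, err) are placeholders and the exact call sites are outside this diff.

	/* Initial population of a newly registered MR's translation table: */
	err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
				 MLX5_IB_UPD_XLT_ENABLE);

	/* ODP page-fault path: update a sub-range; MLX5_IB_UPD_XLT_ATOMIC
	 * only means the XLT buffer allocation must not trigger reclaim.
	 */
	err = mlx5_ib_update_xlt(mr, start_idx, npages, page_shift,
				 MLX5_IB_UPD_XLT_ATOMIC);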

/*
 * If ibmr is NULL it will be allocated by reg_create.
 * Else, the given ibmr will be used.
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,

--- 1266 unchanged lines hidden ---
--- 1266 unchanged lines hidden ---