--- mr.c (f22c30aa6d27597a6da665db1a5f099e4903ecc7)
+++ mr.c (8010d74b9965b33182651767e9788ed84cf8e5f9)
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:

--- 31 unchanged lines hidden ---

#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

/*
 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
 * work on kernel modules memory
 */
-unsigned long xlt_emergency_page;
+void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048

--- 937 unchanged lines hidden ---

	return mr;
}

#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
			    MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000

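Note on the comment above the xlt_emergency_page declaration: the page has to come from the page allocator rather than being a static array because dma_map_single() can only translate addresses in the kernel's linear mapping, and static module data does not live there. The driver's actual setup code is not part of this hunk; the sketch below only illustrates the pattern, and my_drv_init()/my_drv_exit() are hypothetical names.

/*
 * Illustration only: allocate a DMA-able emergency page at module init
 * instead of declaring a static array in module memory. The init/exit
 * function names are hypothetical, not the driver's real init path.
 */
#include <linux/gfp.h>
#include <linux/module.h>

void *xlt_emergency_page;

static int __init my_drv_init(void)
{
	/* Page-allocator memory sits in the linear map, so dma_map_single()
	 * can translate it to a bus address later.
	 */
	xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL);
	if (!xlt_emergency_page)
		return -ENOMEM;
	return 0;
}

static void __exit my_drv_exit(void)
{
	free_page((unsigned long)xlt_emergency_page);
}

module_init(my_drv_init);
module_exit(my_drv_exit);
MODULE_LICENSE("GPL");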
-static unsigned long mlx5_ib_get_xlt_emergency_page(void)
-{
-	mutex_lock(&xlt_emergency_page_mutex);
-	return xlt_emergency_page;
-}
+/*
+ * Allocate a temporary buffer to hold the per-page information to transfer to
+ * HW. For efficiency this should be as large as it can be, but buffer
+ * allocation failure is not allowed, so try smaller sizes.
+ */
+static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
+{
+	const size_t xlt_chunk_align =
+		MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size);
+	size_t size;
+	void *res = NULL;
+
+	static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
+
+	/*
+	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
+	 * allocation can't trigger any kind of reclaim.
+	 */
+	might_sleep();
+
+	gfp_mask |= __GFP_ZERO;
+
+	/*
+	 * If the system already has a suitable high order page then just use
+	 * that, but don't try hard to create one. This max is about 1M, so a
+	 * free x86 huge page will satisfy it.
+	 */
+	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
+		     MLX5_MAX_UMR_CHUNK);
+	*nents = size / ent_size;
+	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+				       get_order(size));
+	if (res)
+		return res;
+
+	if (size > MLX5_SPARE_UMR_CHUNK) {
+		size = MLX5_SPARE_UMR_CHUNK;
+		*nents = get_order(size) / ent_size;
+		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+					       get_order(size));
+		if (res)
+			return res;
+	}
+
+	*nents = PAGE_SIZE / ent_size;
+	res = (void *)__get_free_page(gfp_mask);
+	if (res)
+		return res;
+
+	mutex_lock(&xlt_emergency_page_mutex);
+	memset(xlt_emergency_page, 0, PAGE_SIZE);
+	return xlt_emergency_page;
+}

-static void mlx5_ib_put_xlt_emergency_page(void)
-{
-	mutex_unlock(&xlt_emergency_page_mutex);
-}
+static void mlx5_ib_free_xlt(void *xlt, size_t length)
+{
+	if (xlt == xlt_emergency_page) {
+		mutex_unlock(&xlt_emergency_page_mutex);
+		return;
+	}
+
+	free_pages((unsigned long)xlt, get_order(length));
+}

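The new mlx5_ib_alloc_xlt()/mlx5_ib_free_xlt() pair centralizes the fallback strategy the old update path open-coded: try one large physically contiguous allocation, fall back to smaller sizes, and only as a last resort hand out the shared emergency page, which is serialized by a mutex and released by pointer comparison in the free path. Below is a condensed sketch of that pattern under generic names (buf_alloc, buf_free, emergency_buf are stand-ins, and the driver's intermediate MLX5_SPARE_UMR_CHUNK step is folded into the single-page fallback); it illustrates the approach, not the driver code.

/*
 * Condensed sketch of the "try big, fall back, last-resort shared buffer"
 * pattern used by mlx5_ib_alloc_xlt()/mlx5_ib_free_xlt(). Names are generic.
 */
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/mutex.h>

static void *emergency_buf;		/* one page, set up at init time */
static DEFINE_MUTEX(emergency_buf_lock);

static void *buf_alloc(size_t *len, gfp_t gfp)
{
	void *p;

	/* Prefer one large physically contiguous chunk, without warning or
	 * retrying hard if it is not available.
	 */
	p = (void *)__get_free_pages(gfp | __GFP_NOWARN, get_order(*len));
	if (p)
		return p;

	/* Fall back to a single page. */
	*len = PAGE_SIZE;
	p = (void *)__get_free_page(gfp);
	if (p)
		return p;

	/* Last resort: the shared emergency page. The mutex is held until
	 * buf_free(), so only one caller can use it at a time.
	 */
	mutex_lock(&emergency_buf_lock);
	return emergency_buf;
}

static void buf_free(void *p, size_t len)
{
	if (p == emergency_buf) {
		mutex_unlock(&emergency_buf_lock);
		return;
	}
	free_pages((unsigned long)p, get_order(len));
}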
+/*
+ * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
+ * submission.
+ */
+static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
+				   struct mlx5_umr_wr *wr, struct ib_sge *sg,
+				   size_t nents, size_t ent_size,
+				   unsigned int flags)
+{
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct device *ddev = dev->ib_dev.dev.parent;
+	dma_addr_t dma;
+	void *xlt;
+
+	xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
+				flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
+								 GFP_KERNEL);
+	sg->length = nents * ent_size;
+	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, dma)) {
+		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
+		mlx5_ib_free_xlt(xlt, sg->length);
+		return NULL;
+	}
+	sg->addr = dma;
+	sg->lkey = dev->umrc.pd->local_dma_lkey;
+
+	memset(wr, 0, sizeof(*wr));
+	wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
+	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+		wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+	wr->wr.sg_list = sg;
+	wr->wr.num_sge = 1;
+	wr->wr.opcode = MLX5_IB_WR_UMR;
+	wr->pd = mr->ibmr.pd;
+	wr->mkey = mr->mmkey.key;
+	wr->length = mr->mmkey.size;
+	wr->virt_addr = mr->mmkey.iova;
+	wr->access_flags = mr->access_flags;
+	wr->page_shift = mr->page_shift;
+	wr->xlt_size = sg->length;
+	return xlt;
+}
+
+static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+				   struct ib_sge *sg)
+{
+	struct device *ddev = dev->ib_dev.dev.parent;
+
+	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
+	mlx5_ib_free_xlt(xlt, sg->length);
+}
+
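The helper added above maps the whole staging buffer once, checks the mapping with dma_mapping_error(), and frees the buffer if the mapping fails, while mlx5_ib_unmap_free_xlt() undoes both steps with the same length. A minimal sketch of that map, check, unmap discipline follows; the struct and function names here are illustrative, not the driver's.

/*
 * Sketch of the map/check/unmap pairing in mlx5_ib_create_xlt_wr() and
 * mlx5_ib_unmap_free_xlt(). Names are illustrative.
 */
#include <linux/dma-mapping.h>
#include <linux/errno.h>

struct staging_buf {
	void *cpu_addr;
	dma_addr_t dma_addr;
	size_t len;
};

static int staging_map(struct device *dev, struct staging_buf *s)
{
	s->dma_addr = dma_map_single(dev, s->cpu_addr, s->len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, s->dma_addr))
		return -ENOMEM;		/* nothing was mapped; only free */
	return 0;
}

static void staging_unmap(struct device *dev, struct staging_buf *s)
{
	/* Unmap with the same device, address and length that were mapped. */
	dma_unmap_single(dev, s->dma_addr, s->len, DMA_TO_DEVICE);
}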
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
		       int page_shift, int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dev.parent;
-	int size;
	void *xlt;
-	dma_addr_t dma;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			       ? sizeof(struct mlx5_klm)
			       : sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	const int page_mask = page_align - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
-	size_t pages_iter = 0;
+	size_t pages_iter;
	size_t size_to_map = 0;
-	gfp_t gfp;
-	bool use_emergency_page = false;
+	size_t orig_sg_length;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}
-
-	gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
-	gfp |= __GFP_ZERO | __GFP_NOWARN;
-
	pages_to_map = ALIGN(npages, page_align);
-	size = desc_size * pages_to_map;
-	size = min_t(int, size, MLX5_MAX_UMR_CHUNK);

-	xlt = (void *)__get_free_pages(gfp, get_order(size));
-	if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
-		mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
-			    size, get_order(size), MLX5_SPARE_UMR_CHUNK);
+	xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
+	if (!xlt)
+		return -ENOMEM;
+	pages_iter = sg.length / desc_size;
+	orig_sg_length = sg.length;

-		size = MLX5_SPARE_UMR_CHUNK;
-		xlt = (void *)__get_free_pages(gfp, get_order(size));
-	}
-
-	if (!xlt) {
-		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
-		xlt = (void *)mlx5_ib_get_xlt_emergency_page();
-		size = PAGE_SIZE;
-		memset(xlt, 0, size);
-		use_emergency_page = true;
-	}
-	pages_iter = size / desc_size;
-	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
-	if (dma_mapping_error(ddev, dma)) {
-		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
-		err = -ENOMEM;
-		goto free_xlt;
-	}
-
	if (mr->umem->is_odp) {
		if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
			struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
			size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

			pages_to_map = min_t(size_t, pages_to_map, max_pages);
		}
	}

-	sg.addr = dma;
-	sg.lkey = dev->umrc.pd->local_dma_lkey;
-
-	memset(&wr, 0, sizeof(wr));
-	wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
-	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
-		wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
-	wr.wr.sg_list = &sg;
-	wr.wr.num_sge = 1;
-	wr.wr.opcode = MLX5_IB_WR_UMR;
-
-	wr.pd = mr->ibmr.pd;
-	wr.mkey = mr->mmkey.key;
-	wr.length = mr->mmkey.size;
-	wr.virt_addr = mr->mmkey.iova;
-	wr.access_flags = mr->access_flags;
	wr.page_shift = page_shift;

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;
-		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+					DMA_TO_DEVICE);
		if (mr->umem->is_odp) {
			mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		} else {
			__mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
					       npages, xlt,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages
			 * brought from the umem.
			 */
-			memset(xlt + size_to_map, 0, size - size_to_map);
+			memset(xlt + size_to_map, 0, sg.length - size_to_map);
		}
-		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+		dma_sync_single_for_device(ddev, sg.addr, sg.length,
+					   DMA_TO_DEVICE);

		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map) {
			if (flags & MLX5_IB_UPD_XLT_ENABLE)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_ENABLE_MR |
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |

--- 7 unchanged lines hidden ---

					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
		}

		wr.offset = idx * desc_size;
		wr.xlt_size = sg.length;

		err = mlx5_ib_post_send_wait(dev, &wr);
	}
-	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
-
-free_xlt:
-	if (use_emergency_page)
-		mlx5_ib_put_xlt_emergency_page();
-	else
-		free_pages((unsigned long)xlt, get_order(size));
-
+	sg.length = orig_sg_length;
+	mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
	return err;
}

/*
 * If ibmr is NULL it will be allocated by reg_create.
 * Else, the given ibmr will be used.
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,

--- 1266 unchanged lines hidden ---
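In both versions, the body of mlx5_ib_update_xlt() shown above streams the translation entries in chunks through one long-lived streaming DMA mapping: each iteration hands the buffer back to the CPU with dma_sync_single_for_cpu(), fills it, returns ownership with dma_sync_single_for_device(), and posts a UMR work request. The new code additionally restores sg.length to the originally mapped size (orig_sg_length) before unmapping, since the loop reuses sg.length as the per-chunk size. A reduced sketch of that ownership hand-off follows; fill_chunk() and post_chunk() are hypothetical stand-ins for the XLT population helpers and mlx5_ib_post_send_wait(), and the sync length is simplified to the full mapped buffer.

/*
 * Reduced sketch of the per-chunk CPU/device ownership hand-off in
 * mlx5_ib_update_xlt(). fill_chunk() and post_chunk() are hypothetical.
 */
#include <linux/dma-mapping.h>

void fill_chunk(void *buf, size_t first, size_t nents);		/* hypothetical */
int post_chunk(dma_addr_t dma, size_t first, size_t nents);	/* hypothetical */

static int stream_chunks(struct device *dev, void *buf, dma_addr_t dma,
			 size_t buf_len, size_t total, size_t per_chunk)
{
	size_t done;
	int err = 0;

	for (done = 0; done < total && !err; done += per_chunk) {
		size_t n = total - done;

		if (n > per_chunk)
			n = per_chunk;

		/* Give the buffer back to the CPU before filling it. */
		dma_sync_single_for_cpu(dev, dma, buf_len, DMA_TO_DEVICE);
		fill_chunk(buf, done, n);
		/* Return ownership to the device before posting the WR. */
		dma_sync_single_for_device(dev, dma, buf_len, DMA_TO_DEVICE);

		err = post_chunk(dma, done, n);
	}
	return err;
}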