/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/platform_device.h>
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
#include "hns_roce_common.h"

#define HNS_ROCE_HEM_ALLOC_SIZE		(1 << 17)
#define HNS_ROCE_TABLE_CHUNK_SIZE	(1 << 17)

#define DMA_ADDR_T_SHIFT	12
#define BT_BA_SHIFT		32

struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages,
					gfp_t gfp_mask)
{
	struct hns_roce_hem_chunk *chunk = NULL;
	struct hns_roce_hem *hem;
	struct scatterlist *mem;
	int order;
	void *buf;

	WARN_ON(gfp_mask & __GFP_HIGHMEM);

	hem = kmalloc(sizeof(*hem),
		      gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
	if (!hem)
		return NULL;

	hem->refcount = 0;
	INIT_LIST_HEAD(&hem->chunk_list);

	order = get_order(HNS_ROCE_HEM_ALLOC_SIZE);

	while (npages > 0) {
		if (!chunk) {
			chunk = kmalloc(sizeof(*chunk),
				gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
			if (!chunk)
				goto fail;

			sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN);
			chunk->npages = 0;
			chunk->nsg = 0;
			list_add_tail(&chunk->list, &hem->chunk_list);
		}

		while (1 << order > npages)
			--order;

		/*
		 * Allocate the block in one go. If that fails, do not fall
		 * back to smaller allocations; return failure directly.
		 */
		mem = &chunk->mem[chunk->npages];
		buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order,
					 &sg_dma_address(mem), gfp_mask);
		if (!buf)
			goto fail;

		sg_set_buf(mem, buf, PAGE_SIZE << order);
		WARN_ON(mem->offset);
		sg_dma_len(mem) = PAGE_SIZE << order;

		++chunk->npages;
		++chunk->nsg;
		npages -= 1 << order;
	}

	return hem;

fail:
	hns_roce_free_hem(hr_dev, hem);
	return NULL;
}

void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
{
	struct hns_roce_hem_chunk *chunk, *tmp;
	int i;

	if (!hem)
		return;

	list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) {
		for (i = 0; i < chunk->npages; ++i)
			dma_free_coherent(&hr_dev->pdev->dev,
				   chunk->mem[i].length,
				   lowmem_page_address(sg_page(&chunk->mem[i])),
				   sg_dma_address(&chunk->mem[i]));
		kfree(chunk);
	}

	kfree(hem);
}

static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
			    struct hns_roce_hem_table *table, unsigned long obj)
{
	struct device *dev = &hr_dev->pdev->dev;
	spinlock_t *lock = &hr_dev->bt_cmd_lock;
	unsigned long end = 0;
	unsigned long flags;
	struct hns_roce_hem_iter iter;
	void __iomem *bt_cmd;
	u32 bt_cmd_h_val = 0;
	u32 bt_cmd_val[2];
	u32 bt_cmd_l = 0;
	u64 bt_ba = 0;
	int ret = 0;

	/* Find the HEM (Hardware Entry Memory) entry */
	unsigned long i = (obj & (table->num_obj - 1)) /
			  (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size);

	switch (table->type) {
	case HEM_TYPE_QPC:
		roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
			       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
		break;
	case HEM_TYPE_MTPT:
		roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
			       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
			       HEM_TYPE_MTPT);
		break;
	case HEM_TYPE_CQC:
		roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
			       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
		break;
	case HEM_TYPE_SRQC:
		roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
			       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
			       HEM_TYPE_SRQC);
		break;
	default:
		return ret;
	}
	roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
		       ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
	roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
	roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);

	/* Currently only a single chunk is iterated over */
	for (hns_roce_hem_first(table->hem[i], &iter);
	     !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
		bt_ba = hns_roce_hem_addr(&iter) >> DMA_ADDR_T_SHIFT;

		spin_lock_irqsave(lock, flags);

		bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;

		end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies;
		while (1) {
			if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
				if (!(time_before(jiffies, end))) {
					dev_err(dev, "Write bt_cmd err, hw_sync is not zero.\n");
					spin_unlock_irqrestore(lock, flags);
					return -EBUSY;
				}
			} else {
				break;
			}
			/*
			 * bt_cmd_lock is held with IRQs disabled, so busy-wait
			 * instead of sleeping while polling the sync bit.
			 */
			mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
		}

		bt_cmd_l = (u32)bt_ba;
		roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
			       ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S,
			       bt_ba >> BT_BA_SHIFT);

		bt_cmd_val[0] = bt_cmd_l;
		bt_cmd_val[1] = bt_cmd_h_val;
		hns_roce_write64_k(bt_cmd_val,
				   hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
		spin_unlock_irqrestore(lock, flags);
	}

	return ret;
}

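/*
 * hns_roce_table_get() - make sure the HEM chunk backing @obj exists.
 *
 * Allocates the 128 KB chunk covering @obj if it is not present yet,
 * programs its base address into the hardware BT, and takes a reference
 * on the chunk. Balanced by hns_roce_table_put().
 */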
int hns_roce_table_get(struct hns_roce_dev *hr_dev,
		       struct hns_roce_hem_table *table, unsigned long obj)
{
	struct device *dev = &hr_dev->pdev->dev;
	int ret = 0;
	unsigned long i;

	i = (obj & (table->num_obj - 1)) / (HNS_ROCE_TABLE_CHUNK_SIZE /
	     table->obj_size);

	mutex_lock(&table->mutex);

	if (table->hem[i]) {
		++table->hem[i]->refcount;
		goto out;
	}

	table->hem[i] = hns_roce_alloc_hem(hr_dev,
				       HNS_ROCE_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
				       (table->lowmem ? GFP_KERNEL :
					GFP_HIGHUSER) | __GFP_NOWARN);
	if (!table->hem[i]) {
		ret = -ENOMEM;
		goto out;
	}

	/* Set HEM base address (128 KB chunk, physical address) in hardware */
	if (hns_roce_set_hem(hr_dev, table, obj)) {
		/* Drop the chunk that hardware never learned about */
		hns_roce_free_hem(hr_dev, table->hem[i]);
		table->hem[i] = NULL;
		ret = -ENODEV;
		dev_err(dev, "set HEM base address to HW failed.\n");
		goto out;
	}

	++table->hem[i]->refcount;
out:
	mutex_unlock(&table->mutex);
	return ret;
}

void hns_roce_table_put(struct hns_roce_dev *hr_dev,
			struct hns_roce_hem_table *table, unsigned long obj)
{
	struct device *dev = &hr_dev->pdev->dev;
	unsigned long i;

	i = (obj & (table->num_obj - 1)) /
	    (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size);

	mutex_lock(&table->mutex);

	if (--table->hem[i]->refcount == 0) {
		/* Clear HEM base address */
		if (hr_dev->hw->clear_hem(hr_dev, table, obj))
			dev_warn(dev, "Clear HEM base address failed.\n");

		hns_roce_free_hem(hr_dev, table->hem[i]);
		table->hem[i] = NULL;
	}

	mutex_unlock(&table->mutex);
}

void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj,
			  dma_addr_t *dma_handle)
{
	struct hns_roce_hem_chunk *chunk;
	unsigned long idx;
	int i;
	int offset, dma_offset;
	struct hns_roce_hem *hem;
	struct page *page = NULL;

	if (!table->lowmem)
		return NULL;

	mutex_lock(&table->mutex);
	idx = (obj & (table->num_obj - 1)) * table->obj_size;
	hem = table->hem[idx / HNS_ROCE_TABLE_CHUNK_SIZE];
	dma_offset = offset = idx % HNS_ROCE_TABLE_CHUNK_SIZE;

	if (!hem)
		goto out;

	list_for_each_entry(chunk, &hem->chunk_list, list) {
		for (i = 0; i < chunk->npages; ++i) {
			if (dma_handle && dma_offset >= 0) {
				if (sg_dma_len(&chunk->mem[i]) >
				    (u32)dma_offset)
					*dma_handle = sg_dma_address(
						&chunk->mem[i]) + dma_offset;
				dma_offset -= sg_dma_len(&chunk->mem[i]);
			}

			if (chunk->mem[i].length > (u32)offset) {
				page = sg_page(&chunk->mem[i]);
				goto out;
			}
			offset -= chunk->mem[i].length;
		}
	}

out:
	mutex_unlock(&table->mutex);
	return page ? lowmem_page_address(page) + offset : NULL;
}

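/*
 * hns_roce_table_get_range() - get every chunk covering objects
 * @start..@end, stepping one chunk's worth of objects at a time.
 * On failure, the chunks obtained so far are released again.
 */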
int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
			     struct hns_roce_hem_table *table,
			     unsigned long start, unsigned long end)
{
	unsigned long inc = HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size;
	unsigned long i = 0;
	int ret = 0;

	/* Allocate MTT entry memory according to chunk(128K) */
	for (i = start; i <= end; i += inc) {
		ret = hns_roce_table_get(hr_dev, table, i);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	while (i > start) {
		i -= inc;
		hns_roce_table_put(hr_dev, table, i);
	}
	return ret;
}

void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
			      struct hns_roce_hem_table *table,
			      unsigned long start, unsigned long end)
{
	unsigned long i;

	for (i = start; i <= end;
	     i += HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size)
		hns_roce_table_put(hr_dev, table, i);
}

int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
			    struct hns_roce_hem_table *table, u32 type,
			    unsigned long obj_size, unsigned long nobj,
			    int use_lowmem)
{
	unsigned long obj_per_chunk;
	unsigned long num_hem;

	obj_per_chunk = HNS_ROCE_TABLE_CHUNK_SIZE / obj_size;
	num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;

	table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL);
	if (!table->hem)
		return -ENOMEM;

	table->type = type;
	table->num_hem = num_hem;
	table->num_obj = nobj;
	table->obj_size = obj_size;
	table->lowmem = use_lowmem;
	mutex_init(&table->mutex);

	return 0;
}

void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
				struct hns_roce_hem_table *table)
{
	struct device *dev = &hr_dev->pdev->dev;
	unsigned long i;

	for (i = 0; i < table->num_hem; ++i)
		if (table->hem[i]) {
			if (hr_dev->hw->clear_hem(hr_dev, table,
			    i * HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size))
				dev_err(dev, "Clear HEM base address failed.\n");

			hns_roce_free_hem(hr_dev, table->hem[i]);
		}

	kfree(table->hem);
}

void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
}