1 /* 2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/etherdevice.h> 34 #include <linux/mlx5/driver.h> 35 36 #include "mlx5_core.h" 37 #include "lib/mlx5.h" 38 #include "lib/eq.h" 39 #include "fpga/core.h" 40 #include "fpga/conn.h" 41 42 static const char *const mlx5_fpga_error_strings[] = { 43 "Null Syndrome", 44 "Corrupted DDR", 45 "Flash Timeout", 46 "Internal Link Error", 47 "Watchdog HW Failure", 48 "I2C Failure", 49 "Image Changed", 50 "Temperature Critical", 51 }; 52 53 static const char * const mlx5_fpga_qp_error_strings[] = { 54 "Null Syndrome", 55 "Retry Counter Expired", 56 "RNR Expired", 57 }; 58 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void) 59 { 60 struct mlx5_fpga_device *fdev = NULL; 61 62 fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); 63 if (!fdev) 64 return NULL; 65 66 spin_lock_init(&fdev->state_lock); 67 fdev->state = MLX5_FPGA_STATUS_NONE; 68 return fdev; 69 } 70 71 static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image) 72 { 73 switch (image) { 74 case MLX5_FPGA_IMAGE_USER: 75 return "user"; 76 case MLX5_FPGA_IMAGE_FACTORY: 77 return "factory"; 78 default: 79 return "unknown"; 80 } 81 } 82 83 static const char *mlx5_fpga_name(u32 fpga_id) 84 { 85 static char ret[32]; 86 87 switch (fpga_id) { 88 case MLX5_FPGA_NEWTON: 89 return "Newton"; 90 case MLX5_FPGA_EDISON: 91 return "Edison"; 92 case MLX5_FPGA_MORSE: 93 return "Morse"; 94 case MLX5_FPGA_MORSEQ: 95 return "MorseQ"; 96 } 97 98 snprintf(ret, sizeof(ret), "Unknown %d", fpga_id); 99 return ret; 100 } 101 102 static int mlx5_is_fpga_lookaside(u32 fpga_id) 103 { 104 return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON; 105 } 106 107 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev) 108 { 109 struct mlx5_fpga_query query; 110 int err; 111 112 err = mlx5_fpga_query(fdev->mdev, &query); 113 if (err) { 114 mlx5_fpga_err(fdev, "Failed to query status: %d\n", err); 115 return err; 116 } 117 118 fdev->last_admin_image = query.admin_image; 119 fdev->last_oper_image = query.oper_image; 120 121 mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n", 122 query.status, query.admin_image, query.oper_image); 123 124 /* for FPGA lookaside projects FPGA load status is not important */ 125 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) 126 return 0; 127 128 if (query.status != MLX5_FPGA_STATUS_SUCCESS) { 129 mlx5_fpga_err(fdev, "%s image failed to load; status %u\n", 130 mlx5_fpga_image_name(fdev->last_oper_image), 131 query.status); 132 return -EIO; 133 } 134 135 return 0; 136 } 137 138 static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) 139 { 140 int err; 141 struct mlx5_core_dev *mdev = fdev->mdev; 142 143 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); 144 if (err) { 145 mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err); 146 return err; 147 } 148 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX); 149 if (err) { 150 mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err); 151 return err; 152 } 153 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF); 154 if (err) { 155 mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err); 156 return err; 157 } 158 return 0; 159 } 160 161 static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *); 162 163 static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe) 164 { 165 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb); 166 167 return mlx5_fpga_event(fdev, event, eqe); 168 } 169 170 static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe) 171 { 172 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb); 173 174 return mlx5_fpga_event(fdev, event, eqe); 175 } 176 177 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) 178 { 179 struct mlx5_fpga_device *fdev = mdev->fpga; 180 unsigned int max_num_qps; 181 unsigned long flags; 182 u32 fpga_id; 183 int err; 184 185 if (!fdev) 186 return 0; 187 188 err = mlx5_fpga_caps(fdev->mdev); 189 if (err) 190 goto out; 191 192 err = mlx5_fpga_device_load_check(fdev); 193 if (err) 194 goto out; 195 196 fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id); 197 mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id); 198 199 /* No QPs if FPGA does not participate in net processing */ 200 if (mlx5_is_fpga_lookaside(fpga_id)) 201 goto out; 202 203 mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n", 204 mlx5_fpga_image_name(fdev->last_oper_image), 205 fdev->last_oper_image, 206 MLX5_CAP_FPGA(fdev->mdev, image_version), 207 MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id), 208 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id), 209 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version)); 210 211 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); 212 if (!max_num_qps) { 213 mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n"); 214 err = -ENOTSUPP; 215 goto out; 216 } 217 218 err = mlx5_core_reserve_gids(mdev, max_num_qps); 219 if (err) 220 goto out; 221 222 MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR); 223 MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR); 224 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb); 225 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb); 226 227 err = mlx5_fpga_conn_device_init(fdev); 228 if (err) 229 goto err_rsvd_gid; 230 231 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { 232 err = mlx5_fpga_device_brb(fdev); 233 if (err) 234 goto err_conn_init; 235 } 236 237 goto out; 238 239 err_conn_init: 240 mlx5_fpga_conn_device_cleanup(fdev); 241 242 err_rsvd_gid: 243 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); 244 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); 245 mlx5_core_unreserve_gids(mdev, max_num_qps); 246 out: 247 spin_lock_irqsave(&fdev->state_lock, flags); 248 fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS; 249 spin_unlock_irqrestore(&fdev->state_lock, flags); 250 return err; 251 } 252 253 int mlx5_fpga_init(struct mlx5_core_dev *mdev) 254 { 255 struct mlx5_fpga_device *fdev = NULL; 256 257 if (!MLX5_CAP_GEN(mdev, fpga)) { 258 mlx5_core_dbg(mdev, "FPGA capability not present\n"); 259 return 0; 260 } 261 262 mlx5_core_dbg(mdev, "Initializing FPGA\n"); 263 264 fdev = mlx5_fpga_device_alloc(); 265 if (!fdev) 266 return -ENOMEM; 267 268 fdev->mdev = mdev; 269 mdev->fpga = fdev; 270 271 return 0; 272 } 273 274 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) 275 { 276 struct mlx5_fpga_device *fdev = mdev->fpga; 277 unsigned int max_num_qps; 278 unsigned long flags; 279 int err; 280 281 if (!fdev) 282 return; 283 284 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) 285 return; 286 287 spin_lock_irqsave(&fdev->state_lock, flags); 288 if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) { 289 spin_unlock_irqrestore(&fdev->state_lock, flags); 290 return; 291 } 292 fdev->state = MLX5_FPGA_STATUS_NONE; 293 spin_unlock_irqrestore(&fdev->state_lock, flags); 294 295 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { 296 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); 297 if (err) 298 mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n", 299 err); 300 } 301 302 mlx5_fpga_conn_device_cleanup(fdev); 303 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); 304 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); 305 306 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); 307 mlx5_core_unreserve_gids(mdev, max_num_qps); 308 } 309 310 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) 311 { 312 struct mlx5_fpga_device *fdev = mdev->fpga; 313 314 mlx5_fpga_device_stop(mdev); 315 kfree(fdev); 316 mdev->fpga = NULL; 317 } 318 319 static const char *mlx5_fpga_syndrome_to_string(u8 syndrome) 320 { 321 if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings)) 322 return mlx5_fpga_error_strings[syndrome]; 323 return "Unknown"; 324 } 325 326 static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome) 327 { 328 if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings)) 329 return mlx5_fpga_qp_error_strings[syndrome]; 330 return "Unknown"; 331 } 332 333 static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, 334 unsigned long event, void *eqe) 335 { 336 void *data = ((struct mlx5_eqe *)eqe)->data.raw; 337 const char *event_name; 338 bool teardown = false; 339 unsigned long flags; 340 u8 syndrome; 341 342 switch (event) { 343 case MLX5_EVENT_TYPE_FPGA_ERROR: 344 syndrome = MLX5_GET(fpga_error_event, data, syndrome); 345 event_name = mlx5_fpga_syndrome_to_string(syndrome); 346 break; 347 case MLX5_EVENT_TYPE_FPGA_QP_ERROR: 348 syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome); 349 event_name = mlx5_fpga_qp_syndrome_to_string(syndrome); 350 break; 351 default: 352 return NOTIFY_DONE; 353 } 354 355 spin_lock_irqsave(&fdev->state_lock, flags); 356 switch (fdev->state) { 357 case MLX5_FPGA_STATUS_SUCCESS: 358 mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name); 359 teardown = true; 360 break; 361 default: 362 mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n", 363 syndrome, event_name); 364 } 365 spin_unlock_irqrestore(&fdev->state_lock, flags); 366 /* We tear-down the card's interfaces and functionality because 367 * the FPGA bump-on-the-wire is misbehaving and we lose ability 368 * to communicate with the network. User may still be able to 369 * recover by re-programming or debugging the FPGA 370 */ 371 if (teardown) 372 mlx5_trigger_health_work(fdev->mdev); 373 374 return NOTIFY_OK; 375 } 376