1 /*
2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/etherdevice.h>
34 #include <linux/mlx5/driver.h>
35
36 #include "mlx5_core.h"
37 #include "lib/mlx5.h"
38 #include "lib/eq.h"
39 #include "fpga/core.h"
40 #include "fpga/conn.h"
41
/*
 * Human-readable descriptions of FPGA error-event syndromes. The table is
 * indexed directly by the syndrome value; values past the end of the table
 * have no description here.
 */
static const char * const mlx5_fpga_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Corrupted DDR",
	[2] = "Flash Timeout",
	[3] = "Internal Link Error",
	[4] = "Watchdog HW Failure",
	[5] = "I2C Failure",
	[6] = "Image Changed",
	[7] = "Temperature Critical",
};
52
/*
 * Human-readable descriptions of FPGA QP error-event syndromes, indexed
 * directly by the syndrome value.
 */
static const char * const mlx5_fpga_qp_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Retry Counter Expired",
	[2] = "RNR Expired",
};
mlx5_fpga_device_alloc(void)58 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
59 {
60 struct mlx5_fpga_device *fdev;
61
62 fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
63 if (!fdev)
64 return NULL;
65
66 spin_lock_init(&fdev->state_lock);
67 fdev->state = MLX5_FPGA_STATUS_NONE;
68 return fdev;
69 }
70
mlx5_fpga_image_name(enum mlx5_fpga_image image)71 static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
72 {
73 switch (image) {
74 case MLX5_FPGA_IMAGE_USER:
75 return "user";
76 case MLX5_FPGA_IMAGE_FACTORY:
77 return "factory";
78 default:
79 return "unknown";
80 }
81 }
82
/*
 * Map an FPGA card ID to a printable card name.
 *
 * Known IDs return a constant literal. Any other ID is formatted as
 * "Unknown <id>" into a function-local static buffer, so in that case the
 * returned pointer refers to storage shared by all callers: the text is
 * overwritten by the next call that takes the fallback path, and concurrent
 * callers are not serialised here.
 */
static const char *mlx5_fpga_name(u32 fpga_id)
{
	static char ret[32];

	switch (fpga_id) {
	case MLX5_FPGA_NEWTON:
		return "Newton";
	case MLX5_FPGA_EDISON:
		return "Edison";
	case MLX5_FPGA_MORSE:
		return "Morse";
	case MLX5_FPGA_MORSEQ:
		return "MorseQ";
	default:
		break;
	}

	/* fpga_id is a u32: print it unsigned so large IDs do not show up negative */
	snprintf(ret, sizeof(ret), "Unknown %u", fpga_id);
	return ret;
}
101
/*
 * Tell whether the given FPGA card ID is treated as a lookaside card.
 *
 * Returns 0 for MLX5_FPGA_NEWTON and MLX5_FPGA_EDISON, and 1 for every
 * other ID.
 */
static int mlx5_is_fpga_lookaside(u32 fpga_id)
{
	return !(fpga_id == MLX5_FPGA_NEWTON || fpga_id == MLX5_FPGA_EDISON);
}
106
mlx5_fpga_device_load_check(struct mlx5_fpga_device * fdev)107 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
108 {
109 struct mlx5_fpga_query query;
110 int err;
111
112 err = mlx5_fpga_query(fdev->mdev, &query);
113 if (err) {
114 mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
115 return err;
116 }
117
118 fdev->last_admin_image = query.admin_image;
119 fdev->last_oper_image = query.oper_image;
120
121 mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
122 query.status, query.admin_image, query.oper_image);
123
124 /* for FPGA lookaside projects FPGA load status is not important */
125 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
126 return 0;
127
128 if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
129 mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
130 mlx5_fpga_image_name(fdev->last_oper_image),
131 query.status);
132 return -EIO;
133 }
134
135 return 0;
136 }
137
mlx5_fpga_device_brb(struct mlx5_fpga_device * fdev)138 static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
139 {
140 int err;
141 struct mlx5_core_dev *mdev = fdev->mdev;
142
143 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
144 if (err) {
145 mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
146 return err;
147 }
148 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
149 if (err) {
150 mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
151 return err;
152 }
153 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
154 if (err) {
155 mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
156 return err;
157 }
158 return 0;
159 }
160
161 static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
162
fpga_err_event(struct notifier_block * nb,unsigned long event,void * eqe)163 static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
164 {
165 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
166
167 return mlx5_fpga_event(fdev, event, eqe);
168 }
169
fpga_qp_err_event(struct notifier_block * nb,unsigned long event,void * eqe)170 static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
171 {
172 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
173
174 return mlx5_fpga_event(fdev, event, eqe);
175 }
176
mlx5_fpga_device_start(struct mlx5_core_dev * mdev)177 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
178 {
179 struct mlx5_fpga_device *fdev = mdev->fpga;
180 unsigned int max_num_qps;
181 unsigned long flags;
182 u32 fpga_id;
183 int err;
184
185 if (!fdev)
186 return 0;
187
188 err = mlx5_fpga_caps(fdev->mdev);
189 if (err)
190 goto out;
191
192 err = mlx5_fpga_device_load_check(fdev);
193 if (err)
194 goto out;
195
196 fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
197 mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id);
198
199 /* No QPs if FPGA does not participate in net processing */
200 if (mlx5_is_fpga_lookaside(fpga_id))
201 goto out;
202
203 mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n",
204 mlx5_fpga_image_name(fdev->last_oper_image),
205 fdev->last_oper_image,
206 MLX5_CAP_FPGA(fdev->mdev, image_version),
207 MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
208 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
209 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
210
211 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
212 if (!max_num_qps) {
213 mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
214 err = -ENOTSUPP;
215 goto out;
216 }
217
218 err = mlx5_core_reserve_gids(mdev, max_num_qps);
219 if (err)
220 goto out;
221
222 MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
223 MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
224 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
225 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
226
227 err = mlx5_fpga_conn_device_init(fdev);
228 if (err)
229 goto err_rsvd_gid;
230
231 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
232 err = mlx5_fpga_device_brb(fdev);
233 if (err)
234 goto err_conn_init;
235 }
236
237 goto out;
238
239 err_conn_init:
240 mlx5_fpga_conn_device_cleanup(fdev);
241
242 err_rsvd_gid:
243 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
244 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
245 mlx5_core_unreserve_gids(mdev, max_num_qps);
246 out:
247 spin_lock_irqsave(&fdev->state_lock, flags);
248 fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
249 spin_unlock_irqrestore(&fdev->state_lock, flags);
250 return err;
251 }
252
mlx5_fpga_init(struct mlx5_core_dev * mdev)253 int mlx5_fpga_init(struct mlx5_core_dev *mdev)
254 {
255 struct mlx5_fpga_device *fdev;
256
257 if (!MLX5_CAP_GEN(mdev, fpga)) {
258 mlx5_core_dbg(mdev, "FPGA capability not present\n");
259 return 0;
260 }
261
262 mlx5_core_dbg(mdev, "Initializing FPGA\n");
263
264 fdev = mlx5_fpga_device_alloc();
265 if (!fdev)
266 return -ENOMEM;
267
268 fdev->mdev = mdev;
269 mdev->fpga = fdev;
270
271 return 0;
272 }
273
mlx5_fpga_device_stop(struct mlx5_core_dev * mdev)274 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
275 {
276 struct mlx5_fpga_device *fdev = mdev->fpga;
277 unsigned int max_num_qps;
278 unsigned long flags;
279 int err;
280
281 if (!fdev)
282 return;
283
284 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
285 return;
286
287 spin_lock_irqsave(&fdev->state_lock, flags);
288 if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
289 spin_unlock_irqrestore(&fdev->state_lock, flags);
290 return;
291 }
292 fdev->state = MLX5_FPGA_STATUS_NONE;
293 spin_unlock_irqrestore(&fdev->state_lock, flags);
294
295 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
296 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
297 if (err)
298 mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
299 err);
300 }
301
302 mlx5_fpga_conn_device_cleanup(fdev);
303 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
304 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
305
306 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
307 mlx5_core_unreserve_gids(mdev, max_num_qps);
308 }
309
mlx5_fpga_cleanup(struct mlx5_core_dev * mdev)310 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
311 {
312 struct mlx5_fpga_device *fdev = mdev->fpga;
313
314 mlx5_fpga_device_stop(mdev);
315 kfree(fdev);
316 mdev->fpga = NULL;
317 }
318
/*
 * Translate an FPGA error syndrome into its description from
 * mlx5_fpga_error_strings, or "Unknown" when the syndrome lies beyond the
 * end of that table.
 */
static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
{
	return syndrome < ARRAY_SIZE(mlx5_fpga_error_strings) ?
	       mlx5_fpga_error_strings[syndrome] : "Unknown";
}
325
/*
 * Translate an FPGA QP error syndrome into its description from
 * mlx5_fpga_qp_error_strings, or "Unknown" when the syndrome lies beyond
 * the end of that table.
 */
static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
{
	return syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings) ?
	       mlx5_fpga_qp_error_strings[syndrome] : "Unknown";
}
332
mlx5_fpga_event(struct mlx5_fpga_device * fdev,unsigned long event,void * eqe)333 static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
334 unsigned long event, void *eqe)
335 {
336 void *data = ((struct mlx5_eqe *)eqe)->data.raw;
337 const char *event_name;
338 bool teardown = false;
339 unsigned long flags;
340 u8 syndrome;
341
342 switch (event) {
343 case MLX5_EVENT_TYPE_FPGA_ERROR:
344 syndrome = MLX5_GET(fpga_error_event, data, syndrome);
345 event_name = mlx5_fpga_syndrome_to_string(syndrome);
346 break;
347 case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
348 syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
349 event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
350 break;
351 default:
352 return NOTIFY_DONE;
353 }
354
355 spin_lock_irqsave(&fdev->state_lock, flags);
356 switch (fdev->state) {
357 case MLX5_FPGA_STATUS_SUCCESS:
358 mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
359 teardown = true;
360 break;
361 default:
362 mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
363 syndrome, event_name);
364 }
365 spin_unlock_irqrestore(&fdev->state_lock, flags);
366 /* We tear-down the card's interfaces and functionality because
367 * the FPGA bump-on-the-wire is misbehaving and we lose ability
368 * to communicate with the network. User may still be able to
369 * recover by re-programming or debugging the FPGA
370 */
371 if (teardown)
372 mlx5_trigger_health_work(fdev->mdev);
373
374 return NOTIFY_OK;
375 }
376