xref: /openbmc/linux/drivers/misc/genwqe/card_base.c (revision b34e08d5)
1 /**
2  * IBM Accelerator Family 'GenWQE'
3  *
4  * (C) Copyright IBM Corp. 2013
5  *
6  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
7  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
8  * Author: Michael Jung <mijung@de.ibm.com>
9  * Author: Michael Ruettger <michael@ibmra.de>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License (version 2 only)
13  * as published by the Free Software Foundation.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  */
20 
21 /*
22  * Module initialization and PCIe setup. Card health monitoring and
23  * recovery functionality. Character device creation and deletion are
24  * controlled from here.
25  */
26 
27 #include <linux/module.h>
28 #include <linux/types.h>
29 #include <linux/pci.h>
30 #include <linux/err.h>
31 #include <linux/aer.h>
32 #include <linux/string.h>
33 #include <linux/sched.h>
34 #include <linux/wait.h>
35 #include <linux/delay.h>
36 #include <linux/dma-mapping.h>
38 #include <linux/notifier.h>
39 #include <linux/device.h>
40 #include <linux/log2.h>
41 #include <linux/genwqe/genwqe_card.h>
42 
43 #include "card_base.h"
44 #include "card_ddcb.h"
45 
46 MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>");
47 MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>");
48 MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>");
49 MODULE_AUTHOR("Michael Jung <mijung@de.ibm.com>");
50 
51 MODULE_DESCRIPTION("GenWQE Card");
52 MODULE_VERSION(DRV_VERS_STRING);
53 MODULE_LICENSE("GPL");
54 
55 static char genwqe_driver_name[] = GENWQE_DEVNAME;
56 static struct class *class_genwqe;
57 static struct dentry *debugfs_genwqe;
58 static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX];
59 
60 /* PCI structure for identifying device by PCI vendor and device ID */
61 static const struct pci_device_id genwqe_device_table[] = {
62 	{ .vendor      = PCI_VENDOR_ID_IBM,
63 	  .device      = PCI_DEVICE_GENWQE,
64 	  .subvendor   = PCI_SUBVENDOR_ID_IBM,
65 	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
66 	  .class       = (PCI_CLASSCODE_GENWQE5 << 8),
67 	  .class_mask  = ~0,
68 	  .driver_data = 0 },
69 
70 	/* Initial SR-IOV bring-up image */
71 	{ .vendor      = PCI_VENDOR_ID_IBM,
72 	  .device      = PCI_DEVICE_GENWQE,
73 	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
74 	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
75 	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
76 	  .class_mask  = ~0,
77 	  .driver_data = 0 },
78 
79 	{ .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
80 	  .device      = 0x0000,  /* VF Device ID */
81 	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
82 	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
83 	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
84 	  .class_mask  = ~0,
85 	  .driver_data = 0 },
86 
87 	/* Fixed up image */
88 	{ .vendor      = PCI_VENDOR_ID_IBM,
89 	  .device      = PCI_DEVICE_GENWQE,
90 	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
91 	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
92 	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
93 	  .class_mask  = ~0,
94 	  .driver_data = 0 },
95 
96 	{ .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
97 	  .device      = 0x0000,  /* VF Device ID */
98 	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
99 	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
100 	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
101 	  .class_mask  = ~0,
102 	  .driver_data = 0 },
103 
104 	/* Even one more ... */
105 	{ .vendor      = PCI_VENDOR_ID_IBM,
106 	  .device      = PCI_DEVICE_GENWQE,
107 	  .subvendor   = PCI_SUBVENDOR_ID_IBM,
108 	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_NEW,
109 	  .class       = (PCI_CLASSCODE_GENWQE5 << 8),
110 	  .class_mask  = ~0,
111 	  .driver_data = 0 },
112 
113 	{ 0, }			/* 0 terminated list. */
114 };
115 
116 MODULE_DEVICE_TABLE(pci, genwqe_device_table);
117 
118 /**
119  * genwqe_dev_alloc() - Create and prepare a new card descriptor
120  *
121  * Return: Pointer to card descriptor, or ERR_PTR(err) on error
122  */
123 static struct genwqe_dev *genwqe_dev_alloc(void)
124 {
125 	unsigned int i = 0, j;
126 	struct genwqe_dev *cd;
127 
128 	for (i = 0; i < GENWQE_CARD_NO_MAX; i++) {
129 		if (genwqe_devices[i] == NULL)
130 			break;
131 	}
132 	if (i >= GENWQE_CARD_NO_MAX)
133 		return ERR_PTR(-ENODEV);
134 
135 	cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL);
136 	if (!cd)
137 		return ERR_PTR(-ENOMEM);
138 
139 	cd->card_idx = i;
140 	cd->class_genwqe = class_genwqe;
141 	cd->debugfs_genwqe = debugfs_genwqe;
142 
143 	init_waitqueue_head(&cd->queue_waitq);
144 
145 	spin_lock_init(&cd->file_lock);
146 	INIT_LIST_HEAD(&cd->file_list);
147 
148 	cd->card_state = GENWQE_CARD_UNUSED;
149 	spin_lock_init(&cd->print_lock);
150 
151 	cd->ddcb_software_timeout = genwqe_ddcb_software_timeout;
152 	cd->kill_timeout = genwqe_kill_timeout;
153 
154 	for (j = 0; j < GENWQE_MAX_VFS; j++)
155 		cd->vf_jobtimeout_msec[j] = genwqe_vf_jobtimeout_msec;
156 
157 	genwqe_devices[i] = cd;
158 	return cd;
159 }
160 
161 static void genwqe_dev_free(struct genwqe_dev *cd)
162 {
163 	if (!cd)
164 		return;
165 
166 	genwqe_devices[cd->card_idx] = NULL;
167 	kfree(cd);
168 }
169 
170 /**
171  * genwqe_bus_reset() - Card recovery
172  *
173  * pci_reset_function() will recover the device and ensure that the
174  * registers are accessible again when it completes successfully. If
175  * it fails, the card stays dead and its registers remain
176  * inaccessible.
177  */
178 static int genwqe_bus_reset(struct genwqe_dev *cd)
179 {
180 	int bars, rc = 0;
181 	struct pci_dev *pci_dev = cd->pci_dev;
182 	void __iomem *mmio;
183 
184 	if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE)
185 		return -EIO;
186 
187 	mmio = cd->mmio;
188 	cd->mmio = NULL;
189 	pci_iounmap(pci_dev, mmio);
190 
191 	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
192 	pci_release_selected_regions(pci_dev, bars);
193 
194 	/*
195 	 * Firmware/BIOS might change memory mapping during bus reset.
196 	 * Settings like bus-mastering enable, ... are backed up and
197 	 * restored by pci_reset_function().
198 	 */
199 	dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__);
200 	rc = pci_reset_function(pci_dev);
201 	if (rc) {
202 		dev_err(&pci_dev->dev,
203 			"[%s] err: failed reset func (rc %d)\n", __func__, rc);
204 		return rc;
205 	}
206 	dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc);
207 
208 	/*
209 	 * Here is the right spot to clear the register read
210 	 * failure. pci_bus_reset() does this job in real systems.
211 	 */
212 	cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
213 			    GENWQE_INJECT_GFIR_FATAL |
214 			    GENWQE_INJECT_GFIR_INFO);
215 
216 	rc = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name);
217 	if (rc) {
218 		dev_err(&pci_dev->dev,
219 			"[%s] err: request bars failed (%d)\n", __func__, rc);
220 		return -EIO;
221 	}
222 
223 	cd->mmio = pci_iomap(pci_dev, 0, 0);
224 	if (cd->mmio == NULL) {
225 		dev_err(&pci_dev->dev,
226 			"[%s] err: mapping BAR0 failed\n", __func__);
227 		return -ENOMEM;
228 	}
229 	return 0;
230 }
231 
232 /*
233  * Hardware circumvention section. Certain bitstreams in our test-lab
234  * had different kinds of problems. Here is where we adjust those
235  * bitstreams to function well with this version of our device driver.
236  *
237  * These circumventions are applied to the physical function only.
238  * The magic numbers below identify development/manufacturing
239  * versions of the bitstream used on the card.
240  *
241  * Turn off error reporting for old/manufacturing images.
242  */
243 
244 bool genwqe_need_err_masking(struct genwqe_dev *cd)
245 {
246 	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
247 }
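/*
 * Illustration, derived from the check above (the concrete value is an
 * example): a card reporting (slu_unitcfg & 0xFFFF0) == 0x32160 counts
 * as an old/manufacturing image and needs error masking, while 0x32170
 * and newer images do not.
 */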
248 
249 static void genwqe_tweak_hardware(struct genwqe_dev *cd)
250 {
251 	struct pci_dev *pci_dev = cd->pci_dev;
252 
253 	/* Mask FIRs for development images */
254 	if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) &&
255 	    ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) {
256 		dev_warn(&pci_dev->dev,
257 			 "FIRs masked due to bitstream %016llx.%016llx\n",
258 			 cd->slu_unitcfg, cd->app_unitcfg);
259 
260 		__genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR,
261 				0xFFFFFFFFFFFFFFFFull);
262 
263 		__genwqe_writeq(cd, IO_APP_ERR_ACT_MASK,
264 				0x0000000000000000ull);
265 	}
266 }
267 
268 /**
269  * genwqe_recovery_on_fatal_gfir_required() - Version dependent actions
270  *
271  * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must
272  * be ignored. This is e.g. true for the bitstream we gave to the card
273  * manufacturer, but also for some old bitstreams we released to our
274  * test-lab.
275  */
276 int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd)
277 {
278 	return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull;
279 }
280 
281 int genwqe_flash_readback_fails(struct genwqe_dev *cd)
282 {
283 	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
284 }
285 
286 /**
287  * genwqe_T_psec() - Calculate PF/VF timeout register content
288  *
289  * Note: From a design perspective it turned out to be a bad idea to
290  * use codes here to specify the frequency/speed values. An old
291  * driver cannot interpret new codes, which is always a problem.
292  * It is better to measure the value, or to put the speed/frequency
293  * directly into a register, which stays valid for old as well as
294  * for new software.
295  */
296 /* T = 1/f */
297 static int genwqe_T_psec(struct genwqe_dev *cd)
298 {
299 	u16 speed;	/* speed code selects f = 250, 200, 166 or 175 MHz */
300 	static const int T[] = { 4000, 5000, 6000, 5714 };
301 
302 	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
303 	if (speed >= ARRAY_SIZE(T))
304 		return -1;	/* illegal value */
305 
306 	return T[speed];
307 }
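/*
 * Example, taken from the table above: a speed code of 0 selects a
 * 250 MHz bitstream, i.e. T = 1/f = 4000 ps; a code of 3 selects
 * 175 MHz, i.e. roughly 5714 ps.
 */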
308 
309 /**
310  * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution
311  *
312  * Do this _after_ card_reset() is called. Otherwise the values will
313  * vanish. The settings need to be done when the queues are inactive.
314  *
315  * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16.
316  * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16.
317  */
318 static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd)
319 {
320 	u32 T = genwqe_T_psec(cd);
321 	u64 x;
322 
323 	if (genwqe_pf_jobtimeout_msec == 0)
324 		return false;
325 
326 	/* PF: large value needed, flash update 2sec per block */
327 	x = ilog2(genwqe_pf_jobtimeout_msec *
328 		  16000000000uL/(T * 15)) - 10;
329 
330 	genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
331 			  0xff00 | (x & 0xff), 0);
332 	return true;
333 }
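/*
 * Worked example for the formula above (illustrative assumptions only:
 * a 1000 ms PF job timeout and a 166 MHz bitstream, i.e. T = 6000 ps):
 *
 *   x = ilog2(1000 * 16000000000 / (6000 * 15)) - 10
 *     = ilog2(177777777) - 10 = 27 - 10 = 17
 *
 * which programs a hardware timeout window of roughly
 * 2^(10+17) * 6 ns * 14/16 ... 15/16, i.e. about 0.70 to 0.75 seconds.
 * Since ilog2() rounds down, the programmed window can come out shorter
 * than the requested value.
 */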
334 
335 /**
336  * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution
337  */
338 static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd)
339 {
340 	struct pci_dev *pci_dev = cd->pci_dev;
341 	unsigned int vf;
342 	u32 T = genwqe_T_psec(cd);
343 	u64 x;
344 
345 	for (vf = 0; vf < pci_sriov_get_totalvfs(pci_dev); vf++) {
346 
347 		if (cd->vf_jobtimeout_msec[vf] == 0)
348 			continue;
349 
350 		x = ilog2(cd->vf_jobtimeout_msec[vf] *
351 			  16000000000uL/(T * 15)) - 10;
352 
353 		genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
354 				  0xff00 | (x & 0xff), vf + 1);
355 	}
356 	return true;
357 }
358 
359 static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd)
360 {
361 	unsigned int type, e = 0;
362 
363 	for (type = 0; type < GENWQE_DBG_UNITS; type++) {
364 		switch (type) {
365 		case GENWQE_DBG_UNIT0:
366 			e = genwqe_ffdc_buff_size(cd, 0);
367 			break;
368 		case GENWQE_DBG_UNIT1:
369 			e = genwqe_ffdc_buff_size(cd, 1);
370 			break;
371 		case GENWQE_DBG_UNIT2:
372 			e = genwqe_ffdc_buff_size(cd, 2);
373 			break;
374 		case GENWQE_DBG_REGS:
375 			e = GENWQE_FFDC_REGS;
376 			break;
377 		}
378 
379 		/* currently support only the debug units mentioned here */
380 		cd->ffdc[type].entries = e;
381 		cd->ffdc[type].regs = kmalloc(e * sizeof(struct genwqe_reg),
382 					      GFP_KERNEL);
383 		/*
384 	 * regs == NULL is OK; the code using it treats this as "no
385 	 * registers available". Printing a warning is fine in that case.
386 		 */
387 	}
388 	return 0;
389 }
390 
391 static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd)
392 {
393 	unsigned int type;
394 
395 	for (type = 0; type < GENWQE_DBG_UNITS; type++) {
396 		kfree(cd->ffdc[type].regs);
397 		cd->ffdc[type].regs = NULL;
398 	}
399 }
400 
401 static int genwqe_read_ids(struct genwqe_dev *cd)
402 {
403 	int err = 0;
404 	int slu_id;
405 	struct pci_dev *pci_dev = cd->pci_dev;
406 
407 	cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
408 	if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) {
409 		dev_err(&pci_dev->dev,
410 			"err: SLUID=%016llx\n", cd->slu_unitcfg);
411 		err = -EIO;
412 		goto out_err;
413 	}
414 
415 	slu_id = genwqe_get_slu_id(cd);
416 	if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) {
417 		dev_err(&pci_dev->dev,
418 			"err: incompatible SLU Architecture %u\n", slu_id);
419 		err = -ENOENT;
420 		goto out_err;
421 	}
422 
423 	cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
424 	if (cd->app_unitcfg == IO_ILLEGAL_VALUE) {
425 		dev_err(&pci_dev->dev,
426 			"err: APPID=%016llx\n", cd->app_unitcfg);
427 		err = -EIO;
428 		goto out_err;
429 	}
430 	genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name));
431 
432 	/*
433 	 * Is access to all registers possible? If we are a VF the
434 	 * answer is obvious. If we run fully virtualized, we need to
435 	 * check if we can access all registers. If we do not have
436 	 * full access we will cause an UR and some informational FIRs
437 	 * in the PF, but that should not harm.
438 	 */
439 	if (pci_dev->is_virtfn)
440 		cd->is_privileged = 0;
441 	else
442 		cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
443 				     != IO_ILLEGAL_VALUE);
444 
445  out_err:
446 	return err;
447 }
448 
449 static int genwqe_start(struct genwqe_dev *cd)
450 {
451 	int err;
452 	struct pci_dev *pci_dev = cd->pci_dev;
453 
454 	err = genwqe_read_ids(cd);
455 	if (err)
456 		return err;
457 
458 	if (genwqe_is_privileged(cd)) {
459 		/* do this after the tweaks. alloc fail is acceptable */
460 		genwqe_ffdc_buffs_alloc(cd);
461 		genwqe_stop_traps(cd);
462 
463 		/* Collect registers e.g. FIRs, UNITIDs, traces ... */
464 		genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs,
465 				      cd->ffdc[GENWQE_DBG_REGS].entries, 0);
466 
467 		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0,
468 				      cd->ffdc[GENWQE_DBG_UNIT0].regs,
469 				      cd->ffdc[GENWQE_DBG_UNIT0].entries);
470 
471 		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1,
472 				      cd->ffdc[GENWQE_DBG_UNIT1].regs,
473 				      cd->ffdc[GENWQE_DBG_UNIT1].entries);
474 
475 		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2,
476 				      cd->ffdc[GENWQE_DBG_UNIT2].regs,
477 				      cd->ffdc[GENWQE_DBG_UNIT2].entries);
478 
479 		genwqe_start_traps(cd);
480 
481 		if (cd->card_state == GENWQE_CARD_FATAL_ERROR) {
482 			dev_warn(&pci_dev->dev,
483 				 "[%s] chip reload/recovery!\n", __func__);
484 
485 			/*
486 			 * Stealth Mode: Reload chip on either hot
487 			 * reset or PERST.
488 			 */
489 			cd->softreset = 0x7Cull;
490 			__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
491 				       cd->softreset);
492 
493 			err = genwqe_bus_reset(cd);
494 			if (err != 0) {
495 				dev_err(&pci_dev->dev,
496 					"[%s] err: bus reset failed!\n",
497 					__func__);
498 				goto out;
499 			}
500 
501 			/*
502 			 * Re-read the IDs because the
503 			 * bitstream load might have
504 			 * failed.
505 			 */
506 			err = genwqe_read_ids(cd);
507 			if (err)
508 				goto out;
509 		}
510 	}
511 
512 	err = genwqe_setup_service_layer(cd);  /* does a reset to the card */
513 	if (err != 0) {
514 		dev_err(&pci_dev->dev,
515 			"[%s] err: could not setup servicelayer!\n", __func__);
516 		err = -ENODEV;
517 		goto out;
518 	}
519 
520 	if (genwqe_is_privileged(cd)) {	 /* code is running _after_ reset */
521 		genwqe_tweak_hardware(cd);
522 
523 		genwqe_setup_pf_jtimer(cd);
524 		genwqe_setup_vf_jtimer(cd);
525 	}
526 
527 	err = genwqe_device_create(cd);
528 	if (err < 0) {
529 		dev_err(&pci_dev->dev,
530 			"err: chdev init failed! (err=%d)\n", err);
531 		goto out_release_service_layer;
532 	}
533 	return 0;
534 
535  out_release_service_layer:
536 	genwqe_release_service_layer(cd);
537  out:
538 	if (genwqe_is_privileged(cd))
539 		genwqe_ffdc_buffs_free(cd);
540 	return -EIO;
541 }
542 
543 /**
544  * genwqe_stop() - Stop card operation
545  *
546  * Recovery notes:
547  *   As long as genwqe_thread runs we might access registers during
548  *   error data capture. The same applies to genwqe_health_thread.
549  *   When genwqe_bus_reset() fails, this function might be called twice:
550  *   first by the genwqe_health_thread() and later by genwqe_remove() to
551  *   unbind the device. We must be able to survive that.
552  *
553  * This function must be robust enough to be called twice.
554  */
555 static int genwqe_stop(struct genwqe_dev *cd)
556 {
557 	genwqe_finish_queue(cd);	    /* no register access */
558 	genwqe_device_remove(cd);	    /* device removed, procs killed */
559 	genwqe_release_service_layer(cd);   /* here genwqe_thread is stopped */
560 
561 	if (genwqe_is_privileged(cd)) {
562 		pci_disable_sriov(cd->pci_dev);	/* access pci config space */
563 		genwqe_ffdc_buffs_free(cd);
564 	}
565 
566 	return 0;
567 }
568 
569 /**
570  * genwqe_recover_card() - Try to recover the card if it is possible
571  *
572  * If fatal_err is set no register access is possible anymore. It is
573  * likely that genwqe_start fails in that situation. Proper error
574  * handling is required in this case.
575  *
576  * genwqe_bus_reset() will cause the pci code to call genwqe_remove()
577  * and later genwqe_probe() for all virtual functions.
578  */
579 static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err)
580 {
581 	int rc;
582 	struct pci_dev *pci_dev = cd->pci_dev;
583 
584 	genwqe_stop(cd);
585 
586 	/*
587 	 * Make sure chip is not reloaded to maintain FFDC. Write SLU
588 	 * Reset Register, CPLDReset field to 0.
589 	 */
590 	if (!fatal_err) {
591 		cd->softreset = 0x70ull;
592 		__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
593 	}
594 
595 	rc = genwqe_bus_reset(cd);
596 	if (rc != 0) {
597 		dev_err(&pci_dev->dev,
598 			"[%s] err: card recovery impossible!\n", __func__);
599 		return rc;
600 	}
601 
602 	rc = genwqe_start(cd);
603 	if (rc < 0) {
604 		dev_err(&pci_dev->dev,
605 			"[%s] err: failed to launch device!\n", __func__);
606 		return rc;
607 	}
608 	return 0;
609 }
610 
611 static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir)
612 {
613 	*gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
614 	return (*gfir & GFIR_ERR_TRIGGER) &&
615 		genwqe_recovery_on_fatal_gfir_required(cd);
616 }
617 
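/*
 * FIR register layout as used by genwqe_fir_checking() below, derived
 * from the address arithmetic in that function (unit id 'uid' in the
 * top byte of the address):
 *
 *   (uid << 24) + 0x08            primary FIR
 *   (uid << 24) + 0x10            primary FIR clear (write mask)
 *   (uid << 24) + 0x18            primary FEC
 *   (uid << 24) + 0x100 + 8 * j   secondary FIR for primary bit j
 *   (uid << 24) + 0x300 + 8 * j   secondary FEC for primary bit j
 */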
618 /**
619  * genwqe_fir_checking() - Check the fault isolation registers of the card
620  *
621  * If this code works as intended, it can be exercised with the genwqe_poke tool:
622  *   sudo ./tools/genwqe_poke 0x8 0xfefefefefef
623  *
624  * Now the relevant FIRs/sFIRs should be printed out and the driver should
625  * invoke recovery (devices are removed and re-added).
626  */
627 static u64 genwqe_fir_checking(struct genwqe_dev *cd)
628 {
629 	int j, iterations = 0;
630 	u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec;
631 	u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr;
632 	struct pci_dev *pci_dev = cd->pci_dev;
633 
634  healthMonitor:
635 	iterations++;
636 	if (iterations > 16) {
637 		dev_err(&pci_dev->dev, "* exit looping after %d times\n",
638 			iterations);
639 		goto fatal_error;
640 	}
641 
642 	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
643 	if (gfir != 0x0)
644 		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n",
645 				    IO_SLC_CFGREG_GFIR, gfir);
646 	if (gfir == IO_ILLEGAL_VALUE)
647 		goto fatal_error;
648 
649 	/*
650 	 * Avoid printing when no GFIR bit is set; this prevents continuous
651 	 * printout, e.g. for the following bug:
652 	 *   a FIR is set without a secondary FIR, so the FIR cannot be cleared.
653 	 * Comment out the following 'if' to get the prints:
654 	 */
655 	if (gfir == 0)
656 		return 0;
657 
658 	gfir_masked = gfir & GFIR_ERR_TRIGGER;  /* fatal errors */
659 
660 	for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */
661 
662 		/* read the primary FIR (pfir) */
663 		fir_addr = (uid << 24) + 0x08;
664 		fir = __genwqe_readq(cd, fir_addr);
665 		if (fir == 0x0)
666 			continue;  /* no error in this unit */
667 
668 		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir);
669 		if (fir == IO_ILLEGAL_VALUE)
670 			goto fatal_error;
671 
672 		/* read primary FEC */
673 		fec_addr = (uid << 24) + 0x18;
674 		fec = __genwqe_readq(cd, fec_addr);
675 
676 		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec);
677 		if (fec == IO_ILLEGAL_VALUE)
678 			goto fatal_error;
679 
680 		for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) {
681 
682 			/* secondary fir empty, skip it */
683 			if ((fir & mask) == 0x0)
684 				continue;
685 
686 			sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
687 			sfir = __genwqe_readq(cd, sfir_addr);
688 
689 			if (sfir == IO_ILLEGAL_VALUE)
690 				goto fatal_error;
691 			dev_err(&pci_dev->dev,
692 				"* 0x%08x 0x%016llx\n", sfir_addr, sfir);
693 
694 			sfec_addr = (uid << 24) + 0x300 + 0x08 * j;
695 			sfec = __genwqe_readq(cd, sfec_addr);
696 
697 			if (sfec == IO_ILLEGAL_VALUE)
698 				goto fatal_error;
699 			dev_err(&pci_dev->dev,
700 				"* 0x%08x 0x%016llx\n", sfec_addr, sfec);
701 
702 			gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
703 			if (gfir == IO_ILLEGAL_VALUE)
704 				goto fatal_error;
705 
706 			/* gfir turned on during routine! get out and
707 			   start over. */
708 			if ((gfir_masked == 0x0) &&
709 			    (gfir & GFIR_ERR_TRIGGER)) {
710 				goto healthMonitor;
711 			}
712 
713 			/* do not clear if we entered with a fatal gfir */
714 			if (gfir_masked == 0x0) {
715 
716 				/* Clear the logged secondary FIR bits by writing them back */
717 				sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
718 				__genwqe_writeq(cd, sfir_addr, sfir);
719 
720 				dev_dbg(&pci_dev->dev,
721 					"[HM] Clearing  2ndary FIR 0x%08x "
722 					"with 0x%016llx\n", sfir_addr, sfir);
723 
724 				/*
725 				 * Note: these cannot be error FIRs,
726 				 * since gfir_masked was 0 after sfir
727 				 * was read. It is also safe to do this
728 				 * write if sfir == 0; that just means
729 				 * there is no secondary FIR, but the
730 				 * primary still needs to be cleared.
731 				 */
732 
733 				/* Clear the logged primary FIR bit by writing the mask. */
734 				fir_clr_addr = (uid << 24) + 0x10;
735 				__genwqe_writeq(cd, fir_clr_addr, mask);
736 
737 				dev_dbg(&pci_dev->dev,
738 					"[HM] Clearing primary FIR 0x%08x "
739 					"with 0x%016llx\n", fir_clr_addr,
740 					mask);
741 			}
742 		}
743 	}
744 	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
745 	if (gfir == IO_ILLEGAL_VALUE)
746 		goto fatal_error;
747 
748 	if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) {
749 		/*
750 		 * Check once more that it didn't come on again after all the
751 		 * FIRs were cleared.
752 		 */
753 		dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n",
754 			iterations);
755 		goto healthMonitor;
756 	}
757 	return gfir_masked;
758 
759  fatal_error:
760 	return IO_ILLEGAL_VALUE;
761 }
762 
763 /**
764  * genwqe_health_thread() - Health checking thread
765  *
766  * This thread is only started for the PF of the card.
767  *
768  * This thread monitors the health of the card. A critical situation
769  * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In
770  * this case we need to be recovered from outside. Writing to
771  * registers will very likely not work either.
772  *
773  * This thread must only exit if kthread_should_stop() becomes true.
774  *
775  * Condition for the health-thread to trigger:
776  *   a) when a kthread_stop() request comes in or
777  *   b) a critical GFIR occurred
778  *
779  * Informational GFIRs are checked and potentially printed every
780  * health_check_interval seconds.
781  */
782 static int genwqe_health_thread(void *data)
783 {
784 	int rc, should_stop = 0;
785 	struct genwqe_dev *cd = data;
786 	struct pci_dev *pci_dev = cd->pci_dev;
787 	u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg;
788 
789 	while (!kthread_should_stop()) {
790 		rc = wait_event_interruptible_timeout(cd->health_waitq,
791 			 (genwqe_health_check_cond(cd, &gfir) ||
792 			  (should_stop = kthread_should_stop())),
793 				genwqe_health_check_interval * HZ);
794 
795 		if (should_stop)
796 			break;
797 
798 		if (gfir == IO_ILLEGAL_VALUE) {
799 			dev_err(&pci_dev->dev,
800 				"[%s] GFIR=%016llx\n", __func__, gfir);
801 			goto fatal_error;
802 		}
803 
804 		slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
805 		if (slu_unitcfg == IO_ILLEGAL_VALUE) {
806 			dev_err(&pci_dev->dev,
807 				"[%s] SLU_UNITCFG=%016llx\n",
808 				__func__, slu_unitcfg);
809 			goto fatal_error;
810 		}
811 
812 		app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
813 		if (app_unitcfg == IO_ILLEGAL_VALUE) {
814 			dev_err(&pci_dev->dev,
815 				"[%s] APP_UNITCFG=%016llx\n",
816 				__func__, app_unitcfg);
817 			goto fatal_error;
818 		}
819 
820 		gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
821 		if (gfir == IO_ILLEGAL_VALUE) {
822 			dev_err(&pci_dev->dev,
823 				"[%s] %s: GFIR=%016llx\n", __func__,
824 				(gfir & GFIR_ERR_TRIGGER) ? "err" : "info",
825 				gfir);
826 			goto fatal_error;
827 		}
828 
829 		gfir_masked = genwqe_fir_checking(cd);
830 		if (gfir_masked == IO_ILLEGAL_VALUE)
831 			goto fatal_error;
832 
833 		/*
834 		 * GFIR ErrorTrigger bits set => reset the card!
835 		 * Never do this for old/manufacturing images!
836 		 */
837 		if ((gfir_masked) && !cd->skip_recovery &&
838 		    genwqe_recovery_on_fatal_gfir_required(cd)) {
839 
840 			cd->card_state = GENWQE_CARD_FATAL_ERROR;
841 
842 			rc = genwqe_recover_card(cd, 0);
843 			if (rc < 0) {
844 				/* FIXME Card is unusable and needs unbind! */
845 				goto fatal_error;
846 			}
847 		}
848 
849 		cd->last_gfir = gfir;
850 		cond_resched();
851 	}
852 
853 	return 0;
854 
855  fatal_error:
856 	dev_err(&pci_dev->dev,
857 		"[%s] card unusable. Please trigger unbind!\n", __func__);
858 
859 	/* Bring down logical devices to inform user space via udev remove. */
860 	cd->card_state = GENWQE_CARD_FATAL_ERROR;
861 	genwqe_stop(cd);
862 
863 	/* genwqe_bus_reset() failed. Now wait for genwqe_remove(). */
864 	while (!kthread_should_stop())
865 		cond_resched();
866 
867 	return -EIO;
868 }
869 
870 static int genwqe_health_check_start(struct genwqe_dev *cd)
871 {
872 	int rc;
873 
874 	if (genwqe_health_check_interval <= 0)
875 		return 0;	/* valid for disabling the service */
876 
877 	/* cd->health_waitq is initialized earlier, before request_irq()
878 	 * is called, so it is not initialized again here. */
879 
880 	cd->health_thread = kthread_run(genwqe_health_thread, cd,
881 					GENWQE_DEVNAME "%d_health",
882 					cd->card_idx);
883 	if (IS_ERR(cd->health_thread)) {
884 		rc = PTR_ERR(cd->health_thread);
885 		cd->health_thread = NULL;
886 		return rc;
887 	}
888 	return 0;
889 }
890 
891 static int genwqe_health_thread_running(struct genwqe_dev *cd)
892 {
893 	return cd->health_thread != NULL;
894 }
895 
896 static int genwqe_health_check_stop(struct genwqe_dev *cd)
897 {
898 	int rc;
899 
900 	if (!genwqe_health_thread_running(cd))
901 		return -EIO;
902 
903 	rc = kthread_stop(cd->health_thread);
904 	cd->health_thread = NULL;
905 	return 0;
906 }
907 
908 /**
909  * genwqe_pci_setup() - Allocate PCIe related resources for our card
910  */
911 static int genwqe_pci_setup(struct genwqe_dev *cd)
912 {
913 	int err, bars;
914 	struct pci_dev *pci_dev = cd->pci_dev;
915 
916 	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
917 	err = pci_enable_device_mem(pci_dev);
918 	if (err) {
919 		dev_err(&pci_dev->dev,
920 			"err: failed to enable pci memory (err=%d)\n", err);
921 		goto err_out;
922 	}
923 
924 	/* Reserve PCI I/O and memory resources */
925 	err = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name);
926 	if (err) {
927 		dev_err(&pci_dev->dev,
928 			"[%s] err: request bars failed (%d)\n", __func__, err);
929 		err = -EIO;
930 		goto err_disable_device;
931 	}
932 
933 	/* check for 64-bit DMA address supported (DAC) */
934 	if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) {
935 		err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(64));
936 		if (err) {
937 			dev_err(&pci_dev->dev,
938 				"err: DMA64 consistent mask error\n");
939 			err = -EIO;
940 			goto out_release_resources;
941 		}
942 	/* check for 32-bit DMA address supported (SAC) */
943 	} else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) {
944 		err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(32));
945 		if (err) {
946 			dev_err(&pci_dev->dev,
947 				"err: DMA32 consistent mask error\n");
948 			err = -EIO;
949 			goto out_release_resources;
950 		}
951 	} else {
952 		dev_err(&pci_dev->dev,
953 			"err: neither DMA32 nor DMA64 supported\n");
954 		err = -EIO;
955 		goto out_release_resources;
956 	}
957 
958 	pci_set_master(pci_dev);
959 	pci_enable_pcie_error_reporting(pci_dev);
960 
961 	/* map the complete BAR-0 space (a maxlen of 0 maps the full BAR) */
962 	cd->mmio_len = pci_resource_len(pci_dev, 0);
963 	cd->mmio = pci_iomap(pci_dev, 0, 0);
964 	if (cd->mmio == NULL) {
965 		dev_err(&pci_dev->dev,
966 			"[%s] err: mapping BAR0 failed\n", __func__);
967 		err = -ENOMEM;
968 		goto out_release_resources;
969 	}
970 
971 	cd->num_vfs = pci_sriov_get_totalvfs(pci_dev);
972 
973 	err = genwqe_read_ids(cd);
974 	if (err)
975 		goto out_iounmap;
976 
977 	return 0;
978 
979  out_iounmap:
980 	pci_iounmap(pci_dev, cd->mmio);
981  out_release_resources:
982 	pci_release_selected_regions(pci_dev, bars);
983  err_disable_device:
984 	pci_disable_device(pci_dev);
985  err_out:
986 	return err;
987 }
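/*
 * Note: the 64-bit/32-bit DMA mask fallback above uses the older
 * pci_set_dma_mask()/pci_set_consistent_dma_mask() pair. A sketch of an
 * equivalent, more compact form on kernels that provide
 * dma_set_mask_and_coherent() (not what this revision uses) would be:
 *
 *	if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)) &&
 *	    dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)))
 *		err = -EIO;
 */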
988 
989 /**
990  * genwqe_pci_remove() - Free PCIe related resources for our card
991  */
992 static void genwqe_pci_remove(struct genwqe_dev *cd)
993 {
994 	int bars;
995 	struct pci_dev *pci_dev = cd->pci_dev;
996 
997 	if (cd->mmio)
998 		pci_iounmap(pci_dev, cd->mmio);
999 
1000 	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
1001 	pci_release_selected_regions(pci_dev, bars);
1002 	pci_disable_device(pci_dev);
1003 }
1004 
1005 /**
1006  * genwqe_probe() - Device initialization
1007  * @pci_dev:	PCI device information struct
1008  *
1009  * Callable for multiple cards. This function is called on bind.
1010  *
1011  * Return: 0 if succeeded, < 0 when failed
1012  */
1013 static int genwqe_probe(struct pci_dev *pci_dev,
1014 			const struct pci_device_id *id)
1015 {
1016 	int err;
1017 	struct genwqe_dev *cd;
1018 
1019 	genwqe_init_crc32();
1020 
1021 	cd = genwqe_dev_alloc();
1022 	if (IS_ERR(cd)) {
1023 		dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n",
1024 			(int)PTR_ERR(cd));
1025 		return PTR_ERR(cd);
1026 	}
1027 
1028 	dev_set_drvdata(&pci_dev->dev, cd);
1029 	cd->pci_dev = pci_dev;
1030 
1031 	err = genwqe_pci_setup(cd);
1032 	if (err < 0) {
1033 		dev_err(&pci_dev->dev,
1034 			"err: problems with PCI setup (err=%d)\n", err);
1035 		goto out_free_dev;
1036 	}
1037 
1038 	err = genwqe_start(cd);
1039 	if (err < 0) {
1040 		dev_err(&pci_dev->dev,
1041 			"err: cannot start card services! (err=%d)\n", err);
1042 		goto out_pci_remove;
1043 	}
1044 
1045 	if (genwqe_is_privileged(cd)) {
1046 		err = genwqe_health_check_start(cd);
1047 		if (err < 0) {
1048 			dev_err(&pci_dev->dev,
1049 				"err: cannot start health checking! "
1050 				"(err=%d)\n", err);
1051 			goto out_stop_services;
1052 		}
1053 	}
1054 	return 0;
1055 
1056  out_stop_services:
1057 	genwqe_stop(cd);
1058  out_pci_remove:
1059 	genwqe_pci_remove(cd);
1060  out_free_dev:
1061 	genwqe_dev_free(cd);
1062 	return err;
1063 }
1064 
1065 /**
1066  * genwqe_remove() - Called when the device is removed (hot-pluggable)
1067  *
1068  * Also called when the driver is unloaded or when the device is unbound.
1069  */
1070 static void genwqe_remove(struct pci_dev *pci_dev)
1071 {
1072 	struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
1073 
1074 	genwqe_health_check_stop(cd);
1075 
1076 	/*
1077 	 * genwqe_stop() must survive if it is called twice
1078 	 * sequentially. This happens when the health thread calls it
1079 	 * and fails on genwqe_bus_reset().
1080 	 */
1081 	genwqe_stop(cd);
1082 	genwqe_pci_remove(cd);
1083 	genwqe_dev_free(cd);
1084 }
1085 
1086 /*
1087  * genwqe_err_error_detected() - Error detection callback
1088  *
1089  * This callback is called by the PCI subsystem whenever a PCI bus
1090  * error is detected.
1091  */
1092 static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
1093 						 enum pci_channel_state state)
1094 {
1095 	struct genwqe_dev *cd;
1096 
1097 	dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state);
1098 
1099 	if (pci_dev == NULL)
1100 		return PCI_ERS_RESULT_NEED_RESET;
1101 
1102 	cd = dev_get_drvdata(&pci_dev->dev);
1103 	if (cd == NULL)
1104 		return PCI_ERS_RESULT_NEED_RESET;
1105 
1106 	switch (state) {
1107 	case pci_channel_io_normal:
1108 		return PCI_ERS_RESULT_CAN_RECOVER;
1109 	case pci_channel_io_frozen:
1110 		return PCI_ERS_RESULT_NEED_RESET;
1111 	case pci_channel_io_perm_failure:
1112 		return PCI_ERS_RESULT_DISCONNECT;
1113 	}
1114 
1115 	return PCI_ERS_RESULT_NEED_RESET;
1116 }
1117 
1118 static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
1119 {
1120 	return PCI_ERS_RESULT_NONE;
1121 }
1122 
1123 static void genwqe_err_resume(struct pci_dev *dev)
1124 {
1125 }
1126 
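/**
 * genwqe_sriov_configure() - Enable or disable SR-IOV virtual functions
 * @dev:    PCI device of the physical function
 * @numvfs: number of VFs to enable, or 0 to disable SR-IOV
 *
 * Called by the PCI core when user space writes to the device's
 * sriov_numvfs sysfs attribute, e.g. (illustrative device path):
 *
 *   echo 4 > /sys/bus/pci/devices/0000:01:00.0/sriov_numvfs
 */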
1127 static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs)
1128 {
1129 	struct genwqe_dev *cd = dev_get_drvdata(&dev->dev);
1130 
1131 	if (numvfs > 0) {
1132 		genwqe_setup_vf_jtimer(cd);
1133 		pci_enable_sriov(dev, numvfs);
1134 		return numvfs;
1135 	}
1136 	if (numvfs == 0) {
1137 		pci_disable_sriov(dev);
1138 		return 0;
1139 	}
1140 	return 0;
1141 }
1142 
1143 static struct pci_error_handlers genwqe_err_handler = {
1144 	.error_detected = genwqe_err_error_detected,
1145 	.mmio_enabled	= genwqe_err_result_none,
1146 	.link_reset	= genwqe_err_result_none,
1147 	.slot_reset	= genwqe_err_result_none,
1148 	.resume		= genwqe_err_resume,
1149 };
1150 
1151 static struct pci_driver genwqe_driver = {
1152 	.name	  = genwqe_driver_name,
1153 	.id_table = genwqe_device_table,
1154 	.probe	  = genwqe_probe,
1155 	.remove	  = genwqe_remove,
1156 	.sriov_configure = genwqe_sriov_configure,
1157 	.err_handler = &genwqe_err_handler,
1158 };
1159 
1160 /**
1161  * genwqe_init_module() - Driver registration and initialization
1162  */
1163 static int __init genwqe_init_module(void)
1164 {
1165 	int rc;
1166 
1167 	class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME);
1168 	if (IS_ERR(class_genwqe)) {
1169 		pr_err("[%s] create class failed\n", __func__);
1170 		return -ENOMEM;
1171 	}
1172 
1173 	debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL);
1174 	if (!debugfs_genwqe) {
1175 		rc = -ENOMEM;
1176 		goto err_out;
1177 	}
1178 
1179 	rc = pci_register_driver(&genwqe_driver);
1180 	if (rc != 0) {
1181 		pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc);
1182 		goto err_out0;
1183 	}
1184 
1185 	return rc;
1186 
1187  err_out0:
1188 	debugfs_remove(debugfs_genwqe);
1189  err_out:
1190 	class_destroy(class_genwqe);
1191 	return rc;
1192 }
1193 
1194 /**
1195  * genwqe_exit_module() - Driver exit
1196  */
1197 static void __exit genwqe_exit_module(void)
1198 {
1199 	pci_unregister_driver(&genwqe_driver);
1200 	debugfs_remove(debugfs_genwqe);
1201 	class_destroy(class_genwqe);
1202 }
1203 
1204 module_init(genwqe_init_module);
1205 module_exit(genwqe_exit_module);
1206