1 /*
2  * Driver for the Micron P320 SSD
3  *   Copyright (C) 2011 Micron Technology, Inc.
4  *
5  * Portions of this code were derived from works subjected to the
6  * following copyright:
7  *    Copyright (C) 2009 Integrated Device Technology, Inc.
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  */
20 
21 #include <linux/pci.h>
22 #include <linux/interrupt.h>
23 #include <linux/ata.h>
24 #include <linux/delay.h>
25 #include <linux/hdreg.h>
26 #include <linux/uaccess.h>
27 #include <linux/random.h>
28 #include <linux/smp.h>
29 #include <linux/compat.h>
30 #include <linux/fs.h>
31 #include <linux/module.h>
32 #include <linux/genhd.h>
33 #include <linux/blkdev.h>
34 #include <linux/bio.h>
35 #include <linux/dma-mapping.h>
36 #include <linux/idr.h>
37 #include <linux/kthread.h>
38 #include <../drivers/ata/ahci.h>
39 #include <linux/export.h>
40 #include <linux/debugfs.h>
41 #include "mtip32xx.h"
42 
43 #define HW_CMD_SLOT_SZ		(MTIP_MAX_COMMAND_SLOTS * 32)
44 #define HW_CMD_TBL_SZ		(AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16))
45 #define HW_CMD_TBL_AR_SZ	(HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS)
46 #define HW_PORT_PRIV_DMA_SZ \
47 		(HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
48 
49 #define HOST_CAP_NZDMA		(1 << 19)
50 #define HOST_HSORG		0xFC
51 #define HSORG_DISABLE_SLOTGRP_INTR (1<<24)
52 #define HSORG_DISABLE_SLOTGRP_PXIS (1<<16)
53 #define HSORG_HWREV		0xFF00
54 #define HSORG_STYLE		0x8
55 #define HSORG_SLOTGROUPS	0x7
56 
57 #define PORT_COMMAND_ISSUE	0x38
58 #define PORT_SDBV		0x7C
59 
60 #define PORT_OFFSET		0x100
61 #define PORT_MEM_SIZE		0x80
62 
63 #define PORT_IRQ_ERR \
64 	(PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | \
65 	 PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP | \
66 	 PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR | PORT_IRQ_IF_NONFATAL | \
67 	 PORT_IRQ_OVERFLOW)
68 #define PORT_IRQ_LEGACY \
69 	(PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS)
70 #define PORT_IRQ_HANDLED \
71 	(PORT_IRQ_SDB_FIS | PORT_IRQ_LEGACY | \
72 	 PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR | \
73 	 PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY)
74 #define DEF_PORT_IRQ \
75 	(PORT_IRQ_ERR | PORT_IRQ_LEGACY | PORT_IRQ_SDB_FIS)
76 
77 /* product numbers */
78 #define MTIP_PRODUCT_UNKNOWN	0x00
79 #define MTIP_PRODUCT_ASICFPGA	0x11
80 
81 /* Device instance number, incremented each time a device is probed. */
82 static int instance;
83 
84 struct list_head online_list;
85 struct list_head removing_list;
86 spinlock_t dev_lock;
87 
88 /*
89  * Global variable used to hold the major block device number
90  * allocated in mtip_init().
91  */
92 static int mtip_major;
93 static struct dentry *dfs_parent;
94 static struct dentry *dfs_device_status;
95 
96 static u32 cpu_use[NR_CPUS];
97 
98 static DEFINE_SPINLOCK(rssd_index_lock);
99 static DEFINE_IDA(rssd_index_ida);
100 
101 static int mtip_block_initialize(struct driver_data *dd);
102 
103 #ifdef CONFIG_COMPAT
104 struct mtip_compat_ide_task_request_s {
105 	__u8		io_ports[8];
106 	__u8		hob_ports[8];
107 	ide_reg_valid_t	out_flags;
108 	ide_reg_valid_t	in_flags;
109 	int		data_phase;
110 	int		req_cmd;
111 	compat_ulong_t	out_size;
112 	compat_ulong_t	in_size;
113 };
114 #endif
115 
116 /*
117  * This function check_for_surprise_removal is called
118  * while card is removed from the system and it will
119  * read the vendor id from the configration space
120  *
121  * @pdev Pointer to the pci_dev structure.
122  *
123  * return value
124  *	 true if device removed, else false
125  */
126 static bool mtip_check_surprise_removal(struct pci_dev *pdev)
127 {
128 	u16 vendor_id = 0;
129 	struct driver_data *dd = pci_get_drvdata(pdev);
130 
131 	if (dd->sr)
132 		return true;
133 
134        /* Read the vendorID from the configuration space */
135 	pci_read_config_word(pdev, 0x00, &vendor_id);
136 	if (vendor_id == 0xFFFF) {
137 		dd->sr = true;
138 		if (dd->queue)
139 			set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags);
140 		else
141 			dev_warn(&dd->pdev->dev,
142 				"%s: dd->queue is NULL\n", __func__);
143 		if (dd->port) {
144 			set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags);
145 			wake_up_interruptible(&dd->port->svc_wait);
146 		} else
147 			dev_warn(&dd->pdev->dev,
148 				"%s: dd->port is NULL\n", __func__);
149 		return true; /* device removed */
150 	}
151 
152 	return false; /* device present */
153 }
154 
155 /*
156  * Obtain an empty command slot.
157  *
158  * This function needs to be reentrant since it could be called
159  * at the same time on multiple CPUs. The allocation of the
160  * command slot must be atomic.
161  *
162  * @port Pointer to the port data structure.
163  *
164  * return value
165  *	>= 0	Index of command slot obtained.
166  *	-1	No command slots available.
167  */
168 static int get_slot(struct mtip_port *port)
169 {
170 	int slot, i;
171 	unsigned int num_command_slots = port->dd->slot_groups * 32;
172 
173 	/*
174 	 * Try 10 times, because there is a small race here.
175 	 *  that's ok, because it's still cheaper than a lock.
176 	 *
177 	 * Race: Since this section is not protected by lock, same bit
178 	 * could be chosen by different process contexts running in
179 	 * different processor. So instead of costly lock, we are going
180 	 * with loop.
181 	 */
182 	for (i = 0; i < 10; i++) {
183 		slot = find_next_zero_bit(port->allocated,
184 					 num_command_slots, 1);
185 		if ((slot < num_command_slots) &&
186 		    (!test_and_set_bit(slot, port->allocated)))
187 			return slot;
188 	}
189 	dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n");
190 
191 	mtip_check_surprise_removal(port->dd->pdev);
192 	return -1;
193 }
194 
195 /*
196  * Release a command slot.
197  *
198  * @port Pointer to the port data structure.
199  * @tag  Tag of command to release
200  *
201  * return value
202  *	None
203  */
204 static inline void release_slot(struct mtip_port *port, int tag)
205 {
206 	smp_mb__before_clear_bit();
207 	clear_bit(tag, port->allocated);
208 	smp_mb__after_clear_bit();
209 }
210 
211 /*
212  * IO completion function.
213  *
214  * This completion function is called by the driver ISR when a
215  * command that was issued by the kernel completes. It first calls the
216  * asynchronous completion function which normally calls back into the block
217  * layer passing the asynchronous callback data, then unmaps the
218  * scatter list associated with the completed command, and finally
219  * clears the allocated bit associated with the completed command.
220  *
221  * @port   Pointer to the port data structure.
222  * @tag    Tag of the command.
223  * @data   Pointer to driver_data.
224  * @status Completion status.
225  *
226  * return value
227  *	None
228  */
229 static void mtip_async_complete(struct mtip_port *port,
230 				int tag,
231 				void *data,
232 				int status)
233 {
234 	struct mtip_cmd *command;
235 	struct driver_data *dd = data;
236 	int cb_status = status ? -EIO : 0;
237 
238 	if (unlikely(!dd) || unlikely(!port))
239 		return;
240 
241 	command = &port->commands[tag];
242 
243 	if (unlikely(status == PORT_IRQ_TF_ERR)) {
244 		dev_warn(&port->dd->pdev->dev,
245 			"Command tag %d failed due to TFE\n", tag);
246 	}
247 
248 	/* Upper layer callback */
249 	if (likely(command->async_callback))
250 		command->async_callback(command->async_data, cb_status);
251 
252 	command->async_callback = NULL;
253 	command->comp_func = NULL;
254 
255 	/* Unmap the DMA scatter list entries */
256 	dma_unmap_sg(&dd->pdev->dev,
257 		command->sg,
258 		command->scatter_ents,
259 		command->direction);
260 
261 	/* Clear the allocated and active bits for the command */
262 	atomic_set(&port->commands[tag].active, 0);
263 	release_slot(port, tag);
264 
265 	up(&port->cmd_slot);
266 }
267 
268 /*
269  * This function is called for clean the pending command in the
270  * command slot during the surprise removal of device and return
271  * error to the upper layer.
272  *
273  * @dd Pointer to the DRIVER_DATA structure.
274  *
275  * return value
276  *	None
277  */
278 static void mtip_command_cleanup(struct driver_data *dd)
279 {
280 	int tag = 0;
281 	struct mtip_cmd *cmd;
282 	struct mtip_port *port = dd->port;
283 	unsigned int num_cmd_slots = dd->slot_groups * 32;
284 
285 	if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
286 		return;
287 
288 	if (!port)
289 		return;
290 
291 	cmd = &port->commands[MTIP_TAG_INTERNAL];
292 	if (atomic_read(&cmd->active))
293 		if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) &
294 					(1 << MTIP_TAG_INTERNAL))
295 			if (cmd->comp_func)
296 				cmd->comp_func(port, MTIP_TAG_INTERNAL,
297 					 cmd->comp_data, -ENODEV);
298 
299 	while (1) {
300 		tag = find_next_bit(port->allocated, num_cmd_slots, tag);
301 		if (tag >= num_cmd_slots)
302 			break;
303 
304 		cmd = &port->commands[tag];
305 		if (atomic_read(&cmd->active))
306 			mtip_async_complete(port, tag, dd, -ENODEV);
307 	}
308 
309 	set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
310 }
311 
312 /*
313  * Reset the HBA (without sleeping)
314  *
315  * @dd Pointer to the driver data structure.
316  *
317  * return value
318  *	0	The reset was successful.
319  *	-1	The HBA Reset bit did not clear.
320  */
321 static int mtip_hba_reset(struct driver_data *dd)
322 {
323 	unsigned long timeout;
324 
325 	/* Set the reset bit */
326 	writel(HOST_RESET, dd->mmio + HOST_CTL);
327 
328 	/* Flush */
329 	readl(dd->mmio + HOST_CTL);
330 
331 	/* Spin for up to 2 seconds, waiting for reset acknowledgement */
332 	timeout = jiffies + msecs_to_jiffies(2000);
333 	do {
334 		mdelay(10);
335 		if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
336 			return -1;
337 
338 	} while ((readl(dd->mmio + HOST_CTL) & HOST_RESET)
339 		 && time_before(jiffies, timeout));
340 
341 	if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
342 		return -1;
343 
344 	return 0;
345 }
346 
347 /*
348  * Issue a command to the hardware.
349  *
350  * Set the appropriate bit in the s_active and Command Issue hardware
351  * registers, causing hardware command processing to begin.
352  *
353  * @port Pointer to the port structure.
354  * @tag  The tag of the command to be issued.
355  *
356  * return value
357  *      None
358  */
359 static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
360 {
361 	int group = tag >> 5;
362 
363 	atomic_set(&port->commands[tag].active, 1);
364 
365 	/* guard SACT and CI registers */
366 	spin_lock(&port->cmd_issue_lock[group]);
367 	writel((1 << MTIP_TAG_BIT(tag)),
368 			port->s_active[MTIP_TAG_INDEX(tag)]);
369 	writel((1 << MTIP_TAG_BIT(tag)),
370 			port->cmd_issue[MTIP_TAG_INDEX(tag)]);
371 	spin_unlock(&port->cmd_issue_lock[group]);
372 
373 	/* Set the command's timeout value.*/
374 	port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
375 					MTIP_NCQ_COMMAND_TIMEOUT_MS);
376 }
377 
378 /*
379  * Enable/disable the reception of FIS
380  *
381  * @port   Pointer to the port data structure
382  * @enable 1 to enable, 0 to disable
383  *
384  * return value
385  *	Previous state: 1 enabled, 0 disabled
386  */
387 static int mtip_enable_fis(struct mtip_port *port, int enable)
388 {
389 	u32 tmp;
390 
391 	/* enable FIS reception */
392 	tmp = readl(port->mmio + PORT_CMD);
393 	if (enable)
394 		writel(tmp | PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
395 	else
396 		writel(tmp & ~PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
397 
398 	/* Flush */
399 	readl(port->mmio + PORT_CMD);
400 
401 	return (((tmp & PORT_CMD_FIS_RX) == PORT_CMD_FIS_RX));
402 }
403 
404 /*
405  * Enable/disable the DMA engine
406  *
407  * @port   Pointer to the port data structure
408  * @enable 1 to enable, 0 to disable
409  *
410  * return value
411  *	Previous state: 1 enabled, 0 disabled.
412  */
413 static int mtip_enable_engine(struct mtip_port *port, int enable)
414 {
415 	u32 tmp;
416 
417 	/* enable FIS reception */
418 	tmp = readl(port->mmio + PORT_CMD);
419 	if (enable)
420 		writel(tmp | PORT_CMD_START, port->mmio + PORT_CMD);
421 	else
422 		writel(tmp & ~PORT_CMD_START, port->mmio + PORT_CMD);
423 
424 	readl(port->mmio + PORT_CMD);
425 	return (((tmp & PORT_CMD_START) == PORT_CMD_START));
426 }
427 
428 /*
429  * Enables the port DMA engine and FIS reception.
430  *
431  * return value
432  *	None
433  */
434 static inline void mtip_start_port(struct mtip_port *port)
435 {
436 	/* Enable FIS reception */
437 	mtip_enable_fis(port, 1);
438 
439 	/* Enable the DMA engine */
440 	mtip_enable_engine(port, 1);
441 }
442 
443 /*
444  * Deinitialize a port by disabling port interrupts, the DMA engine,
445  * and FIS reception.
446  *
447  * @port Pointer to the port structure
448  *
449  * return value
450  *	None
451  */
452 static inline void mtip_deinit_port(struct mtip_port *port)
453 {
454 	/* Disable interrupts on this port */
455 	writel(0, port->mmio + PORT_IRQ_MASK);
456 
457 	/* Disable the DMA engine */
458 	mtip_enable_engine(port, 0);
459 
460 	/* Disable FIS reception */
461 	mtip_enable_fis(port, 0);
462 }
463 
464 /*
465  * Initialize a port.
466  *
467  * This function deinitializes the port by calling mtip_deinit_port() and
468  * then initializes it by setting the command header and RX FIS addresses,
469  * clearing the SError register and any pending port interrupts before
470  * re-enabling the default set of port interrupts.
471  *
472  * @port Pointer to the port structure.
473  *
474  * return value
475  *	None
476  */
477 static void mtip_init_port(struct mtip_port *port)
478 {
479 	int i;
480 	mtip_deinit_port(port);
481 
482 	/* Program the command list base and FIS base addresses */
483 	if (readl(port->dd->mmio + HOST_CAP) & HOST_CAP_64) {
484 		writel((port->command_list_dma >> 16) >> 16,
485 			 port->mmio + PORT_LST_ADDR_HI);
486 		writel((port->rxfis_dma >> 16) >> 16,
487 			 port->mmio + PORT_FIS_ADDR_HI);
488 	}
489 
490 	writel(port->command_list_dma & 0xFFFFFFFF,
491 			port->mmio + PORT_LST_ADDR);
492 	writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR);
493 
494 	/* Clear SError */
495 	writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
496 
497 	/* reset the completed registers.*/
498 	for (i = 0; i < port->dd->slot_groups; i++)
499 		writel(0xFFFFFFFF, port->completed[i]);
500 
501 	/* Clear any pending interrupts for this port */
502 	writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT);
503 
504 	/* Clear any pending interrupts on the HBA. */
505 	writel(readl(port->dd->mmio + HOST_IRQ_STAT),
506 					port->dd->mmio + HOST_IRQ_STAT);
507 
508 	/* Enable port interrupts */
509 	writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK);
510 }
511 
512 /*
513  * Restart a port
514  *
515  * @port Pointer to the port data structure.
516  *
517  * return value
518  *	None
519  */
520 static void mtip_restart_port(struct mtip_port *port)
521 {
522 	unsigned long timeout;
523 
524 	/* Disable the DMA engine */
525 	mtip_enable_engine(port, 0);
526 
527 	/* Chip quirk: wait up to 500ms for PxCMD.CR == 0 */
528 	timeout = jiffies + msecs_to_jiffies(500);
529 	while ((readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON)
530 		 && time_before(jiffies, timeout))
531 		;
532 
533 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
534 		return;
535 
536 	/*
537 	 * Chip quirk: escalate to hba reset if
538 	 * PxCMD.CR not clear after 500 ms
539 	 */
540 	if (readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) {
541 		dev_warn(&port->dd->pdev->dev,
542 			"PxCMD.CR not clear, escalating reset\n");
543 
544 		if (mtip_hba_reset(port->dd))
545 			dev_err(&port->dd->pdev->dev,
546 				"HBA reset escalation failed.\n");
547 
548 		/* 30 ms delay before com reset to quiesce chip */
549 		mdelay(30);
550 	}
551 
552 	dev_warn(&port->dd->pdev->dev, "Issuing COM reset\n");
553 
554 	/* Set PxSCTL.DET */
555 	writel(readl(port->mmio + PORT_SCR_CTL) |
556 			 1, port->mmio + PORT_SCR_CTL);
557 	readl(port->mmio + PORT_SCR_CTL);
558 
559 	/* Wait 1 ms to quiesce chip function */
560 	timeout = jiffies + msecs_to_jiffies(1);
561 	while (time_before(jiffies, timeout))
562 		;
563 
564 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
565 		return;
566 
567 	/* Clear PxSCTL.DET */
568 	writel(readl(port->mmio + PORT_SCR_CTL) & ~1,
569 			 port->mmio + PORT_SCR_CTL);
570 	readl(port->mmio + PORT_SCR_CTL);
571 
572 	/* Wait 500 ms for bit 0 of PORT_SCR_STS to be set */
573 	timeout = jiffies + msecs_to_jiffies(500);
574 	while (((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
575 			 && time_before(jiffies, timeout))
576 		;
577 
578 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
579 		return;
580 
581 	if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
582 		dev_warn(&port->dd->pdev->dev,
583 			"COM reset failed\n");
584 
585 	mtip_init_port(port);
586 	mtip_start_port(port);
587 
588 }
589 
590 static int mtip_device_reset(struct driver_data *dd)
591 {
592 	int rv = 0;
593 
594 	if (mtip_check_surprise_removal(dd->pdev))
595 		return 0;
596 
597 	if (mtip_hba_reset(dd) < 0)
598 		rv = -EFAULT;
599 
600 	mdelay(1);
601 	mtip_init_port(dd->port);
602 	mtip_start_port(dd->port);
603 
604 	/* Enable interrupts on the HBA. */
605 	writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
606 					dd->mmio + HOST_CTL);
607 	return rv;
608 }
609 
610 /*
611  * Helper function for tag logging
612  */
613 static void print_tags(struct driver_data *dd,
614 			char *msg,
615 			unsigned long *tagbits,
616 			int cnt)
617 {
618 	unsigned char tagmap[128];
619 	int group, tagmap_len = 0;
620 
621 	memset(tagmap, 0, sizeof(tagmap));
622 	for (group = SLOTBITS_IN_LONGS; group > 0; group--)
623 		tagmap_len = sprintf(tagmap + tagmap_len, "%016lX ",
624 						tagbits[group-1]);
625 	dev_warn(&dd->pdev->dev,
626 			"%d command(s) %s: tagmap [%s]", cnt, msg, tagmap);
627 }
628 
629 /*
630  * Called periodically to see if any read/write commands are
631  * taking too long to complete.
632  *
633  * @data Pointer to the PORT data structure.
634  *
635  * return value
636  *	None
637  */
638 static void mtip_timeout_function(unsigned long int data)
639 {
640 	struct mtip_port *port = (struct mtip_port *) data;
641 	struct host_to_dev_fis *fis;
642 	struct mtip_cmd *command;
643 	int tag, cmdto_cnt = 0;
644 	unsigned int bit, group;
645 	unsigned int num_command_slots;
646 	unsigned long to, tagaccum[SLOTBITS_IN_LONGS];
647 
648 	if (unlikely(!port))
649 		return;
650 
651 	if (unlikely(port->dd->sr))
652 		return;
653 
654 	if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) {
655 		mod_timer(&port->cmd_timer,
656 			jiffies + msecs_to_jiffies(30000));
657 		return;
658 	}
659 	/* clear the tag accumulator */
660 	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
661 	num_command_slots = port->dd->slot_groups * 32;
662 
663 	for (tag = 0; tag < num_command_slots; tag++) {
664 		/*
665 		 * Skip internal command slot as it has
666 		 * its own timeout mechanism
667 		 */
668 		if (tag == MTIP_TAG_INTERNAL)
669 			continue;
670 
671 		if (atomic_read(&port->commands[tag].active) &&
672 		   (time_after(jiffies, port->commands[tag].comp_time))) {
673 			group = tag >> 5;
674 			bit = tag & 0x1F;
675 
676 			command = &port->commands[tag];
677 			fis = (struct host_to_dev_fis *) command->command;
678 
679 			set_bit(tag, tagaccum);
680 			cmdto_cnt++;
681 			if (cmdto_cnt == 1)
682 				set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
683 
684 			/*
685 			 * Clear the completed bit. This should prevent
686 			 *  any interrupt handlers from trying to retire
687 			 *  the command.
688 			 */
689 			writel(1 << bit, port->completed[group]);
690 
691 			/* Call the async completion callback. */
692 			if (likely(command->async_callback))
693 				command->async_callback(command->async_data,
694 							 -EIO);
695 			command->async_callback = NULL;
696 			command->comp_func = NULL;
697 
698 			/* Unmap the DMA scatter list entries */
699 			dma_unmap_sg(&port->dd->pdev->dev,
700 					command->sg,
701 					command->scatter_ents,
702 					command->direction);
703 
704 			/*
705 			 * Clear the allocated bit and active tag for the
706 			 * command.
707 			 */
708 			atomic_set(&port->commands[tag].active, 0);
709 			release_slot(port, tag);
710 
711 			up(&port->cmd_slot);
712 		}
713 	}
714 
715 	if (cmdto_cnt) {
716 		print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
717 		if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
718 			mtip_device_reset(port->dd);
719 			wake_up_interruptible(&port->svc_wait);
720 		}
721 		clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
722 	}
723 
724 	if (port->ic_pause_timer) {
725 		to  = port->ic_pause_timer + msecs_to_jiffies(1000);
726 		if (time_after(jiffies, to)) {
727 			if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
728 				port->ic_pause_timer = 0;
729 				clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
730 				clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
731 				clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
732 				wake_up_interruptible(&port->svc_wait);
733 			}
734 
735 
736 		}
737 	}
738 
739 	/* Restart the timer */
740 	mod_timer(&port->cmd_timer,
741 		jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
742 }
743 
744 /*
745  * Internal command completion callback function.
746  *
747  * This function is normally called by the driver ISR when an internal
748  * command completed. This function signals the command completion by
749  * calling complete().
750  *
751  * @port   Pointer to the port data structure.
752  * @tag    Tag of the command that has completed.
753  * @data   Pointer to a completion structure.
754  * @status Completion status.
755  *
756  * return value
757  *	None
758  */
759 static void mtip_completion(struct mtip_port *port,
760 			    int tag,
761 			    void *data,
762 			    int status)
763 {
764 	struct mtip_cmd *command = &port->commands[tag];
765 	struct completion *waiting = data;
766 	if (unlikely(status == PORT_IRQ_TF_ERR))
767 		dev_warn(&port->dd->pdev->dev,
768 			"Internal command %d completed with TFE\n", tag);
769 
770 	command->async_callback = NULL;
771 	command->comp_func = NULL;
772 
773 	complete(waiting);
774 }
775 
776 static void mtip_null_completion(struct mtip_port *port,
777 			    int tag,
778 			    void *data,
779 			    int status)
780 {
781 	return;
782 }
783 
784 static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
785 				dma_addr_t buffer_dma, unsigned int sectors);
786 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
787 						struct smart_attr *attrib);
788 /*
789  * Handle an error.
790  *
791  * @dd Pointer to the DRIVER_DATA structure.
792  *
793  * return value
794  *	None
795  */
796 static void mtip_handle_tfe(struct driver_data *dd)
797 {
798 	int group, tag, bit, reissue, rv;
799 	struct mtip_port *port;
800 	struct mtip_cmd  *cmd;
801 	u32 completed;
802 	struct host_to_dev_fis *fis;
803 	unsigned long tagaccum[SLOTBITS_IN_LONGS];
804 	unsigned int cmd_cnt = 0;
805 	unsigned char *buf;
806 	char *fail_reason = NULL;
807 	int fail_all_ncq_write = 0, fail_all_ncq_cmds = 0;
808 
809 	dev_warn(&dd->pdev->dev, "Taskfile error\n");
810 
811 	port = dd->port;
812 
813 	/* Stop the timer to prevent command timeouts. */
814 	del_timer(&port->cmd_timer);
815 	set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
816 
817 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
818 			test_bit(MTIP_TAG_INTERNAL, port->allocated)) {
819 		cmd = &port->commands[MTIP_TAG_INTERNAL];
820 		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
821 
822 		atomic_inc(&cmd->active); /* active > 1 indicates error */
823 		if (cmd->comp_data && cmd->comp_func) {
824 			cmd->comp_func(port, MTIP_TAG_INTERNAL,
825 					cmd->comp_data, PORT_IRQ_TF_ERR);
826 		}
827 		goto handle_tfe_exit;
828 	}
829 
830 	/* clear the tag accumulator */
831 	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
832 
833 	/* Loop through all the groups */
834 	for (group = 0; group < dd->slot_groups; group++) {
835 		completed = readl(port->completed[group]);
836 
837 		/* clear completed status register in the hardware.*/
838 		writel(completed, port->completed[group]);
839 
840 		/* Process successfully completed commands */
841 		for (bit = 0; bit < 32 && completed; bit++) {
842 			if (!(completed & (1<<bit)))
843 				continue;
844 			tag = (group << 5) + bit;
845 
846 			/* Skip the internal command slot */
847 			if (tag == MTIP_TAG_INTERNAL)
848 				continue;
849 
850 			cmd = &port->commands[tag];
851 			if (likely(cmd->comp_func)) {
852 				set_bit(tag, tagaccum);
853 				cmd_cnt++;
854 				atomic_set(&cmd->active, 0);
855 				cmd->comp_func(port,
856 					 tag,
857 					 cmd->comp_data,
858 					 0);
859 			} else {
860 				dev_err(&port->dd->pdev->dev,
861 					"Missing completion func for tag %d",
862 					tag);
863 				if (mtip_check_surprise_removal(dd->pdev)) {
864 					/* don't proceed further */
865 					return;
866 				}
867 			}
868 		}
869 	}
870 
871 	print_tags(dd, "completed (TFE)", tagaccum, cmd_cnt);
872 
873 	/* Restart the port */
874 	mdelay(20);
875 	mtip_restart_port(port);
876 
877 	/* Trying to determine the cause of the error */
878 	rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
879 				dd->port->log_buf,
880 				dd->port->log_buf_dma, 1);
881 	if (rv) {
882 		dev_warn(&dd->pdev->dev,
883 			"Error in READ LOG EXT (10h) command\n");
884 		/* non-critical error, don't fail the load */
885 	} else {
886 		buf = (unsigned char *)dd->port->log_buf;
887 		if (buf[259] & 0x1) {
888 			dev_info(&dd->pdev->dev,
889 				"Write protect bit is set.\n");
890 			set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
891 			fail_all_ncq_write = 1;
892 			fail_reason = "write protect";
893 		}
894 		if (buf[288] == 0xF7) {
895 			dev_info(&dd->pdev->dev,
896 				"Exceeded Tmax, drive in thermal shutdown.\n");
897 			set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
898 			fail_all_ncq_cmds = 1;
899 			fail_reason = "thermal shutdown";
900 		}
901 		if (buf[288] == 0xBF) {
902 			dev_info(&dd->pdev->dev,
903 				"Drive indicates rebuild has failed.\n");
904 			fail_all_ncq_cmds = 1;
905 			fail_reason = "rebuild failed";
906 		}
907 	}
908 
909 	/* clear the tag accumulator */
910 	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
911 
912 	/* Loop through all the groups */
913 	for (group = 0; group < dd->slot_groups; group++) {
914 		for (bit = 0; bit < 32; bit++) {
915 			reissue = 1;
916 			tag = (group << 5) + bit;
917 			cmd = &port->commands[tag];
918 
919 			/* If the active bit is set re-issue the command */
920 			if (atomic_read(&cmd->active) == 0)
921 				continue;
922 
923 			fis = (struct host_to_dev_fis *)cmd->command;
924 
925 			/* Should re-issue? */
926 			if (tag == MTIP_TAG_INTERNAL ||
927 			    fis->command == ATA_CMD_SET_FEATURES)
928 				reissue = 0;
929 			else {
930 				if (fail_all_ncq_cmds ||
931 					(fail_all_ncq_write &&
932 					fis->command == ATA_CMD_FPDMA_WRITE)) {
933 					dev_warn(&dd->pdev->dev,
934 					"  Fail: %s w/tag %d [%s].\n",
935 					fis->command == ATA_CMD_FPDMA_WRITE ?
936 						"write" : "read",
937 					tag,
938 					fail_reason != NULL ?
939 						fail_reason : "unknown");
940 					atomic_set(&cmd->active, 0);
941 					if (cmd->comp_func) {
942 						cmd->comp_func(port, tag,
943 							cmd->comp_data,
944 							-ENODATA);
945 					}
946 					continue;
947 				}
948 			}
949 
950 			/*
951 			 * First check if this command has
952 			 *  exceeded its retries.
953 			 */
954 			if (reissue && (cmd->retries-- > 0)) {
955 
956 				set_bit(tag, tagaccum);
957 
958 				/* Re-issue the command. */
959 				mtip_issue_ncq_command(port, tag);
960 
961 				continue;
962 			}
963 
964 			/* Retire a command that will not be reissued */
965 			dev_warn(&port->dd->pdev->dev,
966 				"retiring tag %d\n", tag);
967 			atomic_set(&cmd->active, 0);
968 
969 			if (cmd->comp_func)
970 				cmd->comp_func(
971 					port,
972 					tag,
973 					cmd->comp_data,
974 					PORT_IRQ_TF_ERR);
975 			else
976 				dev_warn(&port->dd->pdev->dev,
977 					"Bad completion for tag %d\n",
978 					tag);
979 		}
980 	}
981 	print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
982 
983 handle_tfe_exit:
984 	/* clear eh_active */
985 	clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
986 	wake_up_interruptible(&port->svc_wait);
987 
988 	mod_timer(&port->cmd_timer,
989 		 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
990 }
991 
992 /*
993  * Handle a set device bits interrupt
994  */
995 static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
996 							u32 completed)
997 {
998 	struct driver_data *dd = port->dd;
999 	int tag, bit;
1000 	struct mtip_cmd *command;
1001 
1002 	if (!completed) {
1003 		WARN_ON_ONCE(!completed);
1004 		return;
1005 	}
1006 	/* clear completed status register in the hardware.*/
1007 	writel(completed, port->completed[group]);
1008 
1009 	/* Process completed commands. */
1010 	for (bit = 0; (bit < 32) && completed; bit++) {
1011 		if (completed & 0x01) {
1012 			tag = (group << 5) | bit;
1013 
1014 			/* skip internal command slot. */
1015 			if (unlikely(tag == MTIP_TAG_INTERNAL))
1016 				continue;
1017 
1018 			command = &port->commands[tag];
1019 			/* make internal callback */
1020 			if (likely(command->comp_func)) {
1021 				command->comp_func(
1022 					port,
1023 					tag,
1024 					command->comp_data,
1025 					0);
1026 			} else {
1027 				dev_dbg(&dd->pdev->dev,
1028 					"Null completion for tag %d",
1029 					tag);
1030 
1031 				if (mtip_check_surprise_removal(
1032 					dd->pdev)) {
1033 					return;
1034 				}
1035 			}
1036 		}
1037 		completed >>= 1;
1038 	}
1039 
1040 	/* If last, re-enable interrupts */
1041 	if (atomic_dec_return(&dd->irq_workers_active) == 0)
1042 		writel(0xffffffff, dd->mmio + HOST_IRQ_STAT);
1043 }
1044 
1045 /*
1046  * Process legacy pio and d2h interrupts
1047  */
1048 static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
1049 {
1050 	struct mtip_port *port = dd->port;
1051 	struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
1052 
1053 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
1054 	    (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1055 		& (1 << MTIP_TAG_INTERNAL))) {
1056 		if (cmd->comp_func) {
1057 			cmd->comp_func(port,
1058 				MTIP_TAG_INTERNAL,
1059 				cmd->comp_data,
1060 				0);
1061 			return;
1062 		}
1063 	}
1064 
1065 	return;
1066 }
1067 
1068 /*
1069  * Demux and handle errors
1070  */
1071 static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
1072 {
1073 	if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR)))
1074 		mtip_handle_tfe(dd);
1075 
1076 	if (unlikely(port_stat & PORT_IRQ_CONNECT)) {
1077 		dev_warn(&dd->pdev->dev,
1078 			"Clearing PxSERR.DIAG.x\n");
1079 		writel((1 << 26), dd->port->mmio + PORT_SCR_ERR);
1080 	}
1081 
1082 	if (unlikely(port_stat & PORT_IRQ_PHYRDY)) {
1083 		dev_warn(&dd->pdev->dev,
1084 			"Clearing PxSERR.DIAG.n\n");
1085 		writel((1 << 16), dd->port->mmio + PORT_SCR_ERR);
1086 	}
1087 
1088 	if (unlikely(port_stat & ~PORT_IRQ_HANDLED)) {
1089 		dev_warn(&dd->pdev->dev,
1090 			"Port stat errors %x unhandled\n",
1091 			(port_stat & ~PORT_IRQ_HANDLED));
1092 	}
1093 }
1094 
1095 static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
1096 {
1097 	struct driver_data *dd = (struct driver_data *) data;
1098 	struct mtip_port *port = dd->port;
1099 	u32 hba_stat, port_stat;
1100 	int rv = IRQ_NONE;
1101 	int do_irq_enable = 1, i, workers;
1102 	struct mtip_work *twork;
1103 
1104 	hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
1105 	if (hba_stat) {
1106 		rv = IRQ_HANDLED;
1107 
1108 		/* Acknowledge the interrupt status on the port.*/
1109 		port_stat = readl(port->mmio + PORT_IRQ_STAT);
1110 		writel(port_stat, port->mmio + PORT_IRQ_STAT);
1111 
1112 		/* Demux port status */
1113 		if (likely(port_stat & PORT_IRQ_SDB_FIS)) {
1114 			do_irq_enable = 0;
1115 			WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0);
1116 
1117 			/* Start at 1: group zero is always local? */
1118 			for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS;
1119 									i++) {
1120 				twork = &dd->work[i];
1121 				twork->completed = readl(port->completed[i]);
1122 				if (twork->completed)
1123 					workers++;
1124 			}
1125 
1126 			atomic_set(&dd->irq_workers_active, workers);
1127 			if (workers) {
1128 				for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) {
1129 					twork = &dd->work[i];
1130 					if (twork->completed)
1131 						queue_work_on(
1132 							twork->cpu_binding,
1133 							dd->isr_workq,
1134 							&twork->work);
1135 				}
1136 
1137 				if (likely(dd->work[0].completed))
1138 					mtip_workq_sdbfx(port, 0,
1139 							dd->work[0].completed);
1140 
1141 			} else {
1142 				/*
1143 				 * Chip quirk: SDB interrupt but nothing
1144 				 * to complete
1145 				 */
1146 				do_irq_enable = 1;
1147 			}
1148 		}
1149 
1150 		if (unlikely(port_stat & PORT_IRQ_ERR)) {
1151 			if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
1152 				/* don't proceed further */
1153 				return IRQ_HANDLED;
1154 			}
1155 			if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1156 							&dd->dd_flag))
1157 				return rv;
1158 
1159 			mtip_process_errors(dd, port_stat & PORT_IRQ_ERR);
1160 		}
1161 
1162 		if (unlikely(port_stat & PORT_IRQ_LEGACY))
1163 			mtip_process_legacy(dd, port_stat & PORT_IRQ_LEGACY);
1164 	}
1165 
1166 	/* acknowledge interrupt */
1167 	if (unlikely(do_irq_enable))
1168 		writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
1169 
1170 	return rv;
1171 }
1172 
1173 /*
1174  * HBA interrupt subroutine.
1175  *
1176  * @irq		IRQ number.
1177  * @instance	Pointer to the driver data structure.
1178  *
1179  * return value
1180  *	IRQ_HANDLED	A HBA interrupt was pending and handled.
1181  *	IRQ_NONE	This interrupt was not for the HBA.
1182  */
1183 static irqreturn_t mtip_irq_handler(int irq, void *instance)
1184 {
1185 	struct driver_data *dd = instance;
1186 
1187 	return mtip_handle_irq(dd);
1188 }
1189 
1190 static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
1191 {
1192 	atomic_set(&port->commands[tag].active, 1);
1193 	writel(1 << MTIP_TAG_BIT(tag),
1194 		port->cmd_issue[MTIP_TAG_INDEX(tag)]);
1195 }
1196 
1197 static bool mtip_pause_ncq(struct mtip_port *port,
1198 				struct host_to_dev_fis *fis)
1199 {
1200 	struct host_to_dev_fis *reply;
1201 	unsigned long task_file_data;
1202 
1203 	reply = port->rxfis + RX_FIS_D2H_REG;
1204 	task_file_data = readl(port->mmio+PORT_TFDATA);
1205 
1206 	if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
1207 		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1208 
1209 	if ((task_file_data & 1))
1210 		return false;
1211 
1212 	if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
1213 		set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
1214 		set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1215 		port->ic_pause_timer = jiffies;
1216 		return true;
1217 	} else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
1218 					(fis->features == 0x03)) {
1219 		set_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
1220 		port->ic_pause_timer = jiffies;
1221 		return true;
1222 	} else if ((fis->command == ATA_CMD_SEC_ERASE_UNIT) ||
1223 		((fis->command == 0xFC) &&
1224 			(fis->features == 0x27 || fis->features == 0x72 ||
1225 			 fis->features == 0x62 || fis->features == 0x26))) {
1226 		/* Com reset after secure erase or lowlevel format */
1227 		mtip_restart_port(port);
1228 		return false;
1229 	}
1230 
1231 	return false;
1232 }
1233 
1234 /*
1235  * Wait for port to quiesce
1236  *
1237  * @port    Pointer to port data structure
1238  * @timeout Max duration to wait (ms)
1239  *
1240  * return value
1241  *	0	Success
1242  *	-EBUSY  Commands still active
1243  */
1244 static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
1245 {
1246 	unsigned long to;
1247 	unsigned int n;
1248 	unsigned int active = 1;
1249 
1250 	to = jiffies + msecs_to_jiffies(timeout);
1251 	do {
1252 		if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
1253 			test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
1254 			msleep(20);
1255 			continue; /* svc thd is actively issuing commands */
1256 		}
1257 		if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
1258 			return -EFAULT;
1259 		/*
1260 		 * Ignore s_active bit 0 of array element 0.
1261 		 * This bit will always be set
1262 		 */
1263 		active = readl(port->s_active[0]) & 0xFFFFFFFE;
1264 		for (n = 1; n < port->dd->slot_groups; n++)
1265 			active |= readl(port->s_active[n]);
1266 
1267 		if (!active)
1268 			break;
1269 
1270 		msleep(20);
1271 	} while (time_before(jiffies, to));
1272 
1273 	return active ? -EBUSY : 0;
1274 }
1275 
1276 /*
1277  * Execute an internal command and wait for the completion.
1278  *
1279  * @port    Pointer to the port data structure.
1280  * @fis     Pointer to the FIS that describes the command.
1281  * @fis_len  Length in WORDS of the FIS.
1282  * @buffer  DMA accessible for command data.
1283  * @buf_len  Length, in bytes, of the data buffer.
1284  * @opts    Command header options, excluding the FIS length
1285  *             and the number of PRD entries.
1286  * @timeout Time in ms to wait for the command to complete.
1287  *
1288  * return value
1289  *	0	 Command completed successfully.
1290  *	-EFAULT  The buffer address is not correctly aligned.
1291  *	-EBUSY   Internal command or other IO in progress.
1292  *	-EAGAIN  Time out waiting for command to complete.
1293  */
1294 static int mtip_exec_internal_command(struct mtip_port *port,
1295 					struct host_to_dev_fis *fis,
1296 					int fis_len,
1297 					dma_addr_t buffer,
1298 					int buf_len,
1299 					u32 opts,
1300 					gfp_t atomic,
1301 					unsigned long timeout)
1302 {
1303 	struct mtip_cmd_sg *command_sg;
1304 	DECLARE_COMPLETION_ONSTACK(wait);
1305 	int rv = 0, ready2go = 1;
1306 	struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
1307 	unsigned long to;
1308 	struct driver_data *dd = port->dd;
1309 
1310 	/* Make sure the buffer is 8 byte aligned. This is asic specific. */
1311 	if (buffer & 0x00000007) {
1312 		dev_err(&dd->pdev->dev, "SG buffer is not 8 byte aligned\n");
1313 		return -EFAULT;
1314 	}
1315 
1316 	to = jiffies + msecs_to_jiffies(timeout);
1317 	do {
1318 		ready2go = !test_and_set_bit(MTIP_TAG_INTERNAL,
1319 						port->allocated);
1320 		if (ready2go)
1321 			break;
1322 		mdelay(100);
1323 	} while (time_before(jiffies, to));
1324 	if (!ready2go) {
1325 		dev_warn(&dd->pdev->dev,
1326 			"Internal cmd active. new cmd [%02X]\n", fis->command);
1327 		return -EBUSY;
1328 	}
1329 	set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1330 	port->ic_pause_timer = 0;
1331 
1332 	clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
1333 	clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
1334 
1335 	if (atomic == GFP_KERNEL) {
1336 		if (fis->command != ATA_CMD_STANDBYNOW1) {
1337 			/* wait for io to complete if non atomic */
1338 			if (mtip_quiesce_io(port, 5000) < 0) {
1339 				dev_warn(&dd->pdev->dev,
1340 					"Failed to quiesce IO\n");
1341 				release_slot(port, MTIP_TAG_INTERNAL);
1342 				clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1343 				wake_up_interruptible(&port->svc_wait);
1344 				return -EBUSY;
1345 			}
1346 		}
1347 
1348 		/* Set the completion function and data for the command. */
1349 		int_cmd->comp_data = &wait;
1350 		int_cmd->comp_func = mtip_completion;
1351 
1352 	} else {
1353 		/* Clear completion - we're going to poll */
1354 		int_cmd->comp_data = NULL;
1355 		int_cmd->comp_func = mtip_null_completion;
1356 	}
1357 
1358 	/* Copy the command to the command table */
1359 	memcpy(int_cmd->command, fis, fis_len*4);
1360 
1361 	/* Populate the SG list */
1362 	int_cmd->command_header->opts =
1363 		 __force_bit2int cpu_to_le32(opts | fis_len);
1364 	if (buf_len) {
1365 		command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ;
1366 
1367 		command_sg->info =
1368 			__force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF);
1369 		command_sg->dba	=
1370 			__force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF);
1371 		command_sg->dba_upper =
1372 			__force_bit2int cpu_to_le32((buffer >> 16) >> 16);
1373 
1374 		int_cmd->command_header->opts |=
1375 			__force_bit2int cpu_to_le32((1 << 16));
1376 	}
1377 
1378 	/* Populate the command header */
1379 	int_cmd->command_header->byte_count = 0;
1380 
1381 	/* Issue the command to the hardware */
1382 	mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
1383 
1384 	if (atomic == GFP_KERNEL) {
1385 		/* Wait for the command to complete or timeout. */
1386 		if (wait_for_completion_interruptible_timeout(
1387 				&wait,
1388 				msecs_to_jiffies(timeout)) <= 0) {
1389 			if (rv == -ERESTARTSYS) { /* interrupted */
1390 				dev_err(&dd->pdev->dev,
1391 					"Internal command [%02X] was interrupted after %lu ms\n",
1392 					fis->command, timeout);
1393 				rv = -EINTR;
1394 				goto exec_ic_exit;
1395 			} else if (rv == 0) /* timeout */
1396 				dev_err(&dd->pdev->dev,
1397 					"Internal command did not complete [%02X] within timeout of  %lu ms\n",
1398 					fis->command, timeout);
1399 			else
1400 				dev_err(&dd->pdev->dev,
1401 					"Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
1402 					fis->command, rv, timeout);
1403 
1404 			if (mtip_check_surprise_removal(dd->pdev) ||
1405 				test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1406 						&dd->dd_flag)) {
1407 				dev_err(&dd->pdev->dev,
1408 					"Internal command [%02X] wait returned due to SR\n",
1409 					fis->command);
1410 				rv = -ENXIO;
1411 				goto exec_ic_exit;
1412 			}
1413 			mtip_device_reset(dd); /* recover from timeout issue */
1414 			rv = -EAGAIN;
1415 			goto exec_ic_exit;
1416 		}
1417 	} else {
1418 		u32 hba_stat, port_stat;
1419 
1420 		/* Spin for <timeout> checking if command still outstanding */
1421 		timeout = jiffies + msecs_to_jiffies(timeout);
1422 		while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1423 				& (1 << MTIP_TAG_INTERNAL))
1424 				&& time_before(jiffies, timeout)) {
1425 			if (mtip_check_surprise_removal(dd->pdev)) {
1426 				rv = -ENXIO;
1427 				goto exec_ic_exit;
1428 			}
1429 			if ((fis->command != ATA_CMD_STANDBYNOW1) &&
1430 				test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1431 						&dd->dd_flag)) {
1432 				rv = -ENXIO;
1433 				goto exec_ic_exit;
1434 			}
1435 			port_stat = readl(port->mmio + PORT_IRQ_STAT);
1436 			if (!port_stat)
1437 				continue;
1438 
1439 			if (port_stat & PORT_IRQ_ERR) {
1440 				dev_err(&dd->pdev->dev,
1441 					"Internal command [%02X] failed\n",
1442 					fis->command);
1443 				mtip_device_reset(dd);
1444 				rv = -EIO;
1445 				goto exec_ic_exit;
1446 			} else {
1447 				writel(port_stat, port->mmio + PORT_IRQ_STAT);
1448 				hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
1449 				if (hba_stat)
1450 					writel(hba_stat,
1451 						dd->mmio + HOST_IRQ_STAT);
1452 			}
1453 			break;
1454 		}
1455 	}
1456 
1457 	if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1458 			& (1 << MTIP_TAG_INTERNAL)) {
1459 		rv = -ENXIO;
1460 		if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
1461 			mtip_device_reset(dd);
1462 			rv = -EAGAIN;
1463 		}
1464 	}
1465 exec_ic_exit:
1466 	/* Clear the allocated and active bits for the internal command. */
1467 	atomic_set(&int_cmd->active, 0);
1468 	release_slot(port, MTIP_TAG_INTERNAL);
1469 	if (rv >= 0 && mtip_pause_ncq(port, fis)) {
1470 		/* NCQ paused */
1471 		return rv;
1472 	}
1473 	clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1474 	wake_up_interruptible(&port->svc_wait);
1475 
1476 	return rv;
1477 }
1478 
1479 /*
1480  * Byte-swap ATA ID strings.
1481  *
1482  * ATA identify data contains strings in byte-swapped 16-bit words.
1483  * They must be swapped (on all architectures) to be usable as C strings.
1484  * This function swaps bytes in-place.
1485  *
1486  * @buf The buffer location of the string
1487  * @len The number of bytes to swap
1488  *
1489  * return value
1490  *	None
1491  */
1492 static inline void ata_swap_string(u16 *buf, unsigned int len)
1493 {
1494 	int i;
1495 	for (i = 0; i < (len/2); i++)
1496 		be16_to_cpus(&buf[i]);
1497 }
1498 
1499 /*
1500  * Request the device identity information.
1501  *
1502  * If a user space buffer is not specified, i.e. is NULL, the
1503  * identify information is still read from the drive and placed
1504  * into the identify data buffer (@e port->identify) in the
1505  * port data structure.
1506  * When the identify buffer contains valid identify information @e
1507  * port->identify_valid is non-zero.
1508  *
1509  * @port	 Pointer to the port structure.
1510  * @user_buffer  A user space buffer where the identify data should be
1511  *                    copied.
1512  *
1513  * return value
1514  *	0	Command completed successfully.
1515  *	-EFAULT An error occurred while coping data to the user buffer.
1516  *	-1	Command failed.
1517  */
1518 static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
1519 {
1520 	int rv = 0;
1521 	struct host_to_dev_fis fis;
1522 
1523 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
1524 		return -EFAULT;
1525 
1526 	/* Build the FIS. */
1527 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1528 	fis.type	= 0x27;
1529 	fis.opts	= 1 << 7;
1530 	fis.command	= ATA_CMD_ID_ATA;
1531 
1532 	/* Set the identify information as invalid. */
1533 	port->identify_valid = 0;
1534 
1535 	/* Clear the identify information. */
1536 	memset(port->identify, 0, sizeof(u16) * ATA_ID_WORDS);
1537 
1538 	/* Execute the command. */
1539 	if (mtip_exec_internal_command(port,
1540 				&fis,
1541 				5,
1542 				port->identify_dma,
1543 				sizeof(u16) * ATA_ID_WORDS,
1544 				0,
1545 				GFP_KERNEL,
1546 				MTIP_INTERNAL_COMMAND_TIMEOUT_MS)
1547 				< 0) {
1548 		rv = -1;
1549 		goto out;
1550 	}
1551 
1552 	/*
1553 	 * Perform any necessary byte-swapping.  Yes, the kernel does in fact
1554 	 * perform field-sensitive swapping on the string fields.
1555 	 * See the kernel use of ata_id_string() for proof of this.
1556 	 */
1557 #ifdef __LITTLE_ENDIAN
1558 	ata_swap_string(port->identify + 27, 40);  /* model string*/
1559 	ata_swap_string(port->identify + 23, 8);   /* firmware string*/
1560 	ata_swap_string(port->identify + 10, 20);  /* serial# string*/
1561 #else
1562 	{
1563 		int i;
1564 		for (i = 0; i < ATA_ID_WORDS; i++)
1565 			port->identify[i] = le16_to_cpu(port->identify[i]);
1566 	}
1567 #endif
1568 
1569 #ifdef MTIP_TRIM /* Disabling TRIM support temporarily */
1570 	/* Demux ID.DRAT & ID.RZAT to determine trim support */
1571 	if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
1572 		port->dd->trim_supp = true;
1573 	else
1574 #endif
1575 		port->dd->trim_supp = false;
1576 
1577 	/* Set the identify buffer as valid. */
1578 	port->identify_valid = 1;
1579 
1580 	if (user_buffer) {
1581 		if (copy_to_user(
1582 			user_buffer,
1583 			port->identify,
1584 			ATA_ID_WORDS * sizeof(u16))) {
1585 			rv = -EFAULT;
1586 			goto out;
1587 		}
1588 	}
1589 
1590 out:
1591 	return rv;
1592 }
1593 
1594 /*
1595  * Issue a standby immediate command to the device.
1596  *
1597  * @port Pointer to the port structure.
1598  *
1599  * return value
1600  *	0	Command was executed successfully.
1601  *	-1	An error occurred while executing the command.
1602  */
1603 static int mtip_standby_immediate(struct mtip_port *port)
1604 {
1605 	int rv;
1606 	struct host_to_dev_fis	fis;
1607 	unsigned long start;
1608 
1609 	/* Build the FIS. */
1610 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1611 	fis.type	= 0x27;
1612 	fis.opts	= 1 << 7;
1613 	fis.command	= ATA_CMD_STANDBYNOW1;
1614 
1615 	start = jiffies;
1616 	rv = mtip_exec_internal_command(port,
1617 					&fis,
1618 					5,
1619 					0,
1620 					0,
1621 					0,
1622 					GFP_ATOMIC,
1623 					15000);
1624 	dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n",
1625 			jiffies_to_msecs(jiffies - start));
1626 	if (rv)
1627 		dev_warn(&port->dd->pdev->dev,
1628 			"STANDBY IMMEDIATE command failed.\n");
1629 
1630 	return rv;
1631 }
1632 
1633 /*
1634  * Issue a READ LOG EXT command to the device.
1635  *
1636  * @port	pointer to the port structure.
1637  * @page	page number to fetch
1638  * @buffer	pointer to buffer
1639  * @buffer_dma	dma address corresponding to @buffer
1640  * @sectors	page length to fetch, in sectors
1641  *
1642  * return value
1643  *	@rv	return value from mtip_exec_internal_command()
1644  */
1645 static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
1646 				dma_addr_t buffer_dma, unsigned int sectors)
1647 {
1648 	struct host_to_dev_fis fis;
1649 
1650 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1651 	fis.type	= 0x27;
1652 	fis.opts	= 1 << 7;
1653 	fis.command	= ATA_CMD_READ_LOG_EXT;
1654 	fis.sect_count	= sectors & 0xFF;
1655 	fis.sect_cnt_ex	= (sectors >> 8) & 0xFF;
1656 	fis.lba_low	= page;
1657 	fis.lba_mid	= 0;
1658 	fis.device	= ATA_DEVICE_OBS;
1659 
1660 	memset(buffer, 0, sectors * ATA_SECT_SIZE);
1661 
1662 	return mtip_exec_internal_command(port,
1663 					&fis,
1664 					5,
1665 					buffer_dma,
1666 					sectors * ATA_SECT_SIZE,
1667 					0,
1668 					GFP_ATOMIC,
1669 					MTIP_INTERNAL_COMMAND_TIMEOUT_MS);
1670 }
1671 
1672 /*
1673  * Issue a SMART READ DATA command to the device.
1674  *
1675  * @port	pointer to the port structure.
1676  * @buffer	pointer to buffer
1677  * @buffer_dma	dma address corresponding to @buffer
1678  *
1679  * return value
1680  *	@rv	return value from mtip_exec_internal_command()
1681  */
1682 static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer,
1683 					dma_addr_t buffer_dma)
1684 {
1685 	struct host_to_dev_fis fis;
1686 
1687 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1688 	fis.type	= 0x27;
1689 	fis.opts	= 1 << 7;
1690 	fis.command	= ATA_CMD_SMART;
1691 	fis.features	= 0xD0;
1692 	fis.sect_count	= 1;
1693 	fis.lba_mid	= 0x4F;
1694 	fis.lba_hi	= 0xC2;
1695 	fis.device	= ATA_DEVICE_OBS;
1696 
1697 	return mtip_exec_internal_command(port,
1698 					&fis,
1699 					5,
1700 					buffer_dma,
1701 					ATA_SECT_SIZE,
1702 					0,
1703 					GFP_ATOMIC,
1704 					15000);
1705 }
1706 
1707 /*
1708  * Get the value of a smart attribute
1709  *
1710  * @port	pointer to the port structure
1711  * @id		attribute number
1712  * @attrib	pointer to return attrib information corresponding to @id
1713  *
1714  * return value
1715  *	-EINVAL	NULL buffer passed or unsupported attribute @id.
1716  *	-EPERM	Identify data not valid, SMART not supported or not enabled
1717  */
1718 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
1719 						struct smart_attr *attrib)
1720 {
1721 	int rv, i;
1722 	struct smart_attr *pattr;
1723 
1724 	if (!attrib)
1725 		return -EINVAL;
1726 
1727 	if (!port->identify_valid) {
1728 		dev_warn(&port->dd->pdev->dev, "IDENTIFY DATA not valid\n");
1729 		return -EPERM;
1730 	}
1731 	if (!(port->identify[82] & 0x1)) {
1732 		dev_warn(&port->dd->pdev->dev, "SMART not supported\n");
1733 		return -EPERM;
1734 	}
1735 	if (!(port->identify[85] & 0x1)) {
1736 		dev_warn(&port->dd->pdev->dev, "SMART not enabled\n");
1737 		return -EPERM;
1738 	}
1739 
1740 	memset(port->smart_buf, 0, ATA_SECT_SIZE);
1741 	rv = mtip_get_smart_data(port, port->smart_buf, port->smart_buf_dma);
1742 	if (rv) {
1743 		dev_warn(&port->dd->pdev->dev, "Failed to ge SMART data\n");
1744 		return rv;
1745 	}
1746 
1747 	pattr = (struct smart_attr *)(port->smart_buf + 2);
1748 	for (i = 0; i < 29; i++, pattr++)
1749 		if (pattr->attr_id == id) {
1750 			memcpy(attrib, pattr, sizeof(struct smart_attr));
1751 			break;
1752 		}
1753 
1754 	if (i == 29) {
1755 		dev_warn(&port->dd->pdev->dev,
1756 			"Query for invalid SMART attribute ID\n");
1757 		rv = -EINVAL;
1758 	}
1759 
1760 	return rv;
1761 }
1762 
1763 /*
1764  * Trim unused sectors
1765  *
1766  * @dd		pointer to driver_data structure
1767  * @lba		starting lba
1768  * @len		# of 512b sectors to trim
1769  *
1770  * return value
1771  *      -ENOMEM		Out of dma memory
1772  *      -EINVAL		Invalid parameters passed in, trim not supported
1773  *      -EIO		Error submitting trim request to hw
1774  */
1775 static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
1776 				unsigned int len)
1777 {
1778 	int i, rv = 0;
1779 	u64 tlba, tlen, sect_left;
1780 	struct mtip_trim_entry *buf;
1781 	dma_addr_t dma_addr;
1782 	struct host_to_dev_fis fis;
1783 
1784 	if (!len || dd->trim_supp == false)
1785 		return -EINVAL;
1786 
1787 	/* Trim request too big */
1788 	WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES));
1789 
1790 	/* Trim request not aligned on 4k boundary */
1791 	WARN_ON(len % 8 != 0);
1792 
1793 	/* Warn if vu_trim structure is too big */
1794 	WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE);
1795 
1796 	/* Allocate a DMA buffer for the trim structure */
1797 	buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr,
1798 								GFP_KERNEL);
1799 	if (!buf)
1800 		return -ENOMEM;
1801 	memset(buf, 0, ATA_SECT_SIZE);
1802 
1803 	for (i = 0, sect_left = len, tlba = lba;
1804 			i < MTIP_MAX_TRIM_ENTRIES && sect_left;
1805 			i++) {
1806 		tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ?
1807 					MTIP_MAX_TRIM_ENTRY_LEN :
1808 					sect_left);
1809 		buf[i].lba = __force_bit2int cpu_to_le32(tlba);
1810 		buf[i].range = __force_bit2int cpu_to_le16(tlen);
1811 		tlba += tlen;
1812 		sect_left -= tlen;
1813 	}
1814 	WARN_ON(sect_left != 0);
1815 
1816 	/* Build the fis */
1817 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1818 	fis.type       = 0x27;
1819 	fis.opts       = 1 << 7;
1820 	fis.command    = 0xfb;
1821 	fis.features   = 0x60;
1822 	fis.sect_count = 1;
1823 	fis.device     = ATA_DEVICE_OBS;
1824 
1825 	if (mtip_exec_internal_command(dd->port,
1826 					&fis,
1827 					5,
1828 					dma_addr,
1829 					ATA_SECT_SIZE,
1830 					0,
1831 					GFP_KERNEL,
1832 					MTIP_TRIM_TIMEOUT_MS) < 0)
1833 		rv = -EIO;
1834 
1835 	dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr);
1836 	return rv;
1837 }
1838 
1839 /*
1840  * Get the drive capacity.
1841  *
1842  * @dd      Pointer to the device data structure.
1843  * @sectors Pointer to the variable that will receive the sector count.
1844  *
1845  * return value
1846  *	1 Capacity was returned successfully.
1847  *	0 The identify information is invalid.
1848  */
1849 static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors)
1850 {
1851 	struct mtip_port *port = dd->port;
1852 	u64 total, raw0, raw1, raw2, raw3;
1853 	raw0 = port->identify[100];
1854 	raw1 = port->identify[101];
1855 	raw2 = port->identify[102];
1856 	raw3 = port->identify[103];
1857 	total = raw0 | raw1<<16 | raw2<<32 | raw3<<48;
1858 	*sectors = total;
1859 	return (bool) !!port->identify_valid;
1860 }
1861 
1862 /*
1863  * Display the identify command data.
1864  *
1865  * @port Pointer to the port data structure.
1866  *
1867  * return value
1868  *	None
1869  */
1870 static void mtip_dump_identify(struct mtip_port *port)
1871 {
1872 	sector_t sectors;
1873 	unsigned short revid;
1874 	char cbuf[42];
1875 
1876 	if (!port->identify_valid)
1877 		return;
1878 
1879 	strlcpy(cbuf, (char *)(port->identify+10), 21);
1880 	dev_info(&port->dd->pdev->dev,
1881 		"Serial No.: %s\n", cbuf);
1882 
1883 	strlcpy(cbuf, (char *)(port->identify+23), 9);
1884 	dev_info(&port->dd->pdev->dev,
1885 		"Firmware Ver.: %s\n", cbuf);
1886 
1887 	strlcpy(cbuf, (char *)(port->identify+27), 41);
1888 	dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
1889 
1890 	if (mtip_hw_get_capacity(port->dd, &sectors))
1891 		dev_info(&port->dd->pdev->dev,
1892 			"Capacity: %llu sectors (%llu MB)\n",
1893 			 (u64)sectors,
1894 			 ((u64)sectors) * ATA_SECT_SIZE >> 20);
1895 
1896 	pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
1897 	switch (revid & 0xFF) {
1898 	case 0x1:
1899 		strlcpy(cbuf, "A0", 3);
1900 		break;
1901 	case 0x3:
1902 		strlcpy(cbuf, "A2", 3);
1903 		break;
1904 	default:
1905 		strlcpy(cbuf, "?", 2);
1906 		break;
1907 	}
1908 	dev_info(&port->dd->pdev->dev,
1909 		"Card Type: %s\n", cbuf);
1910 }
1911 
1912 /*
1913  * Map the commands scatter list into the command table.
1914  *
1915  * @command Pointer to the command.
1916  * @nents Number of scatter list entries.
1917  *
1918  * return value
1919  *	None
1920  */
1921 static inline void fill_command_sg(struct driver_data *dd,
1922 				struct mtip_cmd *command,
1923 				int nents)
1924 {
1925 	int n;
1926 	unsigned int dma_len;
1927 	struct mtip_cmd_sg *command_sg;
1928 	struct scatterlist *sg = command->sg;
1929 
1930 	command_sg = command->command + AHCI_CMD_TBL_HDR_SZ;
1931 
1932 	for (n = 0; n < nents; n++) {
1933 		dma_len = sg_dma_len(sg);
1934 		if (dma_len > 0x400000)
1935 			dev_err(&dd->pdev->dev,
1936 				"DMA segment length truncated\n");
1937 		command_sg->info = __force_bit2int
1938 			cpu_to_le32((dma_len-1) & 0x3FFFFF);
1939 		command_sg->dba	= __force_bit2int
1940 			cpu_to_le32(sg_dma_address(sg));
1941 		command_sg->dba_upper = __force_bit2int
1942 			cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
1943 		command_sg++;
1944 		sg++;
1945 	}
1946 }
1947 
1948 /*
1949  * @brief Execute a drive command.
1950  *
1951  * return value 0 The command completed successfully.
1952  * return value -1 An error occurred while executing the command.
1953  */
1954 static int exec_drive_task(struct mtip_port *port, u8 *command)
1955 {
1956 	struct host_to_dev_fis	fis;
1957 	struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
1958 
1959 	/* Build the FIS. */
1960 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1961 	fis.type	= 0x27;
1962 	fis.opts	= 1 << 7;
1963 	fis.command	= command[0];
1964 	fis.features	= command[1];
1965 	fis.sect_count	= command[2];
1966 	fis.sector	= command[3];
1967 	fis.cyl_low	= command[4];
1968 	fis.cyl_hi	= command[5];
1969 	fis.device	= command[6] & ~0x10; /* Clear the dev bit*/
1970 
1971 	dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n",
1972 		__func__,
1973 		command[0],
1974 		command[1],
1975 		command[2],
1976 		command[3],
1977 		command[4],
1978 		command[5],
1979 		command[6]);
1980 
1981 	/* Execute the command. */
1982 	if (mtip_exec_internal_command(port,
1983 				 &fis,
1984 				 5,
1985 				 0,
1986 				 0,
1987 				 0,
1988 				 GFP_KERNEL,
1989 				 MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) {
1990 		return -1;
1991 	}
1992 
1993 	command[0] = reply->command; /* Status*/
1994 	command[1] = reply->features; /* Error*/
1995 	command[4] = reply->cyl_low;
1996 	command[5] = reply->cyl_hi;
1997 
1998 	dbg_printk(MTIP_DRV_NAME " %s: Completion Status: stat %x, err %x , cyl_lo %x cyl_hi %x\n",
1999 		__func__,
2000 		command[0],
2001 		command[1],
2002 		command[4],
2003 		command[5]);
2004 
2005 	return 0;
2006 }
2007 
2008 /*
2009  * @brief Execute a drive command.
2010  *
2011  * @param port Pointer to the port data structure.
2012  * @param command Pointer to the user specified command parameters.
2013  * @param user_buffer Pointer to the user space buffer where read sector
2014  *                   data should be copied.
2015  *
2016  * return value 0 The command completed successfully.
2017  * return value -EFAULT An error occurred while copying the completion
2018  *                 data to the user space buffer.
2019  * return value -1 An error occurred while executing the command.
2020  */
2021 static int exec_drive_command(struct mtip_port *port, u8 *command,
2022 				void __user *user_buffer)
2023 {
2024 	struct host_to_dev_fis	fis;
2025 	struct host_to_dev_fis *reply;
2026 	u8 *buf = NULL;
2027 	dma_addr_t dma_addr = 0;
2028 	int rv = 0, xfer_sz = command[3];
2029 
2030 	if (xfer_sz) {
2031 		if (!user_buffer)
2032 			return -EFAULT;
2033 
2034 		buf = dmam_alloc_coherent(&port->dd->pdev->dev,
2035 				ATA_SECT_SIZE * xfer_sz,
2036 				&dma_addr,
2037 				GFP_KERNEL);
2038 		if (!buf) {
2039 			dev_err(&port->dd->pdev->dev,
2040 				"Memory allocation failed (%d bytes)\n",
2041 				ATA_SECT_SIZE * xfer_sz);
2042 			return -ENOMEM;
2043 		}
2044 		memset(buf, 0, ATA_SECT_SIZE * xfer_sz);
2045 	}
2046 
2047 	/* Build the FIS. */
2048 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
2049 	fis.type	= 0x27;
2050 	fis.opts	= 1 << 7;
2051 	fis.command	= command[0];
2052 	fis.features	= command[2];
2053 	fis.sect_count	= command[3];
2054 	if (fis.command == ATA_CMD_SMART) {
2055 		fis.sector	= command[1];
2056 		fis.cyl_low	= 0x4F;
2057 		fis.cyl_hi	= 0xC2;
2058 	}
2059 
2060 	if (xfer_sz)
2061 		reply = (port->rxfis + RX_FIS_PIO_SETUP);
2062 	else
2063 		reply = (port->rxfis + RX_FIS_D2H_REG);
2064 
2065 	dbg_printk(MTIP_DRV_NAME
2066 		" %s: User Command: cmd %x, sect %x, "
2067 		"feat %x, sectcnt %x\n",
2068 		__func__,
2069 		command[0],
2070 		command[1],
2071 		command[2],
2072 		command[3]);
2073 
2074 	/* Execute the command. */
2075 	if (mtip_exec_internal_command(port,
2076 				&fis,
2077 				 5,
2078 				 (xfer_sz ? dma_addr : 0),
2079 				 (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0),
2080 				 0,
2081 				 GFP_KERNEL,
2082 				 MTIP_IOCTL_COMMAND_TIMEOUT_MS)
2083 				 < 0) {
2084 		rv = -EFAULT;
2085 		goto exit_drive_command;
2086 	}
2087 
2088 	/* Collect the completion status. */
2089 	command[0] = reply->command; /* Status*/
2090 	command[1] = reply->features; /* Error*/
2091 	command[2] = reply->sect_count;
2092 
2093 	dbg_printk(MTIP_DRV_NAME
2094 		" %s: Completion Status: stat %x, "
2095 		"err %x, nsect %x\n",
2096 		__func__,
2097 		command[0],
2098 		command[1],
2099 		command[2]);
2100 
2101 	if (xfer_sz) {
2102 		if (copy_to_user(user_buffer,
2103 				 buf,
2104 				 ATA_SECT_SIZE * command[3])) {
2105 			rv = -EFAULT;
2106 			goto exit_drive_command;
2107 		}
2108 	}
2109 exit_drive_command:
2110 	if (buf)
2111 		dmam_free_coherent(&port->dd->pdev->dev,
2112 				ATA_SECT_SIZE * xfer_sz, buf, dma_addr);
2113 	return rv;
2114 }
2115 
2116 /*
2117  *  Indicates whether a command has a single sector payload.
2118  *
2119  *  @command passed to the device to perform the certain event.
2120  *  @features passed to the device to perform the certain event.
2121  *
2122  *  return value
2123  *	1	command is one that always has a single sector payload,
2124  *		regardless of the value in the Sector Count field.
2125  *      0       otherwise
2126  *
2127  */
2128 static unsigned int implicit_sector(unsigned char command,
2129 				    unsigned char features)
2130 {
2131 	unsigned int rv = 0;
2132 
2133 	/* list of commands that have an implicit sector count of 1 */
2134 	switch (command) {
2135 	case ATA_CMD_SEC_SET_PASS:
2136 	case ATA_CMD_SEC_UNLOCK:
2137 	case ATA_CMD_SEC_ERASE_PREP:
2138 	case ATA_CMD_SEC_ERASE_UNIT:
2139 	case ATA_CMD_SEC_FREEZE_LOCK:
2140 	case ATA_CMD_SEC_DISABLE_PASS:
2141 	case ATA_CMD_PMP_READ:
2142 	case ATA_CMD_PMP_WRITE:
2143 		rv = 1;
2144 		break;
2145 	case ATA_CMD_SET_MAX:
2146 		if (features == ATA_SET_MAX_UNLOCK)
2147 			rv = 1;
2148 		break;
2149 	case ATA_CMD_SMART:
2150 		if ((features == ATA_SMART_READ_VALUES) ||
2151 				(features == ATA_SMART_READ_THRESHOLDS))
2152 			rv = 1;
2153 		break;
2154 	case ATA_CMD_CONF_OVERLAY:
2155 		if ((features == ATA_DCO_IDENTIFY) ||
2156 				(features == ATA_DCO_SET))
2157 			rv = 1;
2158 		break;
2159 	}
2160 	return rv;
2161 }
2162 static void mtip_set_timeout(struct driver_data *dd,
2163 					struct host_to_dev_fis *fis,
2164 					unsigned int *timeout, u8 erasemode)
2165 {
2166 	switch (fis->command) {
2167 	case ATA_CMD_DOWNLOAD_MICRO:
2168 		*timeout = 120000; /* 2 minutes */
2169 		break;
2170 	case ATA_CMD_SEC_ERASE_UNIT:
2171 	case 0xFC:
2172 		if (erasemode)
2173 			*timeout = ((*(dd->port->identify + 90) * 2) * 60000);
2174 		else
2175 			*timeout = ((*(dd->port->identify + 89) * 2) * 60000);
2176 		break;
2177 	case ATA_CMD_STANDBYNOW1:
2178 		*timeout = 120000;  /* 2 minutes */
2179 		break;
2180 	case 0xF7:
2181 	case 0xFA:
2182 		*timeout = 60000;  /* 60 seconds */
2183 		break;
2184 	case ATA_CMD_SMART:
2185 		*timeout = 15000;  /* 15 seconds */
2186 		break;
2187 	default:
2188 		*timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
2189 		break;
2190 	}
2191 }
2192 
2193 /*
2194  * Executes a taskfile
2195  * See ide_taskfile_ioctl() for derivation
2196  */
2197 static int exec_drive_taskfile(struct driver_data *dd,
2198 			       void __user *buf,
2199 			       ide_task_request_t *req_task,
2200 			       int outtotal)
2201 {
2202 	struct host_to_dev_fis	fis;
2203 	struct host_to_dev_fis *reply;
2204 	u8 *outbuf = NULL;
2205 	u8 *inbuf = NULL;
2206 	dma_addr_t outbuf_dma = 0;
2207 	dma_addr_t inbuf_dma = 0;
2208 	dma_addr_t dma_buffer = 0;
2209 	int err = 0;
2210 	unsigned int taskin = 0;
2211 	unsigned int taskout = 0;
2212 	u8 nsect = 0;
2213 	unsigned int timeout;
2214 	unsigned int force_single_sector;
2215 	unsigned int transfer_size;
2216 	unsigned long task_file_data;
2217 	int intotal = outtotal + req_task->out_size;
2218 	int erasemode = 0;
2219 
2220 	taskout = req_task->out_size;
2221 	taskin = req_task->in_size;
2222 	/* 130560 = 512 * 0xFF*/
2223 	if (taskin > 130560 || taskout > 130560) {
2224 		err = -EINVAL;
2225 		goto abort;
2226 	}
2227 
2228 	if (taskout) {
2229 		outbuf = kzalloc(taskout, GFP_KERNEL);
2230 		if (outbuf == NULL) {
2231 			err = -ENOMEM;
2232 			goto abort;
2233 		}
2234 		if (copy_from_user(outbuf, buf + outtotal, taskout)) {
2235 			err = -EFAULT;
2236 			goto abort;
2237 		}
2238 		outbuf_dma = pci_map_single(dd->pdev,
2239 					 outbuf,
2240 					 taskout,
2241 					 DMA_TO_DEVICE);
2242 		if (outbuf_dma == 0) {
2243 			err = -ENOMEM;
2244 			goto abort;
2245 		}
2246 		dma_buffer = outbuf_dma;
2247 	}
2248 
2249 	if (taskin) {
2250 		inbuf = kzalloc(taskin, GFP_KERNEL);
2251 		if (inbuf == NULL) {
2252 			err = -ENOMEM;
2253 			goto abort;
2254 		}
2255 
2256 		if (copy_from_user(inbuf, buf + intotal, taskin)) {
2257 			err = -EFAULT;
2258 			goto abort;
2259 		}
2260 		inbuf_dma = pci_map_single(dd->pdev,
2261 					 inbuf,
2262 					 taskin, DMA_FROM_DEVICE);
2263 		if (inbuf_dma == 0) {
2264 			err = -ENOMEM;
2265 			goto abort;
2266 		}
2267 		dma_buffer = inbuf_dma;
2268 	}
2269 
2270 	/* only supports PIO and non-data commands from this ioctl. */
2271 	switch (req_task->data_phase) {
2272 	case TASKFILE_OUT:
2273 		nsect = taskout / ATA_SECT_SIZE;
2274 		reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
2275 		break;
2276 	case TASKFILE_IN:
2277 		reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
2278 		break;
2279 	case TASKFILE_NO_DATA:
2280 		reply = (dd->port->rxfis + RX_FIS_D2H_REG);
2281 		break;
2282 	default:
2283 		err = -EINVAL;
2284 		goto abort;
2285 	}
2286 
2287 	/* Build the FIS. */
2288 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
2289 
2290 	fis.type	= 0x27;
2291 	fis.opts	= 1 << 7;
2292 	fis.command	= req_task->io_ports[7];
2293 	fis.features	= req_task->io_ports[1];
2294 	fis.sect_count	= req_task->io_ports[2];
2295 	fis.lba_low	= req_task->io_ports[3];
2296 	fis.lba_mid	= req_task->io_ports[4];
2297 	fis.lba_hi	= req_task->io_ports[5];
2298 	 /* Clear the dev bit*/
2299 	fis.device	= req_task->io_ports[6] & ~0x10;
2300 
2301 	if ((req_task->in_flags.all == 0) && (req_task->out_flags.all & 1)) {
2302 		req_task->in_flags.all	=
2303 			IDE_TASKFILE_STD_IN_FLAGS |
2304 			(IDE_HOB_STD_IN_FLAGS << 8);
2305 		fis.lba_low_ex		= req_task->hob_ports[3];
2306 		fis.lba_mid_ex		= req_task->hob_ports[4];
2307 		fis.lba_hi_ex		= req_task->hob_ports[5];
2308 		fis.features_ex		= req_task->hob_ports[1];
2309 		fis.sect_cnt_ex		= req_task->hob_ports[2];
2310 
2311 	} else {
2312 		req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
2313 	}
2314 
2315 	force_single_sector = implicit_sector(fis.command, fis.features);
2316 
2317 	if ((taskin || taskout) && (!fis.sect_count)) {
2318 		if (nsect)
2319 			fis.sect_count = nsect;
2320 		else {
2321 			if (!force_single_sector) {
2322 				dev_warn(&dd->pdev->dev,
2323 					"data movement but "
2324 					"sect_count is 0\n");
2325 					err = -EINVAL;
2326 					goto abort;
2327 			}
2328 		}
2329 	}
2330 
2331 	dbg_printk(MTIP_DRV_NAME
2332 		" %s: cmd %x, feat %x, nsect %x,"
2333 		" sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x,"
2334 		" head/dev %x\n",
2335 		__func__,
2336 		fis.command,
2337 		fis.features,
2338 		fis.sect_count,
2339 		fis.lba_low,
2340 		fis.lba_mid,
2341 		fis.lba_hi,
2342 		fis.device);
2343 
2344 	/* check for erase mode support during secure erase.*/
2345 	if ((fis.command == ATA_CMD_SEC_ERASE_UNIT) && outbuf &&
2346 					(outbuf[0] & MTIP_SEC_ERASE_MODE)) {
2347 		erasemode = 1;
2348 	}
2349 
2350 	mtip_set_timeout(dd, &fis, &timeout, erasemode);
2351 
2352 	/* Determine the correct transfer size.*/
2353 	if (force_single_sector)
2354 		transfer_size = ATA_SECT_SIZE;
2355 	else
2356 		transfer_size = ATA_SECT_SIZE * fis.sect_count;
2357 
2358 	/* Execute the command.*/
2359 	if (mtip_exec_internal_command(dd->port,
2360 				 &fis,
2361 				 5,
2362 				 dma_buffer,
2363 				 transfer_size,
2364 				 0,
2365 				 GFP_KERNEL,
2366 				 timeout) < 0) {
2367 		err = -EIO;
2368 		goto abort;
2369 	}
2370 
2371 	task_file_data = readl(dd->port->mmio+PORT_TFDATA);
2372 
2373 	if ((req_task->data_phase == TASKFILE_IN) && !(task_file_data & 1)) {
2374 		reply = dd->port->rxfis + RX_FIS_PIO_SETUP;
2375 		req_task->io_ports[7] = reply->control;
2376 	} else {
2377 		reply = dd->port->rxfis + RX_FIS_D2H_REG;
2378 		req_task->io_ports[7] = reply->command;
2379 	}
2380 
2381 	/* reclaim the DMA buffers.*/
2382 	if (inbuf_dma)
2383 		pci_unmap_single(dd->pdev, inbuf_dma,
2384 			taskin, DMA_FROM_DEVICE);
2385 	if (outbuf_dma)
2386 		pci_unmap_single(dd->pdev, outbuf_dma,
2387 			taskout, DMA_TO_DEVICE);
2388 	inbuf_dma  = 0;
2389 	outbuf_dma = 0;
2390 
2391 	/* return the ATA registers to the caller.*/
2392 	req_task->io_ports[1] = reply->features;
2393 	req_task->io_ports[2] = reply->sect_count;
2394 	req_task->io_ports[3] = reply->lba_low;
2395 	req_task->io_ports[4] = reply->lba_mid;
2396 	req_task->io_ports[5] = reply->lba_hi;
2397 	req_task->io_ports[6] = reply->device;
2398 
2399 	if (req_task->out_flags.all & 1)  {
2400 
2401 		req_task->hob_ports[3] = reply->lba_low_ex;
2402 		req_task->hob_ports[4] = reply->lba_mid_ex;
2403 		req_task->hob_ports[5] = reply->lba_hi_ex;
2404 		req_task->hob_ports[1] = reply->features_ex;
2405 		req_task->hob_ports[2] = reply->sect_cnt_ex;
2406 	}
2407 	dbg_printk(MTIP_DRV_NAME
2408 		" %s: Completion: stat %x,"
2409 		"err %x, sect_cnt %x, lbalo %x,"
2410 		"lbamid %x, lbahi %x, dev %x\n",
2411 		__func__,
2412 		req_task->io_ports[7],
2413 		req_task->io_ports[1],
2414 		req_task->io_ports[2],
2415 		req_task->io_ports[3],
2416 		req_task->io_ports[4],
2417 		req_task->io_ports[5],
2418 		req_task->io_ports[6]);
2419 
2420 	if (taskout) {
2421 		if (copy_to_user(buf + outtotal, outbuf, taskout)) {
2422 			err = -EFAULT;
2423 			goto abort;
2424 		}
2425 	}
2426 	if (taskin) {
2427 		if (copy_to_user(buf + intotal, inbuf, taskin)) {
2428 			err = -EFAULT;
2429 			goto abort;
2430 		}
2431 	}
2432 abort:
2433 	if (inbuf_dma)
2434 		pci_unmap_single(dd->pdev, inbuf_dma,
2435 					taskin, DMA_FROM_DEVICE);
2436 	if (outbuf_dma)
2437 		pci_unmap_single(dd->pdev, outbuf_dma,
2438 					taskout, DMA_TO_DEVICE);
2439 	kfree(outbuf);
2440 	kfree(inbuf);
2441 
2442 	return err;
2443 }
2444 
2445 /*
2446  * Handle IOCTL calls from the Block Layer.
2447  *
2448  * This function is called by the Block Layer when it receives an IOCTL
2449  * command that it does not understand. If the IOCTL command is not supported
2450  * this function returns -ENOTTY.
2451  *
2452  * @dd  Pointer to the driver data structure.
2453  * @cmd IOCTL command passed from the Block Layer.
2454  * @arg IOCTL argument passed from the Block Layer.
2455  *
2456  * return value
2457  *	0	The IOCTL completed successfully.
2458  *	-ENOTTY The specified command is not supported.
2459  *	-EFAULT An error occurred copying data to a user space buffer.
2460  *	-EIO	An error occurred while executing the command.
2461  */
2462 static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
2463 			 unsigned long arg)
2464 {
2465 	switch (cmd) {
2466 	case HDIO_GET_IDENTITY:
2467 	{
2468 		if (copy_to_user((void __user *)arg, dd->port->identify,
2469 						sizeof(u16) * ATA_ID_WORDS))
2470 			return -EFAULT;
2471 		break;
2472 	}
2473 	case HDIO_DRIVE_CMD:
2474 	{
2475 		u8 drive_command[4];
2476 
2477 		/* Copy the user command info to our buffer. */
2478 		if (copy_from_user(drive_command,
2479 					 (void __user *) arg,
2480 					 sizeof(drive_command)))
2481 			return -EFAULT;
2482 
2483 		/* Execute the drive command. */
2484 		if (exec_drive_command(dd->port,
2485 					 drive_command,
2486 					 (void __user *) (arg+4)))
2487 			return -EIO;
2488 
2489 		/* Copy the status back to the users buffer. */
2490 		if (copy_to_user((void __user *) arg,
2491 					 drive_command,
2492 					 sizeof(drive_command)))
2493 			return -EFAULT;
2494 
2495 		break;
2496 	}
2497 	case HDIO_DRIVE_TASK:
2498 	{
2499 		u8 drive_command[7];
2500 
2501 		/* Copy the user command info to our buffer. */
2502 		if (copy_from_user(drive_command,
2503 					 (void __user *) arg,
2504 					 sizeof(drive_command)))
2505 			return -EFAULT;
2506 
2507 		/* Execute the drive command. */
2508 		if (exec_drive_task(dd->port, drive_command))
2509 			return -EIO;
2510 
2511 		/* Copy the status back to the users buffer. */
2512 		if (copy_to_user((void __user *) arg,
2513 					 drive_command,
2514 					 sizeof(drive_command)))
2515 			return -EFAULT;
2516 
2517 		break;
2518 	}
2519 	case HDIO_DRIVE_TASKFILE: {
2520 		ide_task_request_t req_task;
2521 		int ret, outtotal;
2522 
2523 		if (copy_from_user(&req_task, (void __user *) arg,
2524 					sizeof(req_task)))
2525 			return -EFAULT;
2526 
2527 		outtotal = sizeof(req_task);
2528 
2529 		ret = exec_drive_taskfile(dd, (void __user *) arg,
2530 						&req_task, outtotal);
2531 
2532 		if (copy_to_user((void __user *) arg, &req_task,
2533 							sizeof(req_task)))
2534 			return -EFAULT;
2535 
2536 		return ret;
2537 	}
2538 
2539 	default:
2540 		return -EINVAL;
2541 	}
2542 	return 0;
2543 }
2544 
2545 /*
2546  * Submit an IO to the hw
2547  *
2548  * This function is called by the block layer to issue an io
2549  * to the device. Upon completion, the callback function will
2550  * be called with the data parameter passed as the callback data.
2551  *
2552  * @dd       Pointer to the driver data structure.
2553  * @start    First sector to read.
2554  * @nsect    Number of sectors to read.
2555  * @nents    Number of entries in scatter list for the read command.
2556  * @tag      The tag of this read command.
2557  * @callback Pointer to the function that should be called
2558  *	     when the read completes.
2559  * @data     Callback data passed to the callback function
2560  *	     when the read completes.
2561  * @dir      Direction (read or write)
2562  *
2563  * return value
2564  *	None
2565  */
2566 static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
2567 			      int nsect, int nents, int tag, void *callback,
2568 			      void *data, int dir, int unaligned)
2569 {
2570 	struct host_to_dev_fis	*fis;
2571 	struct mtip_port *port = dd->port;
2572 	struct mtip_cmd *command = &port->commands[tag];
2573 	int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
2574 	u64 start = sector;
2575 
2576 	/* Map the scatter list for DMA access */
2577 	nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
2578 
2579 	command->scatter_ents = nents;
2580 
2581 	command->unaligned = unaligned;
2582 	/*
2583 	 * The number of retries for this command before it is
2584 	 * reported as a failure to the upper layers.
2585 	 */
2586 	command->retries = MTIP_MAX_RETRIES;
2587 
2588 	/* Fill out fis */
2589 	fis = command->command;
2590 	fis->type        = 0x27;
2591 	fis->opts        = 1 << 7;
2592 	fis->command     =
2593 		(dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE);
2594 	fis->lba_low     = start & 0xFF;
2595 	fis->lba_mid     = (start >> 8) & 0xFF;
2596 	fis->lba_hi      = (start >> 16) & 0xFF;
2597 	fis->lba_low_ex  = (start >> 24) & 0xFF;
2598 	fis->lba_mid_ex  = (start >> 32) & 0xFF;
2599 	fis->lba_hi_ex   = (start >> 40) & 0xFF;
2600 	fis->device	 = 1 << 6;
2601 	fis->features    = nsect & 0xFF;
2602 	fis->features_ex = (nsect >> 8) & 0xFF;
2603 	fis->sect_count  = ((tag << 3) | (tag >> 5));
2604 	fis->sect_cnt_ex = 0;
2605 	fis->control     = 0;
2606 	fis->res2        = 0;
2607 	fis->res3        = 0;
2608 	fill_command_sg(dd, command, nents);
2609 
2610 	if (unaligned)
2611 		fis->device |= 1 << 7;
2612 
2613 	/* Populate the command header */
2614 	command->command_header->opts =
2615 			__force_bit2int cpu_to_le32(
2616 				(nents << 16) | 5 | AHCI_CMD_PREFETCH);
2617 	command->command_header->byte_count = 0;
2618 
2619 	/*
2620 	 * Set the completion function and data for the command
2621 	 * within this layer.
2622 	 */
2623 	command->comp_data = dd;
2624 	command->comp_func = mtip_async_complete;
2625 	command->direction = dma_dir;
2626 
2627 	/*
2628 	 * Set the completion function and data for the command passed
2629 	 * from the upper layer.
2630 	 */
2631 	command->async_data = data;
2632 	command->async_callback = callback;
2633 
2634 	/*
2635 	 * To prevent this command from being issued
2636 	 * if an internal command is in progress or error handling is active.
2637 	 */
2638 	if (port->flags & MTIP_PF_PAUSE_IO) {
2639 		set_bit(tag, port->cmds_to_issue);
2640 		set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
2641 		return;
2642 	}
2643 
2644 	/* Issue the command to the hardware */
2645 	mtip_issue_ncq_command(port, tag);
2646 
2647 	return;
2648 }
2649 
2650 /*
2651  * Release a command slot.
2652  *
2653  * @dd  Pointer to the driver data structure.
2654  * @tag Slot tag
2655  *
2656  * return value
2657  *      None
2658  */
2659 static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag,
2660 								int unaligned)
2661 {
2662 	struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal :
2663 							&dd->port->cmd_slot;
2664 	release_slot(dd->port, tag);
2665 	up(sem);
2666 }
2667 
2668 /*
2669  * Obtain a command slot and return its associated scatter list.
2670  *
2671  * @dd  Pointer to the driver data structure.
2672  * @tag Pointer to an int that will receive the allocated command
2673  *            slot tag.
2674  *
2675  * return value
2676  *	Pointer to the scatter list for the allocated command slot
2677  *	or NULL if no command slots are available.
2678  */
2679 static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
2680 						   int *tag, int unaligned)
2681 {
2682 	struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal :
2683 							&dd->port->cmd_slot;
2684 
2685 	/*
2686 	 * It is possible that, even with this semaphore, a thread
2687 	 * may think that no command slots are available. Therefore, we
2688 	 * need to make an attempt to get_slot().
2689 	 */
2690 	down(sem);
2691 	*tag = get_slot(dd->port);
2692 
2693 	if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
2694 		up(sem);
2695 		return NULL;
2696 	}
2697 	if (unlikely(*tag < 0)) {
2698 		up(sem);
2699 		return NULL;
2700 	}
2701 
2702 	return dd->port->commands[*tag].sg;
2703 }
2704 
2705 /*
2706  * Sysfs status dump.
2707  *
2708  * @dev  Pointer to the device structure, passed by the kernrel.
2709  * @attr Pointer to the device_attribute structure passed by the kernel.
2710  * @buf  Pointer to the char buffer that will receive the stats info.
2711  *
2712  * return value
2713  *	The size, in bytes, of the data copied into buf.
2714  */
2715 static ssize_t mtip_hw_show_status(struct device *dev,
2716 				struct device_attribute *attr,
2717 				char *buf)
2718 {
2719 	struct driver_data *dd = dev_to_disk(dev)->private_data;
2720 	int size = 0;
2721 
2722 	if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
2723 		size += sprintf(buf, "%s", "thermal_shutdown\n");
2724 	else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
2725 		size += sprintf(buf, "%s", "write_protect\n");
2726 	else
2727 		size += sprintf(buf, "%s", "online\n");
2728 
2729 	return size;
2730 }
2731 
2732 static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
2733 
2734 /* debugsfs entries */
2735 
2736 static ssize_t show_device_status(struct device_driver *drv, char *buf)
2737 {
2738 	int size = 0;
2739 	struct driver_data *dd, *tmp;
2740 	unsigned long flags;
2741 	char id_buf[42];
2742 	u16 status = 0;
2743 
2744 	spin_lock_irqsave(&dev_lock, flags);
2745 	size += sprintf(&buf[size], "Devices Present:\n");
2746 	list_for_each_entry_safe(dd, tmp, &online_list, online_list) {
2747 		if (dd->pdev) {
2748 			if (dd->port &&
2749 			    dd->port->identify &&
2750 			    dd->port->identify_valid) {
2751 				strlcpy(id_buf,
2752 					(char *) (dd->port->identify + 10), 21);
2753 				status = *(dd->port->identify + 141);
2754 			} else {
2755 				memset(id_buf, 0, 42);
2756 				status = 0;
2757 			}
2758 
2759 			if (dd->port &&
2760 			    test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
2761 				size += sprintf(&buf[size],
2762 					" device %s %s (ftl rebuild %d %%)\n",
2763 					dev_name(&dd->pdev->dev),
2764 					id_buf,
2765 					status);
2766 			} else {
2767 				size += sprintf(&buf[size],
2768 					" device %s %s\n",
2769 					dev_name(&dd->pdev->dev),
2770 					id_buf);
2771 			}
2772 		}
2773 	}
2774 
2775 	size += sprintf(&buf[size], "Devices Being Removed:\n");
2776 	list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) {
2777 		if (dd->pdev) {
2778 			if (dd->port &&
2779 			    dd->port->identify &&
2780 			    dd->port->identify_valid) {
2781 				strlcpy(id_buf,
2782 					(char *) (dd->port->identify+10), 21);
2783 				status = *(dd->port->identify + 141);
2784 			} else {
2785 				memset(id_buf, 0, 42);
2786 				status = 0;
2787 			}
2788 
2789 			if (dd->port &&
2790 			    test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
2791 				size += sprintf(&buf[size],
2792 					" device %s %s (ftl rebuild %d %%)\n",
2793 					dev_name(&dd->pdev->dev),
2794 					id_buf,
2795 					status);
2796 			} else {
2797 				size += sprintf(&buf[size],
2798 					" device %s %s\n",
2799 					dev_name(&dd->pdev->dev),
2800 					id_buf);
2801 			}
2802 		}
2803 	}
2804 	spin_unlock_irqrestore(&dev_lock, flags);
2805 
2806 	return size;
2807 }
2808 
2809 static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf,
2810 						size_t len, loff_t *offset)
2811 {
2812 	struct driver_data *dd =  (struct driver_data *)f->private_data;
2813 	int size = *offset;
2814 	char *buf;
2815 	int rv = 0;
2816 
2817 	if (!len || *offset)
2818 		return 0;
2819 
2820 	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2821 	if (!buf) {
2822 		dev_err(&dd->pdev->dev,
2823 			"Memory allocation: status buffer\n");
2824 		return -ENOMEM;
2825 	}
2826 
2827 	size += show_device_status(NULL, buf);
2828 
2829 	*offset = size <= len ? size : len;
2830 	size = copy_to_user(ubuf, buf, *offset);
2831 	if (size)
2832 		rv = -EFAULT;
2833 
2834 	kfree(buf);
2835 	return rv ? rv : *offset;
2836 }
2837 
2838 static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
2839 				  size_t len, loff_t *offset)
2840 {
2841 	struct driver_data *dd =  (struct driver_data *)f->private_data;
2842 	char *buf;
2843 	u32 group_allocated;
2844 	int size = *offset;
2845 	int n, rv = 0;
2846 
2847 	if (!len || size)
2848 		return 0;
2849 
2850 	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2851 	if (!buf) {
2852 		dev_err(&dd->pdev->dev,
2853 			"Memory allocation: register buffer\n");
2854 		return -ENOMEM;
2855 	}
2856 
2857 	size += sprintf(&buf[size], "H/ S ACTive      : [ 0x");
2858 
2859 	for (n = dd->slot_groups-1; n >= 0; n--)
2860 		size += sprintf(&buf[size], "%08X ",
2861 					 readl(dd->port->s_active[n]));
2862 
2863 	size += sprintf(&buf[size], "]\n");
2864 	size += sprintf(&buf[size], "H/ Command Issue : [ 0x");
2865 
2866 	for (n = dd->slot_groups-1; n >= 0; n--)
2867 		size += sprintf(&buf[size], "%08X ",
2868 					readl(dd->port->cmd_issue[n]));
2869 
2870 	size += sprintf(&buf[size], "]\n");
2871 	size += sprintf(&buf[size], "H/ Completed     : [ 0x");
2872 
2873 	for (n = dd->slot_groups-1; n >= 0; n--)
2874 		size += sprintf(&buf[size], "%08X ",
2875 				readl(dd->port->completed[n]));
2876 
2877 	size += sprintf(&buf[size], "]\n");
2878 	size += sprintf(&buf[size], "H/ PORT IRQ STAT : [ 0x%08X ]\n",
2879 				readl(dd->port->mmio + PORT_IRQ_STAT));
2880 	size += sprintf(&buf[size], "H/ HOST IRQ STAT : [ 0x%08X ]\n",
2881 				readl(dd->mmio + HOST_IRQ_STAT));
2882 	size += sprintf(&buf[size], "\n");
2883 
2884 	size += sprintf(&buf[size], "L/ Allocated     : [ 0x");
2885 
2886 	for (n = dd->slot_groups-1; n >= 0; n--) {
2887 		if (sizeof(long) > sizeof(u32))
2888 			group_allocated =
2889 				dd->port->allocated[n/2] >> (32*(n&1));
2890 		else
2891 			group_allocated = dd->port->allocated[n];
2892 		size += sprintf(&buf[size], "%08X ", group_allocated);
2893 	}
2894 	size += sprintf(&buf[size], "]\n");
2895 
2896 	size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
2897 
2898 	for (n = dd->slot_groups-1; n >= 0; n--) {
2899 		if (sizeof(long) > sizeof(u32))
2900 			group_allocated =
2901 				dd->port->cmds_to_issue[n/2] >> (32*(n&1));
2902 		else
2903 			group_allocated = dd->port->cmds_to_issue[n];
2904 		size += sprintf(&buf[size], "%08X ", group_allocated);
2905 	}
2906 	size += sprintf(&buf[size], "]\n");
2907 
2908 	*offset = size <= len ? size : len;
2909 	size = copy_to_user(ubuf, buf, *offset);
2910 	if (size)
2911 		rv = -EFAULT;
2912 
2913 	kfree(buf);
2914 	return rv ? rv : *offset;
2915 }
2916 
2917 static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
2918 				  size_t len, loff_t *offset)
2919 {
2920 	struct driver_data *dd =  (struct driver_data *)f->private_data;
2921 	char *buf;
2922 	int size = *offset;
2923 	int rv = 0;
2924 
2925 	if (!len || size)
2926 		return 0;
2927 
2928 	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2929 	if (!buf) {
2930 		dev_err(&dd->pdev->dev,
2931 			"Memory allocation: flag buffer\n");
2932 		return -ENOMEM;
2933 	}
2934 
2935 	size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
2936 							dd->port->flags);
2937 	size += sprintf(&buf[size], "Flag-dd   : [ %08lX ]\n",
2938 							dd->dd_flag);
2939 
2940 	*offset = size <= len ? size : len;
2941 	size = copy_to_user(ubuf, buf, *offset);
2942 	if (size)
2943 		rv = -EFAULT;
2944 
2945 	kfree(buf);
2946 	return rv ? rv : *offset;
2947 }
2948 
2949 static const struct file_operations mtip_device_status_fops = {
2950 	.owner  = THIS_MODULE,
2951 	.open   = simple_open,
2952 	.read   = mtip_hw_read_device_status,
2953 	.llseek = no_llseek,
2954 };
2955 
2956 static const struct file_operations mtip_regs_fops = {
2957 	.owner  = THIS_MODULE,
2958 	.open   = simple_open,
2959 	.read   = mtip_hw_read_registers,
2960 	.llseek = no_llseek,
2961 };
2962 
2963 static const struct file_operations mtip_flags_fops = {
2964 	.owner  = THIS_MODULE,
2965 	.open   = simple_open,
2966 	.read   = mtip_hw_read_flags,
2967 	.llseek = no_llseek,
2968 };
2969 
2970 /*
2971  * Create the sysfs related attributes.
2972  *
2973  * @dd   Pointer to the driver data structure.
2974  * @kobj Pointer to the kobj for the block device.
2975  *
2976  * return value
2977  *	0	Operation completed successfully.
2978  *	-EINVAL Invalid parameter.
2979  */
2980 static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
2981 {
2982 	if (!kobj || !dd)
2983 		return -EINVAL;
2984 
2985 	if (sysfs_create_file(kobj, &dev_attr_status.attr))
2986 		dev_warn(&dd->pdev->dev,
2987 			"Error creating 'status' sysfs entry\n");
2988 	return 0;
2989 }
2990 
2991 /*
2992  * Remove the sysfs related attributes.
2993  *
2994  * @dd   Pointer to the driver data structure.
2995  * @kobj Pointer to the kobj for the block device.
2996  *
2997  * return value
2998  *	0	Operation completed successfully.
2999  *	-EINVAL Invalid parameter.
3000  */
3001 static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
3002 {
3003 	if (!kobj || !dd)
3004 		return -EINVAL;
3005 
3006 	sysfs_remove_file(kobj, &dev_attr_status.attr);
3007 
3008 	return 0;
3009 }
3010 
3011 static int mtip_hw_debugfs_init(struct driver_data *dd)
3012 {
3013 	if (!dfs_parent)
3014 		return -1;
3015 
3016 	dd->dfs_node = debugfs_create_dir(dd->disk->disk_name, dfs_parent);
3017 	if (IS_ERR_OR_NULL(dd->dfs_node)) {
3018 		dev_warn(&dd->pdev->dev,
3019 			"Error creating node %s under debugfs\n",
3020 						dd->disk->disk_name);
3021 		dd->dfs_node = NULL;
3022 		return -1;
3023 	}
3024 
3025 	debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
3026 							&mtip_flags_fops);
3027 	debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
3028 							&mtip_regs_fops);
3029 
3030 	return 0;
3031 }
3032 
3033 static void mtip_hw_debugfs_exit(struct driver_data *dd)
3034 {
3035 	if (dd->dfs_node)
3036 		debugfs_remove_recursive(dd->dfs_node);
3037 }
3038 
3039 static int mtip_free_orphan(struct driver_data *dd)
3040 {
3041 	struct kobject *kobj;
3042 
3043 	if (dd->bdev) {
3044 		if (dd->bdev->bd_holders >= 1)
3045 			return -2;
3046 
3047 		bdput(dd->bdev);
3048 		dd->bdev = NULL;
3049 	}
3050 
3051 	mtip_hw_debugfs_exit(dd);
3052 
3053 	spin_lock(&rssd_index_lock);
3054 	ida_remove(&rssd_index_ida, dd->index);
3055 	spin_unlock(&rssd_index_lock);
3056 
3057 	if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) &&
3058 			test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
3059 		put_disk(dd->disk);
3060 	} else {
3061 		if (dd->disk) {
3062 			kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
3063 			if (kobj) {
3064 				mtip_hw_sysfs_exit(dd, kobj);
3065 				kobject_put(kobj);
3066 			}
3067 			del_gendisk(dd->disk);
3068 			dd->disk = NULL;
3069 		}
3070 		if (dd->queue) {
3071 			dd->queue->queuedata = NULL;
3072 			blk_cleanup_queue(dd->queue);
3073 			dd->queue = NULL;
3074 		}
3075 	}
3076 	kfree(dd);
3077 	return 0;
3078 }
3079 
3080 /*
3081  * Perform any init/resume time hardware setup
3082  *
3083  * @dd Pointer to the driver data structure.
3084  *
3085  * return value
3086  *	None
3087  */
3088 static inline void hba_setup(struct driver_data *dd)
3089 {
3090 	u32 hwdata;
3091 	hwdata = readl(dd->mmio + HOST_HSORG);
3092 
3093 	/* interrupt bug workaround: use only 1 IS bit.*/
3094 	writel(hwdata |
3095 		HSORG_DISABLE_SLOTGRP_INTR |
3096 		HSORG_DISABLE_SLOTGRP_PXIS,
3097 		dd->mmio + HOST_HSORG);
3098 }
3099 
3100 static int mtip_device_unaligned_constrained(struct driver_data *dd)
3101 {
3102 	return (dd->pdev->device == P420M_DEVICE_ID ? 1 : 0);
3103 }
3104 
3105 /*
3106  * Detect the details of the product, and store anything needed
3107  * into the driver data structure.  This includes product type and
3108  * version and number of slot groups.
3109  *
3110  * @dd Pointer to the driver data structure.
3111  *
3112  * return value
3113  *	None
3114  */
3115 static void mtip_detect_product(struct driver_data *dd)
3116 {
3117 	u32 hwdata;
3118 	unsigned int rev, slotgroups;
3119 
3120 	/*
3121 	 * HBA base + 0xFC [15:0] - vendor-specific hardware interface
3122 	 * info register:
3123 	 * [15:8] hardware/software interface rev#
3124 	 * [   3] asic-style interface
3125 	 * [ 2:0] number of slot groups, minus 1 (only valid for asic-style).
3126 	 */
3127 	hwdata = readl(dd->mmio + HOST_HSORG);
3128 
3129 	dd->product_type = MTIP_PRODUCT_UNKNOWN;
3130 	dd->slot_groups = 1;
3131 
3132 	if (hwdata & 0x8) {
3133 		dd->product_type = MTIP_PRODUCT_ASICFPGA;
3134 		rev = (hwdata & HSORG_HWREV) >> 8;
3135 		slotgroups = (hwdata & HSORG_SLOTGROUPS) + 1;
3136 		dev_info(&dd->pdev->dev,
3137 			"ASIC-FPGA design, HS rev 0x%x, "
3138 			"%i slot groups [%i slots]\n",
3139 			 rev,
3140 			 slotgroups,
3141 			 slotgroups * 32);
3142 
3143 		if (slotgroups > MTIP_MAX_SLOT_GROUPS) {
3144 			dev_warn(&dd->pdev->dev,
3145 				"Warning: driver only supports "
3146 				"%i slot groups.\n", MTIP_MAX_SLOT_GROUPS);
3147 			slotgroups = MTIP_MAX_SLOT_GROUPS;
3148 		}
3149 		dd->slot_groups = slotgroups;
3150 		return;
3151 	}
3152 
3153 	dev_warn(&dd->pdev->dev, "Unrecognized product id\n");
3154 }
3155 
3156 /*
3157  * Blocking wait for FTL rebuild to complete
3158  *
3159  * @dd Pointer to the DRIVER_DATA structure.
3160  *
3161  * return value
3162  *	0	FTL rebuild completed successfully
3163  *	-EFAULT FTL rebuild error/timeout/interruption
3164  */
3165 static int mtip_ftl_rebuild_poll(struct driver_data *dd)
3166 {
3167 	unsigned long timeout, cnt = 0, start;
3168 
3169 	dev_warn(&dd->pdev->dev,
3170 		"FTL rebuild in progress. Polling for completion.\n");
3171 
3172 	start = jiffies;
3173 	timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);
3174 
3175 	do {
3176 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
3177 				&dd->dd_flag)))
3178 			return -EFAULT;
3179 		if (mtip_check_surprise_removal(dd->pdev))
3180 			return -EFAULT;
3181 
3182 		if (mtip_get_identify(dd->port, NULL) < 0)
3183 			return -EFAULT;
3184 
3185 		if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
3186 			MTIP_FTL_REBUILD_MAGIC) {
3187 			ssleep(1);
3188 			/* Print message every 3 minutes */
3189 			if (cnt++ >= 180) {
3190 				dev_warn(&dd->pdev->dev,
3191 				"FTL rebuild in progress (%d secs).\n",
3192 				jiffies_to_msecs(jiffies - start) / 1000);
3193 				cnt = 0;
3194 			}
3195 		} else {
3196 			dev_warn(&dd->pdev->dev,
3197 				"FTL rebuild complete (%d secs).\n",
3198 			jiffies_to_msecs(jiffies - start) / 1000);
3199 			mtip_block_initialize(dd);
3200 			return 0;
3201 		}
3202 		ssleep(10);
3203 	} while (time_before(jiffies, timeout));
3204 
3205 	/* Check for timeout */
3206 	dev_err(&dd->pdev->dev,
3207 		"Timed out waiting for FTL rebuild to complete (%d secs).\n",
3208 		jiffies_to_msecs(jiffies - start) / 1000);
3209 	return -EFAULT;
3210 }
3211 
3212 /*
3213  * service thread to issue queued commands
3214  *
3215  * @data Pointer to the driver data structure.
3216  *
3217  * return value
3218  *	0
3219  */
3220 
3221 static int mtip_service_thread(void *data)
3222 {
3223 	struct driver_data *dd = (struct driver_data *)data;
3224 	unsigned long slot, slot_start, slot_wrap;
3225 	unsigned int num_cmd_slots = dd->slot_groups * 32;
3226 	struct mtip_port *port = dd->port;
3227 	int ret;
3228 
3229 	while (1) {
3230 		/*
3231 		 * the condition is to check neither an internal command is
3232 		 * is in progress nor error handling is active
3233 		 */
3234 		wait_event_interruptible(port->svc_wait, (port->flags) &&
3235 			!(port->flags & MTIP_PF_PAUSE_IO));
3236 
3237 		if (kthread_should_stop())
3238 			goto st_out;
3239 
3240 		set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
3241 
3242 		/* If I am an orphan, start self cleanup */
3243 		if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags))
3244 			break;
3245 
3246 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
3247 				&dd->dd_flag)))
3248 			goto st_out;
3249 
3250 		if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
3251 			slot = 1;
3252 			/* used to restrict the loop to one iteration */
3253 			slot_start = num_cmd_slots;
3254 			slot_wrap = 0;
3255 			while (1) {
3256 				slot = find_next_bit(port->cmds_to_issue,
3257 						num_cmd_slots, slot);
3258 				if (slot_wrap == 1) {
3259 					if ((slot_start >= slot) ||
3260 						(slot >= num_cmd_slots))
3261 						break;
3262 				}
3263 				if (unlikely(slot_start == num_cmd_slots))
3264 					slot_start = slot;
3265 
3266 				if (unlikely(slot == num_cmd_slots)) {
3267 					slot = 1;
3268 					slot_wrap = 1;
3269 					continue;
3270 				}
3271 
3272 				/* Issue the command to the hardware */
3273 				mtip_issue_ncq_command(port, slot);
3274 
3275 				clear_bit(slot, port->cmds_to_issue);
3276 			}
3277 
3278 			clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
3279 		} else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
3280 			if (mtip_ftl_rebuild_poll(dd) < 0)
3281 				set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
3282 							&dd->dd_flag);
3283 			clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
3284 		}
3285 		clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
3286 
3287 		if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
3288 			goto st_out;
3289 	}
3290 
3291 	/* wait for pci remove to exit */
3292 	while (1) {
3293 		if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag))
3294 			break;
3295 		msleep_interruptible(1000);
3296 		if (kthread_should_stop())
3297 			goto st_out;
3298 	}
3299 
3300 	while (1) {
3301 		ret = mtip_free_orphan(dd);
3302 		if (!ret) {
3303 			/* NOTE: All data structures are invalid, do not
3304 			 * access any here */
3305 			return 0;
3306 		}
3307 		msleep_interruptible(1000);
3308 		if (kthread_should_stop())
3309 			goto st_out;
3310 	}
3311 st_out:
3312 	return 0;
3313 }
3314 
3315 /*
3316  * Called once for each card.
3317  *
3318  * @dd Pointer to the driver data structure.
3319  *
3320  * return value
3321  *	0 on success, else an error code.
3322  */
3323 static int mtip_hw_init(struct driver_data *dd)
3324 {
3325 	int i;
3326 	int rv;
3327 	unsigned int num_command_slots;
3328 	unsigned long timeout, timetaken;
3329 	unsigned char *buf;
3330 	struct smart_attr attr242;
3331 
3332 	dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
3333 
3334 	mtip_detect_product(dd);
3335 	if (dd->product_type == MTIP_PRODUCT_UNKNOWN) {
3336 		rv = -EIO;
3337 		goto out1;
3338 	}
3339 	num_command_slots = dd->slot_groups * 32;
3340 
3341 	hba_setup(dd);
3342 
3343 	dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL,
3344 				dd->numa_node);
3345 	if (!dd->port) {
3346 		dev_err(&dd->pdev->dev,
3347 			"Memory allocation: port structure\n");
3348 		return -ENOMEM;
3349 	}
3350 
3351 	/* Continue workqueue setup */
3352 	for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
3353 		dd->work[i].port = dd->port;
3354 
3355 	/* Enable unaligned IO constraints for some devices */
3356 	if (mtip_device_unaligned_constrained(dd))
3357 		dd->unal_qdepth = MTIP_MAX_UNALIGNED_SLOTS;
3358 	else
3359 		dd->unal_qdepth = 0;
3360 
3361 	/* Counting semaphore to track command slot usage */
3362 	sema_init(&dd->port->cmd_slot, num_command_slots - 1 - dd->unal_qdepth);
3363 	sema_init(&dd->port->cmd_slot_unal, dd->unal_qdepth);
3364 
3365 	/* Spinlock to prevent concurrent issue */
3366 	for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
3367 		spin_lock_init(&dd->port->cmd_issue_lock[i]);
3368 
3369 	/* Set the port mmio base address. */
3370 	dd->port->mmio	= dd->mmio + PORT_OFFSET;
3371 	dd->port->dd	= dd;
3372 
3373 	/* Allocate memory for the command list. */
3374 	dd->port->command_list =
3375 		dmam_alloc_coherent(&dd->pdev->dev,
3376 			HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3377 			&dd->port->command_list_dma,
3378 			GFP_KERNEL);
3379 	if (!dd->port->command_list) {
3380 		dev_err(&dd->pdev->dev,
3381 			"Memory allocation: command list\n");
3382 		rv = -ENOMEM;
3383 		goto out1;
3384 	}
3385 
3386 	/* Clear the memory we have allocated. */
3387 	memset(dd->port->command_list,
3388 		0,
3389 		HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
3390 
3391 	/* Setup the addresse of the RX FIS. */
3392 	dd->port->rxfis	    = dd->port->command_list + HW_CMD_SLOT_SZ;
3393 	dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
3394 
3395 	/* Setup the address of the command tables. */
3396 	dd->port->command_table	  = dd->port->rxfis + AHCI_RX_FIS_SZ;
3397 	dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
3398 
3399 	/* Setup the address of the identify data. */
3400 	dd->port->identify     = dd->port->command_table +
3401 					HW_CMD_TBL_AR_SZ;
3402 	dd->port->identify_dma = dd->port->command_tbl_dma +
3403 					HW_CMD_TBL_AR_SZ;
3404 
3405 	/* Setup the address of the sector buffer - for some non-ncq cmds */
3406 	dd->port->sector_buffer	= (void *) dd->port->identify + ATA_SECT_SIZE;
3407 	dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
3408 
3409 	/* Setup the address of the log buf - for read log command */
3410 	dd->port->log_buf = (void *)dd->port->sector_buffer  + ATA_SECT_SIZE;
3411 	dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
3412 
3413 	/* Setup the address of the smart buf - for smart read data command */
3414 	dd->port->smart_buf = (void *)dd->port->log_buf  + ATA_SECT_SIZE;
3415 	dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
3416 
3417 
3418 	/* Point the command headers at the command tables. */
3419 	for (i = 0; i < num_command_slots; i++) {
3420 		dd->port->commands[i].command_header =
3421 					dd->port->command_list +
3422 					(sizeof(struct mtip_cmd_hdr) * i);
3423 		dd->port->commands[i].command_header_dma =
3424 					dd->port->command_list_dma +
3425 					(sizeof(struct mtip_cmd_hdr) * i);
3426 
3427 		dd->port->commands[i].command =
3428 			dd->port->command_table + (HW_CMD_TBL_SZ * i);
3429 		dd->port->commands[i].command_dma =
3430 			dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
3431 
3432 		if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
3433 			dd->port->commands[i].command_header->ctbau =
3434 			__force_bit2int cpu_to_le32(
3435 			(dd->port->commands[i].command_dma >> 16) >> 16);
3436 		dd->port->commands[i].command_header->ctba =
3437 			__force_bit2int cpu_to_le32(
3438 			dd->port->commands[i].command_dma & 0xFFFFFFFF);
3439 
3440 		/*
3441 		 * If this is not done, a bug is reported by the stock
3442 		 * FC11 i386. Due to the fact that it has lots of kernel
3443 		 * debugging enabled.
3444 		 */
3445 		sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
3446 
3447 		/* Mark all commands as currently inactive.*/
3448 		atomic_set(&dd->port->commands[i].active, 0);
3449 	}
3450 
3451 	/* Setup the pointers to the extended s_active and CI registers. */
3452 	for (i = 0; i < dd->slot_groups; i++) {
3453 		dd->port->s_active[i] =
3454 			dd->port->mmio + i*0x80 + PORT_SCR_ACT;
3455 		dd->port->cmd_issue[i] =
3456 			dd->port->mmio + i*0x80 + PORT_COMMAND_ISSUE;
3457 		dd->port->completed[i] =
3458 			dd->port->mmio + i*0x80 + PORT_SDBV;
3459 	}
3460 
3461 	timetaken = jiffies;
3462 	timeout = jiffies + msecs_to_jiffies(30000);
3463 	while (((readl(dd->port->mmio + PORT_SCR_STAT) & 0x0F) != 0x03) &&
3464 		 time_before(jiffies, timeout)) {
3465 		mdelay(100);
3466 	}
3467 	if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
3468 		timetaken = jiffies - timetaken;
3469 		dev_warn(&dd->pdev->dev,
3470 			"Surprise removal detected at %u ms\n",
3471 			jiffies_to_msecs(timetaken));
3472 		rv = -ENODEV;
3473 		goto out2 ;
3474 	}
3475 	if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
3476 		timetaken = jiffies - timetaken;
3477 		dev_warn(&dd->pdev->dev,
3478 			"Removal detected at %u ms\n",
3479 			jiffies_to_msecs(timetaken));
3480 		rv = -EFAULT;
3481 		goto out2;
3482 	}
3483 
3484 	/* Conditionally reset the HBA. */
3485 	if (!(readl(dd->mmio + HOST_CAP) & HOST_CAP_NZDMA)) {
3486 		if (mtip_hba_reset(dd) < 0) {
3487 			dev_err(&dd->pdev->dev,
3488 				"Card did not reset within timeout\n");
3489 			rv = -EIO;
3490 			goto out2;
3491 		}
3492 	} else {
3493 		/* Clear any pending interrupts on the HBA */
3494 		writel(readl(dd->mmio + HOST_IRQ_STAT),
3495 			dd->mmio + HOST_IRQ_STAT);
3496 	}
3497 
3498 	mtip_init_port(dd->port);
3499 	mtip_start_port(dd->port);
3500 
3501 	/* Setup the ISR and enable interrupts. */
3502 	rv = devm_request_irq(&dd->pdev->dev,
3503 				dd->pdev->irq,
3504 				mtip_irq_handler,
3505 				IRQF_SHARED,
3506 				dev_driver_string(&dd->pdev->dev),
3507 				dd);
3508 
3509 	if (rv) {
3510 		dev_err(&dd->pdev->dev,
3511 			"Unable to allocate IRQ %d\n", dd->pdev->irq);
3512 		goto out2;
3513 	}
3514 	irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding));
3515 
3516 	/* Enable interrupts on the HBA. */
3517 	writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
3518 					dd->mmio + HOST_CTL);
3519 
3520 	init_timer(&dd->port->cmd_timer);
3521 	init_waitqueue_head(&dd->port->svc_wait);
3522 
3523 	dd->port->cmd_timer.data = (unsigned long int) dd->port;
3524 	dd->port->cmd_timer.function = mtip_timeout_function;
3525 	mod_timer(&dd->port->cmd_timer,
3526 		jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
3527 
3528 
3529 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
3530 		rv = -EFAULT;
3531 		goto out3;
3532 	}
3533 
3534 	if (mtip_get_identify(dd->port, NULL) < 0) {
3535 		rv = -EFAULT;
3536 		goto out3;
3537 	}
3538 	mtip_dump_identify(dd->port);
3539 
3540 	if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
3541 		MTIP_FTL_REBUILD_MAGIC) {
3542 		set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
3543 		return MTIP_FTL_REBUILD_MAGIC;
3544 	}
3545 
3546 	/* check write protect, over temp and rebuild statuses */
3547 	rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
3548 				dd->port->log_buf,
3549 				dd->port->log_buf_dma, 1);
3550 	if (rv) {
3551 		dev_warn(&dd->pdev->dev,
3552 			"Error in READ LOG EXT (10h) command\n");
3553 		/* non-critical error, don't fail the load */
3554 	} else {
3555 		buf = (unsigned char *)dd->port->log_buf;
3556 		if (buf[259] & 0x1) {
3557 			dev_info(&dd->pdev->dev,
3558 				"Write protect bit is set.\n");
3559 			set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
3560 		}
3561 		if (buf[288] == 0xF7) {
3562 			dev_info(&dd->pdev->dev,
3563 				"Exceeded Tmax, drive in thermal shutdown.\n");
3564 			set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
3565 		}
3566 		if (buf[288] == 0xBF) {
3567 			dev_info(&dd->pdev->dev,
3568 				"Drive is in security locked state.\n");
3569 			set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
3570 		}
3571 	}
3572 
3573 	/* get write protect progess */
3574 	memset(&attr242, 0, sizeof(struct smart_attr));
3575 	if (mtip_get_smart_attr(dd->port, 242, &attr242))
3576 		dev_warn(&dd->pdev->dev,
3577 				"Unable to check write protect progress\n");
3578 	else
3579 		dev_info(&dd->pdev->dev,
3580 				"Write protect progress: %u%% (%u blocks)\n",
3581 				attr242.cur, le32_to_cpu(attr242.data));
3582 	return rv;
3583 
3584 out3:
3585 	del_timer_sync(&dd->port->cmd_timer);
3586 
3587 	/* Disable interrupts on the HBA. */
3588 	writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3589 			dd->mmio + HOST_CTL);
3590 
3591 	/* Release the IRQ. */
3592 	irq_set_affinity_hint(dd->pdev->irq, NULL);
3593 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
3594 
3595 out2:
3596 	mtip_deinit_port(dd->port);
3597 
3598 	/* Free the command/command header memory. */
3599 	dmam_free_coherent(&dd->pdev->dev,
3600 				HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3601 				dd->port->command_list,
3602 				dd->port->command_list_dma);
3603 out1:
3604 	/* Free the memory allocated for the for structure. */
3605 	kfree(dd->port);
3606 
3607 	return rv;
3608 }
3609 
3610 /*
3611  * Called to deinitialize an interface.
3612  *
3613  * @dd Pointer to the driver data structure.
3614  *
3615  * return value
3616  *	0
3617  */
3618 static int mtip_hw_exit(struct driver_data *dd)
3619 {
3620 	/*
3621 	 * Send standby immediate (E0h) to the drive so that it
3622 	 * saves its state.
3623 	 */
3624 	if (!dd->sr) {
3625 		if (!test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
3626 			if (mtip_standby_immediate(dd->port))
3627 				dev_warn(&dd->pdev->dev,
3628 					"STANDBY IMMEDIATE failed\n");
3629 
3630 		/* de-initialize the port. */
3631 		mtip_deinit_port(dd->port);
3632 
3633 		/* Disable interrupts on the HBA. */
3634 		writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3635 				dd->mmio + HOST_CTL);
3636 	}
3637 
3638 	del_timer_sync(&dd->port->cmd_timer);
3639 
3640 	/* Release the IRQ. */
3641 	irq_set_affinity_hint(dd->pdev->irq, NULL);
3642 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
3643 
3644 	/* Free the command/command header memory. */
3645 	dmam_free_coherent(&dd->pdev->dev,
3646 			HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3647 			dd->port->command_list,
3648 			dd->port->command_list_dma);
3649 	/* Free the memory allocated for the for structure. */
3650 	kfree(dd->port);
3651 	dd->port = NULL;
3652 
3653 	return 0;
3654 }
3655 
3656 /*
3657  * Issue a Standby Immediate command to the device.
3658  *
3659  * This function is called by the Block Layer just before the
3660  * system powers off during a shutdown.
3661  *
3662  * @dd Pointer to the driver data structure.
3663  *
3664  * return value
3665  *	0
3666  */
3667 static int mtip_hw_shutdown(struct driver_data *dd)
3668 {
3669 	/*
3670 	 * Send standby immediate (E0h) to the drive so that it
3671 	 * saves its state.
3672 	 */
3673 	if (!dd->sr && dd->port)
3674 		mtip_standby_immediate(dd->port);
3675 
3676 	return 0;
3677 }
3678 
3679 /*
3680  * Suspend function
3681  *
3682  * This function is called by the Block Layer just before the
3683  * system hibernates.
3684  *
3685  * @dd Pointer to the driver data structure.
3686  *
3687  * return value
3688  *	0	Suspend was successful
3689  *	-EFAULT Suspend was not successful
3690  */
3691 static int mtip_hw_suspend(struct driver_data *dd)
3692 {
3693 	/*
3694 	 * Send standby immediate (E0h) to the drive
3695 	 * so that it saves its state.
3696 	 */
3697 	if (mtip_standby_immediate(dd->port) != 0) {
3698 		dev_err(&dd->pdev->dev,
3699 			"Failed standby-immediate command\n");
3700 		return -EFAULT;
3701 	}
3702 
3703 	/* Disable interrupts on the HBA.*/
3704 	writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3705 			dd->mmio + HOST_CTL);
3706 	mtip_deinit_port(dd->port);
3707 
3708 	return 0;
3709 }
3710 
3711 /*
3712  * Resume function
3713  *
3714  * This function is called by the Block Layer as the
3715  * system resumes.
3716  *
3717  * @dd Pointer to the driver data structure.
3718  *
3719  * return value
3720  *	0	Resume was successful
3721  *      -EFAULT Resume was not successful
3722  */
3723 static int mtip_hw_resume(struct driver_data *dd)
3724 {
3725 	/* Perform any needed hardware setup steps */
3726 	hba_setup(dd);
3727 
3728 	/* Reset the HBA */
3729 	if (mtip_hba_reset(dd) != 0) {
3730 		dev_err(&dd->pdev->dev,
3731 			"Unable to reset the HBA\n");
3732 		return -EFAULT;
3733 	}
3734 
3735 	/*
3736 	 * Enable the port, DMA engine, and FIS reception specific
3737 	 * h/w in controller.
3738 	 */
3739 	mtip_init_port(dd->port);
3740 	mtip_start_port(dd->port);
3741 
3742 	/* Enable interrupts on the HBA.*/
3743 	writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
3744 			dd->mmio + HOST_CTL);
3745 
3746 	return 0;
3747 }
3748 
3749 /*
3750  * Helper function for reusing disk name
3751  * upon hot insertion.
3752  */
3753 static int rssd_disk_name_format(char *prefix,
3754 				 int index,
3755 				 char *buf,
3756 				 int buflen)
3757 {
3758 	const int base = 'z' - 'a' + 1;
3759 	char *begin = buf + strlen(prefix);
3760 	char *end = buf + buflen;
3761 	char *p;
3762 	int unit;
3763 
3764 	p = end - 1;
3765 	*p = '\0';
3766 	unit = base;
3767 	do {
3768 		if (p == begin)
3769 			return -EINVAL;
3770 		*--p = 'a' + (index % unit);
3771 		index = (index / unit) - 1;
3772 	} while (index >= 0);
3773 
3774 	memmove(begin, p, end - p);
3775 	memcpy(buf, prefix, strlen(prefix));
3776 
3777 	return 0;
3778 }
3779 
3780 /*
3781  * Block layer IOCTL handler.
3782  *
3783  * @dev Pointer to the block_device structure.
3784  * @mode ignored
3785  * @cmd IOCTL command passed from the user application.
3786  * @arg Argument passed from the user application.
3787  *
3788  * return value
3789  *	0        IOCTL completed successfully.
3790  *	-ENOTTY  IOCTL not supported or invalid driver data
3791  *                 structure pointer.
3792  */
3793 static int mtip_block_ioctl(struct block_device *dev,
3794 			    fmode_t mode,
3795 			    unsigned cmd,
3796 			    unsigned long arg)
3797 {
3798 	struct driver_data *dd = dev->bd_disk->private_data;
3799 
3800 	if (!capable(CAP_SYS_ADMIN))
3801 		return -EACCES;
3802 
3803 	if (!dd)
3804 		return -ENOTTY;
3805 
3806 	if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
3807 		return -ENOTTY;
3808 
3809 	switch (cmd) {
3810 	case BLKFLSBUF:
3811 		return -ENOTTY;
3812 	default:
3813 		return mtip_hw_ioctl(dd, cmd, arg);
3814 	}
3815 }
3816 
3817 #ifdef CONFIG_COMPAT
3818 /*
3819  * Block layer compat IOCTL handler.
3820  *
3821  * @dev Pointer to the block_device structure.
3822  * @mode ignored
3823  * @cmd IOCTL command passed from the user application.
3824  * @arg Argument passed from the user application.
3825  *
3826  * return value
3827  *	0        IOCTL completed successfully.
3828  *	-ENOTTY  IOCTL not supported or invalid driver data
3829  *                 structure pointer.
3830  */
3831 static int mtip_block_compat_ioctl(struct block_device *dev,
3832 			    fmode_t mode,
3833 			    unsigned cmd,
3834 			    unsigned long arg)
3835 {
3836 	struct driver_data *dd = dev->bd_disk->private_data;
3837 
3838 	if (!capable(CAP_SYS_ADMIN))
3839 		return -EACCES;
3840 
3841 	if (!dd)
3842 		return -ENOTTY;
3843 
3844 	if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
3845 		return -ENOTTY;
3846 
3847 	switch (cmd) {
3848 	case BLKFLSBUF:
3849 		return -ENOTTY;
3850 	case HDIO_DRIVE_TASKFILE: {
3851 		struct mtip_compat_ide_task_request_s __user *compat_req_task;
3852 		ide_task_request_t req_task;
3853 		int compat_tasksize, outtotal, ret;
3854 
3855 		compat_tasksize =
3856 			sizeof(struct mtip_compat_ide_task_request_s);
3857 
3858 		compat_req_task =
3859 			(struct mtip_compat_ide_task_request_s __user *) arg;
3860 
3861 		if (copy_from_user(&req_task, (void __user *) arg,
3862 			compat_tasksize - (2 * sizeof(compat_long_t))))
3863 			return -EFAULT;
3864 
3865 		if (get_user(req_task.out_size, &compat_req_task->out_size))
3866 			return -EFAULT;
3867 
3868 		if (get_user(req_task.in_size, &compat_req_task->in_size))
3869 			return -EFAULT;
3870 
3871 		outtotal = sizeof(struct mtip_compat_ide_task_request_s);
3872 
3873 		ret = exec_drive_taskfile(dd, (void __user *) arg,
3874 						&req_task, outtotal);
3875 
3876 		if (copy_to_user((void __user *) arg, &req_task,
3877 				compat_tasksize -
3878 				(2 * sizeof(compat_long_t))))
3879 			return -EFAULT;
3880 
3881 		if (put_user(req_task.out_size, &compat_req_task->out_size))
3882 			return -EFAULT;
3883 
3884 		if (put_user(req_task.in_size, &compat_req_task->in_size))
3885 			return -EFAULT;
3886 
3887 		return ret;
3888 	}
3889 	default:
3890 		return mtip_hw_ioctl(dd, cmd, arg);
3891 	}
3892 }
3893 #endif
3894 
3895 /*
3896  * Obtain the geometry of the device.
3897  *
3898  * You may think that this function is obsolete, but some applications,
3899  * fdisk for example still used CHS values. This function describes the
3900  * device as having 224 heads and 56 sectors per cylinder. These values are
3901  * chosen so that each cylinder is aligned on a 4KB boundary. Since a
3902  * partition is described in terms of a start and end cylinder this means
3903  * that each partition is also 4KB aligned. Non-aligned partitions adversely
3904  * affects performance.
3905  *
3906  * @dev Pointer to the block_device strucutre.
3907  * @geo Pointer to a hd_geometry structure.
3908  *
3909  * return value
3910  *	0       Operation completed successfully.
3911  *	-ENOTTY An error occurred while reading the drive capacity.
3912  */
3913 static int mtip_block_getgeo(struct block_device *dev,
3914 				struct hd_geometry *geo)
3915 {
3916 	struct driver_data *dd = dev->bd_disk->private_data;
3917 	sector_t capacity;
3918 
3919 	if (!dd)
3920 		return -ENOTTY;
3921 
3922 	if (!(mtip_hw_get_capacity(dd, &capacity))) {
3923 		dev_warn(&dd->pdev->dev,
3924 			"Could not get drive capacity.\n");
3925 		return -ENOTTY;
3926 	}
3927 
3928 	geo->heads = 224;
3929 	geo->sectors = 56;
3930 	sector_div(capacity, (geo->heads * geo->sectors));
3931 	geo->cylinders = capacity;
3932 	return 0;
3933 }
3934 
3935 /*
3936  * Block device operation function.
3937  *
3938  * This structure contains pointers to the functions required by the block
3939  * layer.
3940  */
3941 static const struct block_device_operations mtip_block_ops = {
3942 	.ioctl		= mtip_block_ioctl,
3943 #ifdef CONFIG_COMPAT
3944 	.compat_ioctl	= mtip_block_compat_ioctl,
3945 #endif
3946 	.getgeo		= mtip_block_getgeo,
3947 	.owner		= THIS_MODULE
3948 };
3949 
3950 /*
3951  * Block layer make request function.
3952  *
3953  * This function is called by the kernel to process a BIO for
3954  * the P320 device.
3955  *
3956  * @queue Pointer to the request queue. Unused other than to obtain
3957  *              the driver data structure.
3958  * @bio   Pointer to the BIO.
3959  *
3960  */
3961 static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3962 {
3963 	struct driver_data *dd = queue->queuedata;
3964 	struct scatterlist *sg;
3965 	struct bio_vec *bvec;
3966 	int i, nents = 0;
3967 	int tag = 0, unaligned = 0;
3968 
3969 	if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
3970 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
3971 							&dd->dd_flag))) {
3972 			bio_endio(bio, -ENXIO);
3973 			return;
3974 		}
3975 		if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) {
3976 			bio_endio(bio, -ENODATA);
3977 			return;
3978 		}
3979 		if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT,
3980 							&dd->dd_flag) &&
3981 				bio_data_dir(bio))) {
3982 			bio_endio(bio, -ENODATA);
3983 			return;
3984 		}
3985 		if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) {
3986 			bio_endio(bio, -ENODATA);
3987 			return;
3988 		}
3989 		if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
3990 			bio_endio(bio, -ENXIO);
3991 			return;
3992 		}
3993 	}
3994 
3995 	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
3996 		bio_endio(bio, mtip_send_trim(dd, bio->bi_sector,
3997 						bio_sectors(bio)));
3998 		return;
3999 	}
4000 
4001 	if (unlikely(!bio_has_data(bio))) {
4002 		blk_queue_flush(queue, 0);
4003 		bio_endio(bio, 0);
4004 		return;
4005 	}
4006 
4007 	if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 &&
4008 							dd->unal_qdepth) {
4009 		if (bio->bi_sector % 8 != 0) /* Unaligned on 4k boundaries */
4010 			unaligned = 1;
4011 		else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */
4012 			unaligned = 1;
4013 	}
4014 
4015 	sg = mtip_hw_get_scatterlist(dd, &tag, unaligned);
4016 	if (likely(sg != NULL)) {
4017 		blk_queue_bounce(queue, &bio);
4018 
4019 		if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) {
4020 			dev_warn(&dd->pdev->dev,
4021 				"Maximum number of SGL entries exceeded\n");
4022 			bio_io_error(bio);
4023 			mtip_hw_release_scatterlist(dd, tag, unaligned);
4024 			return;
4025 		}
4026 
4027 		/* Create the scatter list for this bio. */
4028 		bio_for_each_segment(bvec, bio, i) {
4029 			sg_set_page(&sg[nents],
4030 					bvec->bv_page,
4031 					bvec->bv_len,
4032 					bvec->bv_offset);
4033 			nents++;
4034 		}
4035 
4036 		/* Issue the read/write. */
4037 		mtip_hw_submit_io(dd,
4038 				bio->bi_sector,
4039 				bio_sectors(bio),
4040 				nents,
4041 				tag,
4042 				bio_endio,
4043 				bio,
4044 				bio_data_dir(bio),
4045 				unaligned);
4046 	} else
4047 		bio_io_error(bio);
4048 }
4049 
4050 /*
4051  * Block layer initialization function.
4052  *
4053  * This function is called once by the PCI layer for each P320
4054  * device that is connected to the system.
4055  *
4056  * @dd Pointer to the driver data structure.
4057  *
4058  * return value
4059  *	0 on success else an error code.
4060  */
4061 static int mtip_block_initialize(struct driver_data *dd)
4062 {
4063 	int rv = 0, wait_for_rebuild = 0;
4064 	sector_t capacity;
4065 	unsigned int index = 0;
4066 	struct kobject *kobj;
4067 	unsigned char thd_name[16];
4068 
4069 	if (dd->disk)
4070 		goto skip_create_disk; /* hw init done, before rebuild */
4071 
4072 	/* Initialize the protocol layer. */
4073 	wait_for_rebuild = mtip_hw_init(dd);
4074 	if (wait_for_rebuild < 0) {
4075 		dev_err(&dd->pdev->dev,
4076 			"Protocol layer initialization failed\n");
4077 		rv = -EINVAL;
4078 		goto protocol_init_error;
4079 	}
4080 
4081 	dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node);
4082 	if (dd->disk  == NULL) {
4083 		dev_err(&dd->pdev->dev,
4084 			"Unable to allocate gendisk structure\n");
4085 		rv = -EINVAL;
4086 		goto alloc_disk_error;
4087 	}
4088 
4089 	/* Generate the disk name, implemented same as in sd.c */
4090 	do {
4091 		if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL))
4092 			goto ida_get_error;
4093 
4094 		spin_lock(&rssd_index_lock);
4095 		rv = ida_get_new(&rssd_index_ida, &index);
4096 		spin_unlock(&rssd_index_lock);
4097 	} while (rv == -EAGAIN);
4098 
4099 	if (rv)
4100 		goto ida_get_error;
4101 
4102 	rv = rssd_disk_name_format("rssd",
4103 				index,
4104 				dd->disk->disk_name,
4105 				DISK_NAME_LEN);
4106 	if (rv)
4107 		goto disk_index_error;
4108 
4109 	dd->disk->driverfs_dev	= &dd->pdev->dev;
4110 	dd->disk->major		= dd->major;
4111 	dd->disk->first_minor	= dd->instance * MTIP_MAX_MINORS;
4112 	dd->disk->fops		= &mtip_block_ops;
4113 	dd->disk->private_data	= dd;
4114 	dd->index		= index;
4115 
4116 	mtip_hw_debugfs_init(dd);
4117 
4118 	/*
4119 	 * if rebuild pending, start the service thread, and delay the block
4120 	 * queue creation and add_disk()
4121 	 */
4122 	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
4123 		goto start_service_thread;
4124 
4125 skip_create_disk:
4126 	/* Allocate the request queue. */
4127 	dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node);
4128 	if (dd->queue == NULL) {
4129 		dev_err(&dd->pdev->dev,
4130 			"Unable to allocate request queue\n");
4131 		rv = -ENOMEM;
4132 		goto block_queue_alloc_init_error;
4133 	}
4134 
4135 	/* Attach our request function to the request queue. */
4136 	blk_queue_make_request(dd->queue, mtip_make_request);
4137 
4138 	dd->disk->queue		= dd->queue;
4139 	dd->queue->queuedata	= dd;
4140 
4141 	/* Set device limits. */
4142 	set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
4143 	blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
4144 	blk_queue_physical_block_size(dd->queue, 4096);
4145 	blk_queue_max_hw_sectors(dd->queue, 0xffff);
4146 	blk_queue_max_segment_size(dd->queue, 0x400000);
4147 	blk_queue_io_min(dd->queue, 4096);
4148 
4149 	/*
4150 	 * write back cache is not supported in the device. FUA depends on
4151 	 * write back cache support, hence setting flush support to zero.
4152 	 */
4153 	blk_queue_flush(dd->queue, 0);
4154 
4155 	/* Signal trim support */
4156 	if (dd->trim_supp == true) {
4157 		set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags);
4158 		dd->queue->limits.discard_granularity = 4096;
4159 		blk_queue_max_discard_sectors(dd->queue,
4160 			MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
4161 		dd->queue->limits.discard_zeroes_data = 0;
4162 	}
4163 
4164 	/* Set the capacity of the device in 512 byte sectors. */
4165 	if (!(mtip_hw_get_capacity(dd, &capacity))) {
4166 		dev_warn(&dd->pdev->dev,
4167 			"Could not read drive capacity\n");
4168 		rv = -EIO;
4169 		goto read_capacity_error;
4170 	}
4171 	set_capacity(dd->disk, capacity);
4172 
4173 	/* Enable the block device and add it to /dev */
4174 	add_disk(dd->disk);
4175 
4176 	dd->bdev = bdget_disk(dd->disk, 0);
4177 	/*
4178 	 * Now that the disk is active, initialize any sysfs attributes
4179 	 * managed by the protocol layer.
4180 	 */
4181 	kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
4182 	if (kobj) {
4183 		mtip_hw_sysfs_init(dd, kobj);
4184 		kobject_put(kobj);
4185 	}
4186 
4187 	if (dd->mtip_svc_handler) {
4188 		set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
4189 		return rv; /* service thread created for handling rebuild */
4190 	}
4191 
4192 start_service_thread:
4193 	sprintf(thd_name, "mtip_svc_thd_%02d", index);
4194 	dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
4195 						dd, dd->numa_node, "%s",
4196 						thd_name);
4197 
4198 	if (IS_ERR(dd->mtip_svc_handler)) {
4199 		dev_err(&dd->pdev->dev, "service thread failed to start\n");
4200 		dd->mtip_svc_handler = NULL;
4201 		rv = -EFAULT;
4202 		goto kthread_run_error;
4203 	}
4204 	wake_up_process(dd->mtip_svc_handler);
4205 	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
4206 		rv = wait_for_rebuild;
4207 
4208 	return rv;
4209 
4210 kthread_run_error:
4211 	bdput(dd->bdev);
4212 	dd->bdev = NULL;
4213 
4214 	/* Delete our gendisk. This also removes the device from /dev */
4215 	del_gendisk(dd->disk);
4216 
4217 read_capacity_error:
4218 	blk_cleanup_queue(dd->queue);
4219 
4220 block_queue_alloc_init_error:
4221 	mtip_hw_debugfs_exit(dd);
4222 disk_index_error:
4223 	spin_lock(&rssd_index_lock);
4224 	ida_remove(&rssd_index_ida, index);
4225 	spin_unlock(&rssd_index_lock);
4226 
4227 ida_get_error:
4228 	put_disk(dd->disk);
4229 
4230 alloc_disk_error:
4231 	mtip_hw_exit(dd); /* De-initialize the protocol layer. */
4232 
4233 protocol_init_error:
4234 	return rv;
4235 }
4236 
4237 /*
4238  * Block layer deinitialization function.
4239  *
4240  * Called by the PCI layer as each P320 device is removed.
4241  *
4242  * @dd Pointer to the driver data structure.
4243  *
4244  * return value
4245  *	0
4246  */
4247 static int mtip_block_remove(struct driver_data *dd)
4248 {
4249 	struct kobject *kobj;
4250 
4251 	if (!dd->sr) {
4252 		mtip_hw_debugfs_exit(dd);
4253 
4254 		if (dd->mtip_svc_handler) {
4255 			set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
4256 			wake_up_interruptible(&dd->port->svc_wait);
4257 			kthread_stop(dd->mtip_svc_handler);
4258 		}
4259 
4260 		/* Clean up the sysfs attributes, if created */
4261 		if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
4262 			kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
4263 			if (kobj) {
4264 				mtip_hw_sysfs_exit(dd, kobj);
4265 				kobject_put(kobj);
4266 			}
4267 		}
4268 		/*
4269 		 * Delete our gendisk structure. This also removes the device
4270 		 * from /dev
4271 		 */
4272 		if (dd->bdev) {
4273 			bdput(dd->bdev);
4274 			dd->bdev = NULL;
4275 		}
4276 		if (dd->disk) {
4277 			if (dd->disk->queue) {
4278 				del_gendisk(dd->disk);
4279 				blk_cleanup_queue(dd->queue);
4280 				dd->queue = NULL;
4281 			} else
4282 				put_disk(dd->disk);
4283 		}
4284 		dd->disk  = NULL;
4285 
4286 		spin_lock(&rssd_index_lock);
4287 		ida_remove(&rssd_index_ida, dd->index);
4288 		spin_unlock(&rssd_index_lock);
4289 	} else {
4290 		dev_info(&dd->pdev->dev, "device %s surprise removal\n",
4291 						dd->disk->disk_name);
4292 	}
4293 
4294 	/* De-initialize the protocol layer. */
4295 	mtip_hw_exit(dd);
4296 
4297 	return 0;
4298 }
4299 
4300 /*
4301  * Function called by the PCI layer when just before the
4302  * machine shuts down.
4303  *
4304  * If a protocol layer shutdown function is present it will be called
4305  * by this function.
4306  *
4307  * @dd Pointer to the driver data structure.
4308  *
4309  * return value
4310  *	0
4311  */
4312 static int mtip_block_shutdown(struct driver_data *dd)
4313 {
4314 	/* Delete our gendisk structure, and cleanup the blk queue. */
4315 	if (dd->disk) {
4316 		dev_info(&dd->pdev->dev,
4317 			"Shutting down %s ...\n", dd->disk->disk_name);
4318 
4319 		if (dd->disk->queue) {
4320 			del_gendisk(dd->disk);
4321 			blk_cleanup_queue(dd->queue);
4322 		} else
4323 			put_disk(dd->disk);
4324 		dd->disk  = NULL;
4325 		dd->queue = NULL;
4326 	}
4327 
4328 	spin_lock(&rssd_index_lock);
4329 	ida_remove(&rssd_index_ida, dd->index);
4330 	spin_unlock(&rssd_index_lock);
4331 
4332 	mtip_hw_shutdown(dd);
4333 	return 0;
4334 }
4335 
4336 static int mtip_block_suspend(struct driver_data *dd)
4337 {
4338 	dev_info(&dd->pdev->dev,
4339 		"Suspending %s ...\n", dd->disk->disk_name);
4340 	mtip_hw_suspend(dd);
4341 	return 0;
4342 }
4343 
4344 static int mtip_block_resume(struct driver_data *dd)
4345 {
4346 	dev_info(&dd->pdev->dev, "Resuming %s ...\n",
4347 		dd->disk->disk_name);
4348 	mtip_hw_resume(dd);
4349 	return 0;
4350 }
4351 
4352 static void drop_cpu(int cpu)
4353 {
4354 	cpu_use[cpu]--;
4355 }
4356 
4357 static int get_least_used_cpu_on_node(int node)
4358 {
4359 	int cpu, least_used_cpu, least_cnt;
4360 	const struct cpumask *node_mask;
4361 
4362 	node_mask = cpumask_of_node(node);
4363 	least_used_cpu = cpumask_first(node_mask);
4364 	least_cnt = cpu_use[least_used_cpu];
4365 	cpu = least_used_cpu;
4366 
4367 	for_each_cpu(cpu, node_mask) {
4368 		if (cpu_use[cpu] < least_cnt) {
4369 			least_used_cpu = cpu;
4370 			least_cnt = cpu_use[cpu];
4371 		}
4372 	}
4373 	cpu_use[least_used_cpu]++;
4374 	return least_used_cpu;
4375 }
4376 
4377 /* Helper for selecting a node in round robin mode */
4378 static inline int mtip_get_next_rr_node(void)
4379 {
4380 	static int next_node = -1;
4381 
4382 	if (next_node == -1) {
4383 		next_node = first_online_node;
4384 		return next_node;
4385 	}
4386 
4387 	next_node = next_online_node(next_node);
4388 	if (next_node == MAX_NUMNODES)
4389 		next_node = first_online_node;
4390 	return next_node;
4391 }
4392 
4393 static DEFINE_HANDLER(0);
4394 static DEFINE_HANDLER(1);
4395 static DEFINE_HANDLER(2);
4396 static DEFINE_HANDLER(3);
4397 static DEFINE_HANDLER(4);
4398 static DEFINE_HANDLER(5);
4399 static DEFINE_HANDLER(6);
4400 static DEFINE_HANDLER(7);
4401 
4402 /*
4403  * Called for each supported PCI device detected.
4404  *
4405  * This function allocates the private data structure, enables the
4406  * PCI device and then calls the block layer initialization function.
4407  *
4408  * return value
4409  *	0 on success else an error code.
4410  */
4411 static int mtip_pci_probe(struct pci_dev *pdev,
4412 			const struct pci_device_id *ent)
4413 {
4414 	int rv = 0;
4415 	struct driver_data *dd = NULL;
4416 	char cpu_list[256];
4417 	const struct cpumask *node_mask;
4418 	int cpu, i = 0, j = 0;
4419 	int my_node = NUMA_NO_NODE;
4420 	unsigned long flags;
4421 
4422 	/* Allocate memory for this devices private data. */
4423 	my_node = pcibus_to_node(pdev->bus);
4424 	if (my_node != NUMA_NO_NODE) {
4425 		if (!node_online(my_node))
4426 			my_node = mtip_get_next_rr_node();
4427 	} else {
4428 		dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n");
4429 		my_node = mtip_get_next_rr_node();
4430 	}
4431 	dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n",
4432 		my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev),
4433 		cpu_to_node(smp_processor_id()), smp_processor_id());
4434 
4435 	dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node);
4436 	if (dd == NULL) {
4437 		dev_err(&pdev->dev,
4438 			"Unable to allocate memory for driver data\n");
4439 		return -ENOMEM;
4440 	}
4441 
4442 	/* Attach the private data to this PCI device.  */
4443 	pci_set_drvdata(pdev, dd);
4444 
4445 	rv = pcim_enable_device(pdev);
4446 	if (rv < 0) {
4447 		dev_err(&pdev->dev, "Unable to enable device\n");
4448 		goto iomap_err;
4449 	}
4450 
4451 	/* Map BAR5 to memory. */
4452 	rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME);
4453 	if (rv < 0) {
4454 		dev_err(&pdev->dev, "Unable to map regions\n");
4455 		goto iomap_err;
4456 	}
4457 
4458 	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
4459 		rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
4460 
4461 		if (rv) {
4462 			rv = pci_set_consistent_dma_mask(pdev,
4463 						DMA_BIT_MASK(32));
4464 			if (rv) {
4465 				dev_warn(&pdev->dev,
4466 					"64-bit DMA enable failed\n");
4467 				goto setmask_err;
4468 			}
4469 		}
4470 	}
4471 
4472 	/* Copy the info we may need later into the private data structure. */
4473 	dd->major	= mtip_major;
4474 	dd->instance	= instance;
4475 	dd->pdev	= pdev;
4476 	dd->numa_node	= my_node;
4477 
4478 	INIT_LIST_HEAD(&dd->online_list);
4479 	INIT_LIST_HEAD(&dd->remove_list);
4480 
4481 	memset(dd->workq_name, 0, 32);
4482 	snprintf(dd->workq_name, 31, "mtipq%d", dd->instance);
4483 
4484 	dd->isr_workq = create_workqueue(dd->workq_name);
4485 	if (!dd->isr_workq) {
4486 		dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
4487 		rv = -ENOMEM;
4488 		goto block_initialize_err;
4489 	}
4490 
4491 	memset(cpu_list, 0, sizeof(cpu_list));
4492 
4493 	node_mask = cpumask_of_node(dd->numa_node);
4494 	if (!cpumask_empty(node_mask)) {
4495 		for_each_cpu(cpu, node_mask)
4496 		{
4497 			snprintf(&cpu_list[j], 256 - j, "%d ", cpu);
4498 			j = strlen(cpu_list);
4499 		}
4500 
4501 		dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n",
4502 			dd->numa_node,
4503 			topology_physical_package_id(cpumask_first(node_mask)),
4504 			nr_cpus_node(dd->numa_node),
4505 			cpu_list);
4506 	} else
4507 		dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n");
4508 
4509 	dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node);
4510 	dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n",
4511 		cpu_to_node(dd->isr_binding), dd->isr_binding);
4512 
4513 	/* first worker context always runs in ISR */
4514 	dd->work[0].cpu_binding = dd->isr_binding;
4515 	dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
4516 	dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
4517 	dd->work[3].cpu_binding = dd->work[0].cpu_binding;
4518 	dd->work[4].cpu_binding = dd->work[1].cpu_binding;
4519 	dd->work[5].cpu_binding = dd->work[2].cpu_binding;
4520 	dd->work[6].cpu_binding = dd->work[2].cpu_binding;
4521 	dd->work[7].cpu_binding = dd->work[1].cpu_binding;
4522 
4523 	/* Log the bindings */
4524 	for_each_present_cpu(cpu) {
4525 		memset(cpu_list, 0, sizeof(cpu_list));
4526 		for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) {
4527 			if (dd->work[i].cpu_binding == cpu) {
4528 				snprintf(&cpu_list[j], 256 - j, "%d ", i);
4529 				j = strlen(cpu_list);
4530 			}
4531 		}
4532 		if (j)
4533 			dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list);
4534 	}
4535 
4536 	INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0);
4537 	INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1);
4538 	INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2);
4539 	INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3);
4540 	INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4);
4541 	INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5);
4542 	INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6);
4543 	INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7);
4544 
4545 	pci_set_master(pdev);
4546 	rv = pci_enable_msi(pdev);
4547 	if (rv) {
4548 		dev_warn(&pdev->dev,
4549 			"Unable to enable MSI interrupt.\n");
4550 		goto block_initialize_err;
4551 	}
4552 
4553 	/* Initialize the block layer. */
4554 	rv = mtip_block_initialize(dd);
4555 	if (rv < 0) {
4556 		dev_err(&pdev->dev,
4557 			"Unable to initialize block layer\n");
4558 		goto block_initialize_err;
4559 	}
4560 
4561 	/*
4562 	 * Increment the instance count so that each device has a unique
4563 	 * instance number.
4564 	 */
4565 	instance++;
4566 	if (rv != MTIP_FTL_REBUILD_MAGIC)
4567 		set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
4568 	else
4569 		rv = 0; /* device in rebuild state, return 0 from probe */
4570 
4571 	/* Add to online list even if in ftl rebuild */
4572 	spin_lock_irqsave(&dev_lock, flags);
4573 	list_add(&dd->online_list, &online_list);
4574 	spin_unlock_irqrestore(&dev_lock, flags);
4575 
4576 	goto done;
4577 
4578 block_initialize_err:
4579 	pci_disable_msi(pdev);
4580 	if (dd->isr_workq) {
4581 		flush_workqueue(dd->isr_workq);
4582 		destroy_workqueue(dd->isr_workq);
4583 		drop_cpu(dd->work[0].cpu_binding);
4584 		drop_cpu(dd->work[1].cpu_binding);
4585 		drop_cpu(dd->work[2].cpu_binding);
4586 	}
4587 setmask_err:
4588 	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
4589 
4590 iomap_err:
4591 	kfree(dd);
4592 	pci_set_drvdata(pdev, NULL);
4593 	return rv;
4594 done:
4595 	return rv;
4596 }
4597 
4598 /*
4599  * Called for each probed device when the device is removed or the
4600  * driver is unloaded.
4601  *
4602  * return value
4603  *	None
4604  */
4605 static void mtip_pci_remove(struct pci_dev *pdev)
4606 {
4607 	struct driver_data *dd = pci_get_drvdata(pdev);
4608 	unsigned long flags, to;
4609 
4610 	set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
4611 
4612 	spin_lock_irqsave(&dev_lock, flags);
4613 	list_del_init(&dd->online_list);
4614 	list_add(&dd->remove_list, &removing_list);
4615 	spin_unlock_irqrestore(&dev_lock, flags);
4616 
4617 	mtip_check_surprise_removal(pdev);
4618 	synchronize_irq(dd->pdev->irq);
4619 
4620 	/* Spin until workers are done */
4621 	to = jiffies + msecs_to_jiffies(4000);
4622 	do {
4623 		msleep(20);
4624 	} while (atomic_read(&dd->irq_workers_active) != 0 &&
4625 		time_before(jiffies, to));
4626 
4627 	if (atomic_read(&dd->irq_workers_active) != 0) {
4628 		dev_warn(&dd->pdev->dev,
4629 			"Completion workers still active!\n");
4630 	}
4631 	/* Cleanup the outstanding commands */
4632 	mtip_command_cleanup(dd);
4633 
4634 	/* Clean up the block layer. */
4635 	mtip_block_remove(dd);
4636 
4637 	if (dd->isr_workq) {
4638 		flush_workqueue(dd->isr_workq);
4639 		destroy_workqueue(dd->isr_workq);
4640 		drop_cpu(dd->work[0].cpu_binding);
4641 		drop_cpu(dd->work[1].cpu_binding);
4642 		drop_cpu(dd->work[2].cpu_binding);
4643 	}
4644 
4645 	pci_disable_msi(pdev);
4646 
4647 	spin_lock_irqsave(&dev_lock, flags);
4648 	list_del_init(&dd->remove_list);
4649 	spin_unlock_irqrestore(&dev_lock, flags);
4650 
4651 	if (!dd->sr)
4652 		kfree(dd);
4653 	else
4654 		set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
4655 
4656 	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
4657 	pci_set_drvdata(pdev, NULL);
4658 	pci_dev_put(pdev);
4659 
4660 }
4661 
4662 /*
4663  * Called for each probed device when the device is suspended.
4664  *
4665  * return value
4666  *	0  Success
4667  *	<0 Error
4668  */
4669 static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
4670 {
4671 	int rv = 0;
4672 	struct driver_data *dd = pci_get_drvdata(pdev);
4673 
4674 	if (!dd) {
4675 		dev_err(&pdev->dev,
4676 			"Driver private datastructure is NULL\n");
4677 		return -EFAULT;
4678 	}
4679 
4680 	set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
4681 
4682 	/* Disable ports & interrupts then send standby immediate */
4683 	rv = mtip_block_suspend(dd);
4684 	if (rv < 0) {
4685 		dev_err(&pdev->dev,
4686 			"Failed to suspend controller\n");
4687 		return rv;
4688 	}
4689 
4690 	/*
4691 	 * Save the pci config space to pdev structure &
4692 	 * disable the device
4693 	 */
4694 	pci_save_state(pdev);
4695 	pci_disable_device(pdev);
4696 
4697 	/* Move to Low power state*/
4698 	pci_set_power_state(pdev, PCI_D3hot);
4699 
4700 	return rv;
4701 }
4702 
4703 /*
4704  * Called for each probed device when the device is resumed.
4705  *
4706  * return value
4707  *      0  Success
4708  *      <0 Error
4709  */
4710 static int mtip_pci_resume(struct pci_dev *pdev)
4711 {
4712 	int rv = 0;
4713 	struct driver_data *dd;
4714 
4715 	dd = pci_get_drvdata(pdev);
4716 	if (!dd) {
4717 		dev_err(&pdev->dev,
4718 			"Driver private datastructure is NULL\n");
4719 		return -EFAULT;
4720 	}
4721 
4722 	/* Move the device to active State */
4723 	pci_set_power_state(pdev, PCI_D0);
4724 
4725 	/* Restore PCI configuration space */
4726 	pci_restore_state(pdev);
4727 
4728 	/* Enable the PCI device*/
4729 	rv = pcim_enable_device(pdev);
4730 	if (rv < 0) {
4731 		dev_err(&pdev->dev,
4732 			"Failed to enable card during resume\n");
4733 		goto err;
4734 	}
4735 	pci_set_master(pdev);
4736 
4737 	/*
4738 	 * Calls hbaReset, initPort, & startPort function
4739 	 * then enables interrupts
4740 	 */
4741 	rv = mtip_block_resume(dd);
4742 	if (rv < 0)
4743 		dev_err(&pdev->dev, "Unable to resume\n");
4744 
4745 err:
4746 	clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
4747 
4748 	return rv;
4749 }
4750 
4751 /*
4752  * Shutdown routine
4753  *
4754  * return value
4755  *      None
4756  */
4757 static void mtip_pci_shutdown(struct pci_dev *pdev)
4758 {
4759 	struct driver_data *dd = pci_get_drvdata(pdev);
4760 	if (dd)
4761 		mtip_block_shutdown(dd);
4762 }
4763 
4764 /* Table of device ids supported by this driver. */
4765 static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = {
4766 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320H_DEVICE_ID) },
4767 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320M_DEVICE_ID) },
4768 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320S_DEVICE_ID) },
4769 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P325M_DEVICE_ID) },
4770 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420H_DEVICE_ID) },
4771 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420M_DEVICE_ID) },
4772 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P425M_DEVICE_ID) },
4773 	{ 0 }
4774 };
4775 
4776 /* Structure that describes the PCI driver functions. */
4777 static struct pci_driver mtip_pci_driver = {
4778 	.name			= MTIP_DRV_NAME,
4779 	.id_table		= mtip_pci_tbl,
4780 	.probe			= mtip_pci_probe,
4781 	.remove			= mtip_pci_remove,
4782 	.suspend		= mtip_pci_suspend,
4783 	.resume			= mtip_pci_resume,
4784 	.shutdown		= mtip_pci_shutdown,
4785 };
4786 
4787 MODULE_DEVICE_TABLE(pci, mtip_pci_tbl);
4788 
4789 /*
4790  * Module initialization function.
4791  *
4792  * Called once when the module is loaded. This function allocates a major
4793  * block device number to the Cyclone devices and registers the PCI layer
4794  * of the driver.
4795  *
4796  * Return value
4797  *      0 on success else error code.
4798  */
4799 static int __init mtip_init(void)
4800 {
4801 	int error;
4802 
4803 	pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
4804 
4805 	spin_lock_init(&dev_lock);
4806 
4807 	INIT_LIST_HEAD(&online_list);
4808 	INIT_LIST_HEAD(&removing_list);
4809 
4810 	/* Allocate a major block device number to use with this driver. */
4811 	error = register_blkdev(0, MTIP_DRV_NAME);
4812 	if (error <= 0) {
4813 		pr_err("Unable to register block device (%d)\n",
4814 		error);
4815 		return -EBUSY;
4816 	}
4817 	mtip_major = error;
4818 
4819 	dfs_parent = debugfs_create_dir("rssd", NULL);
4820 	if (IS_ERR_OR_NULL(dfs_parent)) {
4821 		pr_warn("Error creating debugfs parent\n");
4822 		dfs_parent = NULL;
4823 	}
4824 	if (dfs_parent) {
4825 		dfs_device_status = debugfs_create_file("device_status",
4826 					S_IRUGO, dfs_parent, NULL,
4827 					&mtip_device_status_fops);
4828 		if (IS_ERR_OR_NULL(dfs_device_status)) {
4829 			pr_err("Error creating device_status node\n");
4830 			dfs_device_status = NULL;
4831 		}
4832 	}
4833 
4834 	/* Register our PCI operations. */
4835 	error = pci_register_driver(&mtip_pci_driver);
4836 	if (error) {
4837 		debugfs_remove(dfs_parent);
4838 		unregister_blkdev(mtip_major, MTIP_DRV_NAME);
4839 	}
4840 
4841 	return error;
4842 }
4843 
4844 /*
4845  * Module de-initialization function.
4846  *
4847  * Called once when the module is unloaded. This function deallocates
4848  * the major block device number allocated by mtip_init() and
4849  * unregisters the PCI layer of the driver.
4850  *
4851  * Return value
4852  *      none
4853  */
4854 static void __exit mtip_exit(void)
4855 {
4856 	debugfs_remove_recursive(dfs_parent);
4857 
4858 	/* Release the allocated major block device number. */
4859 	unregister_blkdev(mtip_major, MTIP_DRV_NAME);
4860 
4861 	/* Unregister the PCI driver. */
4862 	pci_unregister_driver(&mtip_pci_driver);
4863 }
4864 
4865 MODULE_AUTHOR("Micron Technology, Inc");
4866 MODULE_DESCRIPTION("Micron RealSSD PCIe Block Driver");
4867 MODULE_LICENSE("GPL");
4868 MODULE_VERSION(MTIP_DRV_VERSION);
4869 
4870 module_init(mtip_init);
4871 module_exit(mtip_exit);
4872