1 /*
2  * Driver for the Micron P320 SSD
3  *   Copyright (C) 2011 Micron Technology, Inc.
4  *
5  * Portions of this code were derived from works subjected to the
6  * following copyright:
7  *    Copyright (C) 2009 Integrated Device Technology, Inc.
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  */
20 
21 #include <linux/pci.h>
22 #include <linux/interrupt.h>
23 #include <linux/ata.h>
24 #include <linux/delay.h>
25 #include <linux/hdreg.h>
26 #include <linux/uaccess.h>
27 #include <linux/random.h>
28 #include <linux/smp.h>
29 #include <linux/compat.h>
30 #include <linux/fs.h>
31 #include <linux/module.h>
32 #include <linux/genhd.h>
33 #include <linux/blkdev.h>
34 #include <linux/bio.h>
35 #include <linux/dma-mapping.h>
36 #include <linux/idr.h>
37 #include <linux/kthread.h>
38 #include <../drivers/ata/ahci.h>
39 #include <linux/export.h>
40 #include <linux/debugfs.h>
41 #include "mtip32xx.h"
42 
/* Size derived from the maximum slot count: 32 bytes per command slot. */
#define HW_CMD_SLOT_SZ		(MTIP_MAX_COMMAND_SLOTS * 32)

/*
 * DMA region containing RX Fis, Identify, RLE10, and SMART buffers.
 * Each buffer is described by a size and an offset inside one shared
 * allocation of BLOCK_DMA_ALLOC_SZ bytes.
 */
#define AHCI_RX_FIS_SZ          0x100
#define AHCI_RX_FIS_OFFSET      0x0
#define AHCI_IDFY_SZ            ATA_SECT_SIZE
#define AHCI_IDFY_OFFSET        0x400
#define AHCI_SECTBUF_SZ         ATA_SECT_SIZE
#define AHCI_SECTBUF_OFFSET     0x800
#define AHCI_SMARTBUF_SZ        ATA_SECT_SIZE
#define AHCI_SMARTBUF_OFFSET    0xC00
/* 0x100 + 0x200 + 0x200 + 0x200 is smaller than 4k but we pad it out */
#define BLOCK_DMA_ALLOC_SZ      4096

/* DMA region containing command table (should be 8192 bytes) */
#define AHCI_CMD_SLOT_SZ        sizeof(struct mtip_cmd_hdr)
#define AHCI_CMD_TBL_SZ         (MTIP_MAX_COMMAND_SLOTS * AHCI_CMD_SLOT_SZ)
#define AHCI_CMD_TBL_OFFSET     0x0

/*
 * DMA region per command (contains header and SGL).
 * The scatter/gather list starts immediately after the 0x80-byte header.
 */
#define AHCI_CMD_TBL_HDR_SZ     0x80
#define AHCI_CMD_TBL_HDR_OFFSET 0x0
#define AHCI_CMD_TBL_SGL_SZ     (MTIP_MAX_SG * sizeof(struct mtip_cmd_sg))
#define AHCI_CMD_TBL_SGL_OFFSET AHCI_CMD_TBL_HDR_SZ
#define CMD_DMA_ALLOC_SZ        (AHCI_CMD_TBL_SGL_SZ + AHCI_CMD_TBL_HDR_SZ)


/*
 * Host (HBA) level register offset and bit definitions.
 * NOTE(review): the HSORG_* field meanings are inferred from their names
 * only; confirm against the hardware documentation.
 */
#define HOST_CAP_NZDMA		(1 << 19)
#define HOST_HSORG		0xFC
#define HSORG_DISABLE_SLOTGRP_INTR (1<<24)
#define HSORG_DISABLE_SLOTGRP_PXIS (1<<16)
#define HSORG_HWREV		0xFF00
#define HSORG_STYLE		0x8
#define HSORG_SLOTGROUPS	0x7

/* Port register offsets (relative to the port's MMIO base). */
#define PORT_COMMAND_ISSUE	0x38
#define PORT_SDBV		0x7C

#define PORT_OFFSET		0x100
#define PORT_MEM_SIZE		0x80

/* Port interrupt bits that are treated as error conditions. */
#define PORT_IRQ_ERR \
	(PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | \
	 PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP | \
	 PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR | PORT_IRQ_IF_NONFATAL | \
	 PORT_IRQ_OVERFLOW)
/* Interrupts raised by PIO setup and D2H register FISes. */
#define PORT_IRQ_LEGACY \
	(PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS)
/*
 * Interrupt bits that have an explicit handler; mtip_process_errors()
 * warns about any status bit outside this set.
 */
#define PORT_IRQ_HANDLED \
	(PORT_IRQ_SDB_FIS | PORT_IRQ_LEGACY | \
	 PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR | \
	 PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY)
/* Interrupt mask programmed into PORT_IRQ_MASK by mtip_init_port(). */
#define DEF_PORT_IRQ \
	(PORT_IRQ_ERR | PORT_IRQ_LEGACY | PORT_IRQ_SDB_FIS)

/* product numbers */
#define MTIP_PRODUCT_UNKNOWN	0x00
#define MTIP_PRODUCT_ASICFPGA	0x11
101 
/* Device instance number, incremented each time a device is probed. */
static int instance;

/*
 * NOTE(review): these three objects have external linkage (no `static`).
 * Their users are outside this part of the file; presumably the two lists
 * track devices and dev_lock guards them - confirm against the callers.
 */
struct list_head online_list;
struct list_head removing_list;
spinlock_t dev_lock;

/*
 * Global variable used to hold the major block device number
 * allocated in mtip_init().
 */
static int mtip_major;
/* debugfs entries; created and removed outside this part of the file. */
static struct dentry *dfs_parent;
static struct dentry *dfs_device_status;

/* One counter per possible CPU. NOTE(review): exact meaning not visible here. */
static u32 cpu_use[NR_CPUS];

/* IDA plus its lock. NOTE(review): presumably allocates rssd disk indexes. */
static DEFINE_SPINLOCK(rssd_index_lock);
static DEFINE_IDA(rssd_index_ida);

/* Forward declaration; the definition is later in the file. */
static int mtip_block_initialize(struct driver_data *dd);
123 
#ifdef CONFIG_COMPAT
/*
 * Task request layout used only when CONFIG_COMPAT is enabled. The two
 * size fields are compat_ulong_t rather than unsigned long.
 * NOTE(review): presumably mirrors the native IDE task request structure
 * for 32-bit user space - confirm against the ioctl handler.
 */
struct mtip_compat_ide_task_request_s {
	__u8		io_ports[8];
	__u8		hob_ports[8];
	ide_reg_valid_t	out_flags;
	ide_reg_valid_t	in_flags;
	int		data_phase;
	int		req_cmd;
	compat_ulong_t	out_size;
	compat_ulong_t	in_size;
};
#endif
136 
137 /*
138  * This function check_for_surprise_removal is called
139  * while card is removed from the system and it will
140  * read the vendor id from the configration space
141  *
142  * @pdev Pointer to the pci_dev structure.
143  *
144  * return value
145  *	 true if device removed, else false
146  */
147 static bool mtip_check_surprise_removal(struct pci_dev *pdev)
148 {
149 	u16 vendor_id = 0;
150 	struct driver_data *dd = pci_get_drvdata(pdev);
151 
152 	if (dd->sr)
153 		return true;
154 
155        /* Read the vendorID from the configuration space */
156 	pci_read_config_word(pdev, 0x00, &vendor_id);
157 	if (vendor_id == 0xFFFF) {
158 		dd->sr = true;
159 		if (dd->queue)
160 			set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags);
161 		else
162 			dev_warn(&dd->pdev->dev,
163 				"%s: dd->queue is NULL\n", __func__);
164 		if (dd->port) {
165 			set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags);
166 			wake_up_interruptible(&dd->port->svc_wait);
167 		} else
168 			dev_warn(&dd->pdev->dev,
169 				"%s: dd->port is NULL\n", __func__);
170 		return true; /* device removed */
171 	}
172 
173 	return false; /* device present */
174 }
175 
176 /*
177  * Obtain an empty command slot.
178  *
179  * This function needs to be reentrant since it could be called
180  * at the same time on multiple CPUs. The allocation of the
181  * command slot must be atomic.
182  *
183  * @port Pointer to the port data structure.
184  *
185  * return value
186  *	>= 0	Index of command slot obtained.
187  *	-1	No command slots available.
188  */
189 static int get_slot(struct mtip_port *port)
190 {
191 	int slot, i;
192 	unsigned int num_command_slots = port->dd->slot_groups * 32;
193 
194 	/*
195 	 * Try 10 times, because there is a small race here.
196 	 *  that's ok, because it's still cheaper than a lock.
197 	 *
198 	 * Race: Since this section is not protected by lock, same bit
199 	 * could be chosen by different process contexts running in
200 	 * different processor. So instead of costly lock, we are going
201 	 * with loop.
202 	 */
203 	for (i = 0; i < 10; i++) {
204 		slot = find_next_zero_bit(port->allocated,
205 					 num_command_slots, 1);
206 		if ((slot < num_command_slots) &&
207 		    (!test_and_set_bit(slot, port->allocated)))
208 			return slot;
209 	}
210 	dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n");
211 
212 	mtip_check_surprise_removal(port->dd->pdev);
213 	return -1;
214 }
215 
216 /*
217  * Release a command slot.
218  *
219  * @port Pointer to the port data structure.
220  * @tag  Tag of command to release
221  *
222  * return value
223  *	None
224  */
225 static inline void release_slot(struct mtip_port *port, int tag)
226 {
227 	smp_mb__before_clear_bit();
228 	clear_bit(tag, port->allocated);
229 	smp_mb__after_clear_bit();
230 }
231 
232 /*
233  * IO completion function.
234  *
235  * This completion function is called by the driver ISR when a
236  * command that was issued by the kernel completes. It first calls the
237  * asynchronous completion function which normally calls back into the block
238  * layer passing the asynchronous callback data, then unmaps the
239  * scatter list associated with the completed command, and finally
240  * clears the allocated bit associated with the completed command.
241  *
242  * @port   Pointer to the port data structure.
243  * @tag    Tag of the command.
244  * @data   Pointer to driver_data.
245  * @status Completion status.
246  *
247  * return value
248  *	None
249  */
250 static void mtip_async_complete(struct mtip_port *port,
251 				int tag,
252 				void *data,
253 				int status)
254 {
255 	struct mtip_cmd *cmd;
256 	struct driver_data *dd = data;
257 	int unaligned, cb_status = status ? -EIO : 0;
258 	void (*func)(void *, int);
259 
260 	if (unlikely(!dd) || unlikely(!port))
261 		return;
262 
263 	cmd = &port->commands[tag];
264 
265 	if (unlikely(status == PORT_IRQ_TF_ERR)) {
266 		dev_warn(&port->dd->pdev->dev,
267 			"Command tag %d failed due to TFE\n", tag);
268 	}
269 
270 	/* Clear the active flag */
271 	atomic_set(&port->commands[tag].active, 0);
272 
273 	/* Upper layer callback */
274 	func = cmd->async_callback;
275 	if (likely(func && cmpxchg(&cmd->async_callback, func, 0) == func)) {
276 
277 		/* Unmap the DMA scatter list entries */
278 		dma_unmap_sg(&dd->pdev->dev,
279 			cmd->sg,
280 			cmd->scatter_ents,
281 			cmd->direction);
282 
283 		func(cmd->async_data, cb_status);
284 		unaligned = cmd->unaligned;
285 
286 		/* Clear the allocated bit for the command */
287 		release_slot(port, tag);
288 
289 		if (unlikely(unaligned))
290 			up(&port->cmd_slot_unal);
291 		else
292 			up(&port->cmd_slot);
293 	}
294 }
295 
296 /*
297  * This function is called for clean the pending command in the
298  * command slot during the surprise removal of device and return
299  * error to the upper layer.
300  *
301  * @dd Pointer to the DRIVER_DATA structure.
302  *
303  * return value
304  *	None
305  */
306 static void mtip_command_cleanup(struct driver_data *dd)
307 {
308 	int tag = 0;
309 	struct mtip_cmd *cmd;
310 	struct mtip_port *port = dd->port;
311 	unsigned int num_cmd_slots = dd->slot_groups * 32;
312 
313 	if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
314 		return;
315 
316 	if (!port)
317 		return;
318 
319 	cmd = &port->commands[MTIP_TAG_INTERNAL];
320 	if (atomic_read(&cmd->active))
321 		if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) &
322 					(1 << MTIP_TAG_INTERNAL))
323 			if (cmd->comp_func)
324 				cmd->comp_func(port, MTIP_TAG_INTERNAL,
325 					 cmd->comp_data, -ENODEV);
326 
327 	while (1) {
328 		tag = find_next_bit(port->allocated, num_cmd_slots, tag);
329 		if (tag >= num_cmd_slots)
330 			break;
331 
332 		cmd = &port->commands[tag];
333 		if (atomic_read(&cmd->active))
334 			mtip_async_complete(port, tag, dd, -ENODEV);
335 	}
336 
337 	set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
338 }
339 
340 /*
341  * Reset the HBA (without sleeping)
342  *
343  * @dd Pointer to the driver data structure.
344  *
345  * return value
346  *	0	The reset was successful.
347  *	-1	The HBA Reset bit did not clear.
348  */
349 static int mtip_hba_reset(struct driver_data *dd)
350 {
351 	unsigned long timeout;
352 
353 	/* Set the reset bit */
354 	writel(HOST_RESET, dd->mmio + HOST_CTL);
355 
356 	/* Flush */
357 	readl(dd->mmio + HOST_CTL);
358 
359 	/* Spin for up to 2 seconds, waiting for reset acknowledgement */
360 	timeout = jiffies + msecs_to_jiffies(2000);
361 	do {
362 		mdelay(10);
363 		if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
364 			return -1;
365 
366 	} while ((readl(dd->mmio + HOST_CTL) & HOST_RESET)
367 		 && time_before(jiffies, timeout));
368 
369 	if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
370 		return -1;
371 
372 	return 0;
373 }
374 
375 /*
376  * Issue a command to the hardware.
377  *
378  * Set the appropriate bit in the s_active and Command Issue hardware
379  * registers, causing hardware command processing to begin.
380  *
381  * @port Pointer to the port structure.
382  * @tag  The tag of the command to be issued.
383  *
384  * return value
385  *      None
386  */
387 static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
388 {
389 	int group = tag >> 5;
390 
391 	atomic_set(&port->commands[tag].active, 1);
392 
393 	/* guard SACT and CI registers */
394 	spin_lock(&port->cmd_issue_lock[group]);
395 	writel((1 << MTIP_TAG_BIT(tag)),
396 			port->s_active[MTIP_TAG_INDEX(tag)]);
397 	writel((1 << MTIP_TAG_BIT(tag)),
398 			port->cmd_issue[MTIP_TAG_INDEX(tag)]);
399 	spin_unlock(&port->cmd_issue_lock[group]);
400 
401 	/* Set the command's timeout value.*/
402 	port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
403 					MTIP_NCQ_COMMAND_TIMEOUT_MS);
404 }
405 
406 /*
407  * Enable/disable the reception of FIS
408  *
409  * @port   Pointer to the port data structure
410  * @enable 1 to enable, 0 to disable
411  *
412  * return value
413  *	Previous state: 1 enabled, 0 disabled
414  */
415 static int mtip_enable_fis(struct mtip_port *port, int enable)
416 {
417 	u32 tmp;
418 
419 	/* enable FIS reception */
420 	tmp = readl(port->mmio + PORT_CMD);
421 	if (enable)
422 		writel(tmp | PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
423 	else
424 		writel(tmp & ~PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
425 
426 	/* Flush */
427 	readl(port->mmio + PORT_CMD);
428 
429 	return (((tmp & PORT_CMD_FIS_RX) == PORT_CMD_FIS_RX));
430 }
431 
432 /*
433  * Enable/disable the DMA engine
434  *
435  * @port   Pointer to the port data structure
436  * @enable 1 to enable, 0 to disable
437  *
438  * return value
439  *	Previous state: 1 enabled, 0 disabled.
440  */
441 static int mtip_enable_engine(struct mtip_port *port, int enable)
442 {
443 	u32 tmp;
444 
445 	/* enable FIS reception */
446 	tmp = readl(port->mmio + PORT_CMD);
447 	if (enable)
448 		writel(tmp | PORT_CMD_START, port->mmio + PORT_CMD);
449 	else
450 		writel(tmp & ~PORT_CMD_START, port->mmio + PORT_CMD);
451 
452 	readl(port->mmio + PORT_CMD);
453 	return (((tmp & PORT_CMD_START) == PORT_CMD_START));
454 }
455 
/*
 * Enables the port DMA engine and FIS reception.
 *
 * FIS reception is switched on first, then the DMA engine. The previous
 * states returned by the helpers are not needed here.
 *
 * @port Pointer to the port structure
 *
 * return value
 *	None
 */
static inline void mtip_start_port(struct mtip_port *port)
{
	(void)mtip_enable_fis(port, 1);		/* FIS reception on */
	(void)mtip_enable_engine(port, 1);	/* DMA engine on */
}
470 
471 /*
472  * Deinitialize a port by disabling port interrupts, the DMA engine,
473  * and FIS reception.
474  *
475  * @port Pointer to the port structure
476  *
477  * return value
478  *	None
479  */
480 static inline void mtip_deinit_port(struct mtip_port *port)
481 {
482 	/* Disable interrupts on this port */
483 	writel(0, port->mmio + PORT_IRQ_MASK);
484 
485 	/* Disable the DMA engine */
486 	mtip_enable_engine(port, 0);
487 
488 	/* Disable FIS reception */
489 	mtip_enable_fis(port, 0);
490 }
491 
492 /*
493  * Initialize a port.
494  *
495  * This function deinitializes the port by calling mtip_deinit_port() and
496  * then initializes it by setting the command header and RX FIS addresses,
497  * clearing the SError register and any pending port interrupts before
498  * re-enabling the default set of port interrupts.
499  *
500  * @port Pointer to the port structure.
501  *
502  * return value
503  *	None
504  */
505 static void mtip_init_port(struct mtip_port *port)
506 {
507 	int i;
508 	mtip_deinit_port(port);
509 
510 	/* Program the command list base and FIS base addresses */
511 	if (readl(port->dd->mmio + HOST_CAP) & HOST_CAP_64) {
512 		writel((port->command_list_dma >> 16) >> 16,
513 			 port->mmio + PORT_LST_ADDR_HI);
514 		writel((port->rxfis_dma >> 16) >> 16,
515 			 port->mmio + PORT_FIS_ADDR_HI);
516 	}
517 
518 	writel(port->command_list_dma & 0xFFFFFFFF,
519 			port->mmio + PORT_LST_ADDR);
520 	writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR);
521 
522 	/* Clear SError */
523 	writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
524 
525 	/* reset the completed registers.*/
526 	for (i = 0; i < port->dd->slot_groups; i++)
527 		writel(0xFFFFFFFF, port->completed[i]);
528 
529 	/* Clear any pending interrupts for this port */
530 	writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT);
531 
532 	/* Clear any pending interrupts on the HBA. */
533 	writel(readl(port->dd->mmio + HOST_IRQ_STAT),
534 					port->dd->mmio + HOST_IRQ_STAT);
535 
536 	/* Enable port interrupts */
537 	writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK);
538 }
539 
540 /*
541  * Restart a port
542  *
543  * @port Pointer to the port data structure.
544  *
545  * return value
546  *	None
547  */
548 static void mtip_restart_port(struct mtip_port *port)
549 {
550 	unsigned long timeout;
551 
552 	/* Disable the DMA engine */
553 	mtip_enable_engine(port, 0);
554 
555 	/* Chip quirk: wait up to 500ms for PxCMD.CR == 0 */
556 	timeout = jiffies + msecs_to_jiffies(500);
557 	while ((readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON)
558 		 && time_before(jiffies, timeout))
559 		;
560 
561 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
562 		return;
563 
564 	/*
565 	 * Chip quirk: escalate to hba reset if
566 	 * PxCMD.CR not clear after 500 ms
567 	 */
568 	if (readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) {
569 		dev_warn(&port->dd->pdev->dev,
570 			"PxCMD.CR not clear, escalating reset\n");
571 
572 		if (mtip_hba_reset(port->dd))
573 			dev_err(&port->dd->pdev->dev,
574 				"HBA reset escalation failed.\n");
575 
576 		/* 30 ms delay before com reset to quiesce chip */
577 		mdelay(30);
578 	}
579 
580 	dev_warn(&port->dd->pdev->dev, "Issuing COM reset\n");
581 
582 	/* Set PxSCTL.DET */
583 	writel(readl(port->mmio + PORT_SCR_CTL) |
584 			 1, port->mmio + PORT_SCR_CTL);
585 	readl(port->mmio + PORT_SCR_CTL);
586 
587 	/* Wait 1 ms to quiesce chip function */
588 	timeout = jiffies + msecs_to_jiffies(1);
589 	while (time_before(jiffies, timeout))
590 		;
591 
592 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
593 		return;
594 
595 	/* Clear PxSCTL.DET */
596 	writel(readl(port->mmio + PORT_SCR_CTL) & ~1,
597 			 port->mmio + PORT_SCR_CTL);
598 	readl(port->mmio + PORT_SCR_CTL);
599 
600 	/* Wait 500 ms for bit 0 of PORT_SCR_STS to be set */
601 	timeout = jiffies + msecs_to_jiffies(500);
602 	while (((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
603 			 && time_before(jiffies, timeout))
604 		;
605 
606 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
607 		return;
608 
609 	if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
610 		dev_warn(&port->dd->pdev->dev,
611 			"COM reset failed\n");
612 
613 	mtip_init_port(port);
614 	mtip_start_port(port);
615 
616 }
617 
618 static int mtip_device_reset(struct driver_data *dd)
619 {
620 	int rv = 0;
621 
622 	if (mtip_check_surprise_removal(dd->pdev))
623 		return 0;
624 
625 	if (mtip_hba_reset(dd) < 0)
626 		rv = -EFAULT;
627 
628 	mdelay(1);
629 	mtip_init_port(dd->port);
630 	mtip_start_port(dd->port);
631 
632 	/* Enable interrupts on the HBA. */
633 	writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
634 					dd->mmio + HOST_CTL);
635 	return rv;
636 }
637 
638 /*
639  * Helper function for tag logging
640  */
641 static void print_tags(struct driver_data *dd,
642 			char *msg,
643 			unsigned long *tagbits,
644 			int cnt)
645 {
646 	unsigned char tagmap[128];
647 	int group, tagmap_len = 0;
648 
649 	memset(tagmap, 0, sizeof(tagmap));
650 	for (group = SLOTBITS_IN_LONGS; group > 0; group--)
651 		tagmap_len = sprintf(tagmap + tagmap_len, "%016lX ",
652 						tagbits[group-1]);
653 	dev_warn(&dd->pdev->dev,
654 			"%d command(s) %s: tagmap [%s]", cnt, msg, tagmap);
655 }
656 
657 /*
658  * Called periodically to see if any read/write commands are
659  * taking too long to complete.
660  *
661  * @data Pointer to the PORT data structure.
662  *
663  * return value
664  *	None
665  */
666 static void mtip_timeout_function(unsigned long int data)
667 {
668 	struct mtip_port *port = (struct mtip_port *) data;
669 	struct host_to_dev_fis *fis;
670 	struct mtip_cmd *cmd;
671 	int unaligned, tag, cmdto_cnt = 0;
672 	unsigned int bit, group;
673 	unsigned int num_command_slots;
674 	unsigned long to, tagaccum[SLOTBITS_IN_LONGS];
675 	void (*func)(void *, int);
676 
677 	if (unlikely(!port))
678 		return;
679 
680 	if (unlikely(port->dd->sr))
681 		return;
682 
683 	if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) {
684 		mod_timer(&port->cmd_timer,
685 			jiffies + msecs_to_jiffies(30000));
686 		return;
687 	}
688 	/* clear the tag accumulator */
689 	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
690 	num_command_slots = port->dd->slot_groups * 32;
691 
692 	for (tag = 0; tag < num_command_slots; tag++) {
693 		/*
694 		 * Skip internal command slot as it has
695 		 * its own timeout mechanism
696 		 */
697 		if (tag == MTIP_TAG_INTERNAL)
698 			continue;
699 
700 		if (atomic_read(&port->commands[tag].active) &&
701 		   (time_after(jiffies, port->commands[tag].comp_time))) {
702 			group = tag >> 5;
703 			bit = tag & 0x1F;
704 
705 			cmd = &port->commands[tag];
706 			fis = (struct host_to_dev_fis *) cmd->command;
707 
708 			set_bit(tag, tagaccum);
709 			cmdto_cnt++;
710 			if (cmdto_cnt == 1)
711 				set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
712 
713 			/*
714 			 * Clear the completed bit. This should prevent
715 			 *  any interrupt handlers from trying to retire
716 			 *  the command.
717 			 */
718 			writel(1 << bit, port->completed[group]);
719 
720 			/* Clear the active flag for the command */
721 			atomic_set(&port->commands[tag].active, 0);
722 
723 			func = cmd->async_callback;
724 			if (func &&
725 			    cmpxchg(&cmd->async_callback, func, 0) == func) {
726 
727 				/* Unmap the DMA scatter list entries */
728 				dma_unmap_sg(&port->dd->pdev->dev,
729 						cmd->sg,
730 						cmd->scatter_ents,
731 						cmd->direction);
732 
733 				func(cmd->async_data, -EIO);
734 				unaligned = cmd->unaligned;
735 
736 				/* Clear the allocated bit for the command. */
737 				release_slot(port, tag);
738 
739 				if (unaligned)
740 					up(&port->cmd_slot_unal);
741 				else
742 					up(&port->cmd_slot);
743 			}
744 		}
745 	}
746 
747 	if (cmdto_cnt) {
748 		print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
749 		if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
750 			mtip_device_reset(port->dd);
751 			wake_up_interruptible(&port->svc_wait);
752 		}
753 		clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
754 	}
755 
756 	if (port->ic_pause_timer) {
757 		to  = port->ic_pause_timer + msecs_to_jiffies(1000);
758 		if (time_after(jiffies, to)) {
759 			if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
760 				port->ic_pause_timer = 0;
761 				clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
762 				clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
763 				clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
764 				wake_up_interruptible(&port->svc_wait);
765 			}
766 
767 
768 		}
769 	}
770 
771 	/* Restart the timer */
772 	mod_timer(&port->cmd_timer,
773 		jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
774 }
775 
776 /*
777  * Internal command completion callback function.
778  *
779  * This function is normally called by the driver ISR when an internal
780  * command completed. This function signals the command completion by
781  * calling complete().
782  *
783  * @port   Pointer to the port data structure.
784  * @tag    Tag of the command that has completed.
785  * @data   Pointer to a completion structure.
786  * @status Completion status.
787  *
788  * return value
789  *	None
790  */
791 static void mtip_completion(struct mtip_port *port,
792 			    int tag,
793 			    void *data,
794 			    int status)
795 {
796 	struct mtip_cmd *command = &port->commands[tag];
797 	struct completion *waiting = data;
798 	if (unlikely(status == PORT_IRQ_TF_ERR))
799 		dev_warn(&port->dd->pdev->dev,
800 			"Internal command %d completed with TFE\n", tag);
801 
802 	command->async_callback = NULL;
803 	command->comp_func = NULL;
804 
805 	complete(waiting);
806 }
807 
/*
 * Completion callback that deliberately does nothing.
 *
 * Has the same signature as the other completion functions so it can be
 * installed where a callback is required but no action is wanted.
 *
 * return value
 *	None
 */
static void mtip_null_completion(struct mtip_port *port,
			    int tag,
			    void *data,
			    int status)
{
	/* Intentionally empty. */
}
815 
/*
 * Forward declarations; both functions are defined later in the file.
 * mtip_read_log_page() is needed by mtip_handle_tfe() below.
 */
static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
				dma_addr_t buffer_dma, unsigned int sectors);
static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
						struct smart_attr *attrib);
820 /*
821  * Handle an error.
822  *
823  * @dd Pointer to the DRIVER_DATA structure.
824  *
825  * return value
826  *	None
827  */
828 static void mtip_handle_tfe(struct driver_data *dd)
829 {
830 	int group, tag, bit, reissue, rv;
831 	struct mtip_port *port;
832 	struct mtip_cmd  *cmd;
833 	u32 completed;
834 	struct host_to_dev_fis *fis;
835 	unsigned long tagaccum[SLOTBITS_IN_LONGS];
836 	unsigned int cmd_cnt = 0;
837 	unsigned char *buf;
838 	char *fail_reason = NULL;
839 	int fail_all_ncq_write = 0, fail_all_ncq_cmds = 0;
840 
841 	dev_warn(&dd->pdev->dev, "Taskfile error\n");
842 
843 	port = dd->port;
844 
845 	/* Stop the timer to prevent command timeouts. */
846 	del_timer(&port->cmd_timer);
847 	set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
848 
849 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
850 			test_bit(MTIP_TAG_INTERNAL, port->allocated)) {
851 		cmd = &port->commands[MTIP_TAG_INTERNAL];
852 		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
853 
854 		atomic_inc(&cmd->active); /* active > 1 indicates error */
855 		if (cmd->comp_data && cmd->comp_func) {
856 			cmd->comp_func(port, MTIP_TAG_INTERNAL,
857 					cmd->comp_data, PORT_IRQ_TF_ERR);
858 		}
859 		goto handle_tfe_exit;
860 	}
861 
862 	/* clear the tag accumulator */
863 	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
864 
865 	/* Loop through all the groups */
866 	for (group = 0; group < dd->slot_groups; group++) {
867 		completed = readl(port->completed[group]);
868 
869 		/* clear completed status register in the hardware.*/
870 		writel(completed, port->completed[group]);
871 
872 		/* Process successfully completed commands */
873 		for (bit = 0; bit < 32 && completed; bit++) {
874 			if (!(completed & (1<<bit)))
875 				continue;
876 			tag = (group << 5) + bit;
877 
878 			/* Skip the internal command slot */
879 			if (tag == MTIP_TAG_INTERNAL)
880 				continue;
881 
882 			cmd = &port->commands[tag];
883 			if (likely(cmd->comp_func)) {
884 				set_bit(tag, tagaccum);
885 				cmd_cnt++;
886 				atomic_set(&cmd->active, 0);
887 				cmd->comp_func(port,
888 					 tag,
889 					 cmd->comp_data,
890 					 0);
891 			} else {
892 				dev_err(&port->dd->pdev->dev,
893 					"Missing completion func for tag %d",
894 					tag);
895 				if (mtip_check_surprise_removal(dd->pdev)) {
896 					/* don't proceed further */
897 					return;
898 				}
899 			}
900 		}
901 	}
902 
903 	print_tags(dd, "completed (TFE)", tagaccum, cmd_cnt);
904 
905 	/* Restart the port */
906 	mdelay(20);
907 	mtip_restart_port(port);
908 
909 	/* Trying to determine the cause of the error */
910 	rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
911 				dd->port->log_buf,
912 				dd->port->log_buf_dma, 1);
913 	if (rv) {
914 		dev_warn(&dd->pdev->dev,
915 			"Error in READ LOG EXT (10h) command\n");
916 		/* non-critical error, don't fail the load */
917 	} else {
918 		buf = (unsigned char *)dd->port->log_buf;
919 		if (buf[259] & 0x1) {
920 			dev_info(&dd->pdev->dev,
921 				"Write protect bit is set.\n");
922 			set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
923 			fail_all_ncq_write = 1;
924 			fail_reason = "write protect";
925 		}
926 		if (buf[288] == 0xF7) {
927 			dev_info(&dd->pdev->dev,
928 				"Exceeded Tmax, drive in thermal shutdown.\n");
929 			set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
930 			fail_all_ncq_cmds = 1;
931 			fail_reason = "thermal shutdown";
932 		}
933 		if (buf[288] == 0xBF) {
934 			set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
935 			dev_info(&dd->pdev->dev,
936 				"Drive indicates rebuild has failed. Secure erase required.\n");
937 			fail_all_ncq_cmds = 1;
938 			fail_reason = "rebuild failed";
939 		}
940 	}
941 
942 	/* clear the tag accumulator */
943 	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
944 
945 	/* Loop through all the groups */
946 	for (group = 0; group < dd->slot_groups; group++) {
947 		for (bit = 0; bit < 32; bit++) {
948 			reissue = 1;
949 			tag = (group << 5) + bit;
950 			cmd = &port->commands[tag];
951 
952 			/* If the active bit is set re-issue the command */
953 			if (atomic_read(&cmd->active) == 0)
954 				continue;
955 
956 			fis = (struct host_to_dev_fis *)cmd->command;
957 
958 			/* Should re-issue? */
959 			if (tag == MTIP_TAG_INTERNAL ||
960 			    fis->command == ATA_CMD_SET_FEATURES)
961 				reissue = 0;
962 			else {
963 				if (fail_all_ncq_cmds ||
964 					(fail_all_ncq_write &&
965 					fis->command == ATA_CMD_FPDMA_WRITE)) {
966 					dev_warn(&dd->pdev->dev,
967 					"  Fail: %s w/tag %d [%s].\n",
968 					fis->command == ATA_CMD_FPDMA_WRITE ?
969 						"write" : "read",
970 					tag,
971 					fail_reason != NULL ?
972 						fail_reason : "unknown");
973 					atomic_set(&cmd->active, 0);
974 					if (cmd->comp_func) {
975 						cmd->comp_func(port, tag,
976 							cmd->comp_data,
977 							-ENODATA);
978 					}
979 					continue;
980 				}
981 			}
982 
983 			/*
984 			 * First check if this command has
985 			 *  exceeded its retries.
986 			 */
987 			if (reissue && (cmd->retries-- > 0)) {
988 
989 				set_bit(tag, tagaccum);
990 
991 				/* Re-issue the command. */
992 				mtip_issue_ncq_command(port, tag);
993 
994 				continue;
995 			}
996 
997 			/* Retire a command that will not be reissued */
998 			dev_warn(&port->dd->pdev->dev,
999 				"retiring tag %d\n", tag);
1000 			atomic_set(&cmd->active, 0);
1001 
1002 			if (cmd->comp_func)
1003 				cmd->comp_func(
1004 					port,
1005 					tag,
1006 					cmd->comp_data,
1007 					PORT_IRQ_TF_ERR);
1008 			else
1009 				dev_warn(&port->dd->pdev->dev,
1010 					"Bad completion for tag %d\n",
1011 					tag);
1012 		}
1013 	}
1014 	print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
1015 
1016 handle_tfe_exit:
1017 	/* clear eh_active */
1018 	clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
1019 	wake_up_interruptible(&port->svc_wait);
1020 
1021 	mod_timer(&port->cmd_timer,
1022 		 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
1023 }
1024 
1025 /*
1026  * Handle a set device bits interrupt
1027  */
1028 static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
1029 							u32 completed)
1030 {
1031 	struct driver_data *dd = port->dd;
1032 	int tag, bit;
1033 	struct mtip_cmd *command;
1034 
1035 	if (!completed) {
1036 		WARN_ON_ONCE(!completed);
1037 		return;
1038 	}
1039 	/* clear completed status register in the hardware.*/
1040 	writel(completed, port->completed[group]);
1041 
1042 	/* Process completed commands. */
1043 	for (bit = 0; (bit < 32) && completed; bit++) {
1044 		if (completed & 0x01) {
1045 			tag = (group << 5) | bit;
1046 
1047 			/* skip internal command slot. */
1048 			if (unlikely(tag == MTIP_TAG_INTERNAL))
1049 				continue;
1050 
1051 			command = &port->commands[tag];
1052 			/* make internal callback */
1053 			if (likely(command->comp_func)) {
1054 				command->comp_func(
1055 					port,
1056 					tag,
1057 					command->comp_data,
1058 					0);
1059 			} else {
1060 				dev_dbg(&dd->pdev->dev,
1061 					"Null completion for tag %d",
1062 					tag);
1063 
1064 				if (mtip_check_surprise_removal(
1065 					dd->pdev)) {
1066 					return;
1067 				}
1068 			}
1069 		}
1070 		completed >>= 1;
1071 	}
1072 
1073 	/* If last, re-enable interrupts */
1074 	if (atomic_dec_return(&dd->irq_workers_active) == 0)
1075 		writel(0xffffffff, dd->mmio + HOST_IRQ_STAT);
1076 }
1077 
1078 /*
1079  * Process legacy pio and d2h interrupts
1080  */
1081 static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
1082 {
1083 	struct mtip_port *port = dd->port;
1084 	struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
1085 
1086 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
1087 	    (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1088 		& (1 << MTIP_TAG_INTERNAL))) {
1089 		if (cmd->comp_func) {
1090 			cmd->comp_func(port,
1091 				MTIP_TAG_INTERNAL,
1092 				cmd->comp_data,
1093 				0);
1094 			return;
1095 		}
1096 	}
1097 
1098 	return;
1099 }
1100 
1101 /*
1102  * Demux and handle errors
1103  */
1104 static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
1105 {
1106 	if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR)))
1107 		mtip_handle_tfe(dd);
1108 
1109 	if (unlikely(port_stat & PORT_IRQ_CONNECT)) {
1110 		dev_warn(&dd->pdev->dev,
1111 			"Clearing PxSERR.DIAG.x\n");
1112 		writel((1 << 26), dd->port->mmio + PORT_SCR_ERR);
1113 	}
1114 
1115 	if (unlikely(port_stat & PORT_IRQ_PHYRDY)) {
1116 		dev_warn(&dd->pdev->dev,
1117 			"Clearing PxSERR.DIAG.n\n");
1118 		writel((1 << 16), dd->port->mmio + PORT_SCR_ERR);
1119 	}
1120 
1121 	if (unlikely(port_stat & ~PORT_IRQ_HANDLED)) {
1122 		dev_warn(&dd->pdev->dev,
1123 			"Port stat errors %x unhandled\n",
1124 			(port_stat & ~PORT_IRQ_HANDLED));
1125 	}
1126 }
1127 
/*
 * Top-level interrupt demultiplexer.
 *
 * Reads HOST_IRQ_STAT; when it is non-zero, reads and writes back
 * PORT_IRQ_STAT and dispatches on its bits:
 *   - PORT_IRQ_SDB_FIS: snapshot the completed register of every slot
 *     group, queue work for groups 1..MTIP_MAX_SLOT_GROUPS-1 and process
 *     group 0 inline through mtip_workq_sdbfx().
 *   - PORT_IRQ_ERR:     mtip_process_errors(), unless the device was
 *     removed or removal is pending.
 *   - PORT_IRQ_LEGACY:  mtip_process_legacy().
 *
 * @data Pointer to the driver data structure.
 *
 * return value
 *	IRQ_HANDLED	HOST_IRQ_STAT was non-zero.
 *	IRQ_NONE	HOST_IRQ_STAT was zero.
 */
static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
{
	/* data already has the right type; the cast is a no-op. */
	struct driver_data *dd = (struct driver_data *) data;
	struct mtip_port *port = dd->port;
	u32 hba_stat, port_stat;
	int rv = IRQ_NONE;
	/* do_irq_enable: write hba_stat back to HOST_IRQ_STAT before returning */
	int do_irq_enable = 1, i, workers;
	struct mtip_work *twork;

	hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
	if (hba_stat) {
		rv = IRQ_HANDLED;

		/* Acknowledge the interrupt status on the port.*/
		port_stat = readl(port->mmio + PORT_IRQ_STAT);
		writel(port_stat, port->mmio + PORT_IRQ_STAT);

		/* Demux port status */
		if (likely(port_stat & PORT_IRQ_SDB_FIS)) {
			/*
			 * The HOST_IRQ_STAT write is left to the last
			 * mtip_workq_sdbfx() invocation to finish.
			 */
			do_irq_enable = 0;
			WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0);

			/*
			 * Snapshot the completed register of every slot
			 * group (including group 0) and count the groups
			 * that have something to complete.
			 */
			for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS;
									i++) {
				twork = &dd->work[i];
				twork->completed = readl(port->completed[i]);
				if (twork->completed)
					workers++;
			}

			atomic_set(&dd->irq_workers_active, workers);
			if (workers) {
				/*
				 * Groups 1 and up are queued on their bound
				 * CPU; group 0 is handled inline below.
				 */
				for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) {
					twork = &dd->work[i];
					if (twork->completed)
						queue_work_on(
							twork->cpu_binding,
							dd->isr_workq,
							&twork->work);
				}

				if (likely(dd->work[0].completed))
					mtip_workq_sdbfx(port, 0,
							dd->work[0].completed);

			} else {
				/*
				 * Chip quirk: SDB interrupt but nothing
				 * to complete
				 */
				do_irq_enable = 1;
			}
		}

		if (unlikely(port_stat & PORT_IRQ_ERR)) {
			/*
			 * NOTE(review): both early returns below skip the
			 * HOST_IRQ_STAT write at the end of this function.
			 */
			if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
				/* don't proceed further */
				return IRQ_HANDLED;
			}
			if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
							&dd->dd_flag))
				return rv;

			mtip_process_errors(dd, port_stat & PORT_IRQ_ERR);
		}

		if (unlikely(port_stat & PORT_IRQ_LEGACY))
			mtip_process_legacy(dd, port_stat & PORT_IRQ_LEGACY);
	}

	/* acknowledge interrupt */
	if (unlikely(do_irq_enable))
		writel(hba_stat, dd->mmio + HOST_IRQ_STAT);

	return rv;
}
1205 
1206 /*
1207  * HBA interrupt subroutine.
1208  *
1209  * @irq		IRQ number.
1210  * @instance	Pointer to the driver data structure.
1211  *
1212  * return value
1213  *	IRQ_HANDLED	A HBA interrupt was pending and handled.
1214  *	IRQ_NONE	This interrupt was not for the HBA.
1215  */
1216 static irqreturn_t mtip_irq_handler(int irq, void *instance)
1217 {
1218 	struct driver_data *dd = instance;
1219 
1220 	return mtip_handle_irq(dd);
1221 }
1222 
1223 static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
1224 {
1225 	atomic_set(&port->commands[tag].active, 1);
1226 	writel(1 << MTIP_TAG_BIT(tag),
1227 		port->cmd_issue[MTIP_TAG_INDEX(tag)]);
1228 }
1229 
1230 static bool mtip_pause_ncq(struct mtip_port *port,
1231 				struct host_to_dev_fis *fis)
1232 {
1233 	struct host_to_dev_fis *reply;
1234 	unsigned long task_file_data;
1235 
1236 	reply = port->rxfis + RX_FIS_D2H_REG;
1237 	task_file_data = readl(port->mmio+PORT_TFDATA);
1238 
1239 	if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
1240 		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1241 
1242 	if ((task_file_data & 1))
1243 		return false;
1244 
1245 	if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
1246 		set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
1247 		set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1248 		port->ic_pause_timer = jiffies;
1249 		return true;
1250 	} else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
1251 					(fis->features == 0x03)) {
1252 		set_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
1253 		port->ic_pause_timer = jiffies;
1254 		return true;
1255 	} else if ((fis->command == ATA_CMD_SEC_ERASE_UNIT) ||
1256 		((fis->command == 0xFC) &&
1257 			(fis->features == 0x27 || fis->features == 0x72 ||
1258 			 fis->features == 0x62 || fis->features == 0x26))) {
1259 		/* Com reset after secure erase or lowlevel format */
1260 		mtip_restart_port(port);
1261 		return false;
1262 	}
1263 
1264 	return false;
1265 }
1266 
1267 /*
1268  * Wait for port to quiesce
1269  *
1270  * @port    Pointer to port data structure
1271  * @timeout Max duration to wait (ms)
1272  *
1273  * return value
1274  *	0	Success
1275  *	-EBUSY  Commands still active
1276  */
1277 static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
1278 {
1279 	unsigned long to;
1280 	unsigned int n;
1281 	unsigned int active = 1;
1282 
1283 	to = jiffies + msecs_to_jiffies(timeout);
1284 	do {
1285 		if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
1286 			test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
1287 			msleep(20);
1288 			continue; /* svc thd is actively issuing commands */
1289 		}
1290 		if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
1291 			return -EFAULT;
1292 		/*
1293 		 * Ignore s_active bit 0 of array element 0.
1294 		 * This bit will always be set
1295 		 */
1296 		active = readl(port->s_active[0]) & 0xFFFFFFFE;
1297 		for (n = 1; n < port->dd->slot_groups; n++)
1298 			active |= readl(port->s_active[n]);
1299 
1300 		if (!active)
1301 			break;
1302 
1303 		msleep(20);
1304 	} while (time_before(jiffies, to));
1305 
1306 	return active ? -EBUSY : 0;
1307 }
1308 
1309 /*
1310  * Execute an internal command and wait for the completion.
1311  *
1312  * @port    Pointer to the port data structure.
1313  * @fis     Pointer to the FIS that describes the command.
1314  * @fis_len  Length in WORDS of the FIS.
1315  * @buffer  DMA accessible for command data.
1316  * @buf_len  Length, in bytes, of the data buffer.
1317  * @opts    Command header options, excluding the FIS length
1318  *             and the number of PRD entries.
1319  * @timeout Time in ms to wait for the command to complete.
1320  *
1321  * return value
1322  *	0	 Command completed successfully.
1323  *	-EFAULT  The buffer address is not correctly aligned.
1324  *	-EBUSY   Internal command or other IO in progress.
1325  *	-EAGAIN  Time out waiting for command to complete.
1326  */
1327 static int mtip_exec_internal_command(struct mtip_port *port,
1328 					struct host_to_dev_fis *fis,
1329 					int fis_len,
1330 					dma_addr_t buffer,
1331 					int buf_len,
1332 					u32 opts,
1333 					gfp_t atomic,
1334 					unsigned long timeout)
1335 {
1336 	struct mtip_cmd_sg *command_sg;
1337 	DECLARE_COMPLETION_ONSTACK(wait);
1338 	int rv = 0, ready2go = 1;
1339 	struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
1340 	unsigned long to;
1341 	struct driver_data *dd = port->dd;
1342 
1343 	/* Make sure the buffer is 8 byte aligned. This is asic specific. */
1344 	if (buffer & 0x00000007) {
1345 		dev_err(&dd->pdev->dev, "SG buffer is not 8 byte aligned\n");
1346 		return -EFAULT;
1347 	}
1348 
1349 	to = jiffies + msecs_to_jiffies(timeout);
1350 	do {
1351 		ready2go = !test_and_set_bit(MTIP_TAG_INTERNAL,
1352 						port->allocated);
1353 		if (ready2go)
1354 			break;
1355 		mdelay(100);
1356 	} while (time_before(jiffies, to));
1357 	if (!ready2go) {
1358 		dev_warn(&dd->pdev->dev,
1359 			"Internal cmd active. new cmd [%02X]\n", fis->command);
1360 		return -EBUSY;
1361 	}
1362 	set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1363 	port->ic_pause_timer = 0;
1364 
1365 	clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
1366 	clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
1367 
1368 	if (atomic == GFP_KERNEL) {
1369 		if (fis->command != ATA_CMD_STANDBYNOW1) {
1370 			/* wait for io to complete if non atomic */
1371 			if (mtip_quiesce_io(port, 5000) < 0) {
1372 				dev_warn(&dd->pdev->dev,
1373 					"Failed to quiesce IO\n");
1374 				release_slot(port, MTIP_TAG_INTERNAL);
1375 				clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1376 				wake_up_interruptible(&port->svc_wait);
1377 				return -EBUSY;
1378 			}
1379 		}
1380 
1381 		/* Set the completion function and data for the command. */
1382 		int_cmd->comp_data = &wait;
1383 		int_cmd->comp_func = mtip_completion;
1384 
1385 	} else {
1386 		/* Clear completion - we're going to poll */
1387 		int_cmd->comp_data = NULL;
1388 		int_cmd->comp_func = mtip_null_completion;
1389 	}
1390 
1391 	/* Copy the command to the command table */
1392 	memcpy(int_cmd->command, fis, fis_len*4);
1393 
1394 	/* Populate the SG list */
1395 	int_cmd->command_header->opts =
1396 		 __force_bit2int cpu_to_le32(opts | fis_len);
1397 	if (buf_len) {
1398 		command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ;
1399 
1400 		command_sg->info =
1401 			__force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF);
1402 		command_sg->dba	=
1403 			__force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF);
1404 		command_sg->dba_upper =
1405 			__force_bit2int cpu_to_le32((buffer >> 16) >> 16);
1406 
1407 		int_cmd->command_header->opts |=
1408 			__force_bit2int cpu_to_le32((1 << 16));
1409 	}
1410 
1411 	/* Populate the command header */
1412 	int_cmd->command_header->byte_count = 0;
1413 
1414 	/* Issue the command to the hardware */
1415 	mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
1416 
1417 	if (atomic == GFP_KERNEL) {
1418 		/* Wait for the command to complete or timeout. */
1419 		if (wait_for_completion_interruptible_timeout(
1420 				&wait,
1421 				msecs_to_jiffies(timeout)) <= 0) {
1422 			if (rv == -ERESTARTSYS) { /* interrupted */
1423 				dev_err(&dd->pdev->dev,
1424 					"Internal command [%02X] was interrupted after %lu ms\n",
1425 					fis->command, timeout);
1426 				rv = -EINTR;
1427 				goto exec_ic_exit;
1428 			} else if (rv == 0) /* timeout */
1429 				dev_err(&dd->pdev->dev,
1430 					"Internal command did not complete [%02X] within timeout of  %lu ms\n",
1431 					fis->command, timeout);
1432 			else
1433 				dev_err(&dd->pdev->dev,
1434 					"Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
1435 					fis->command, rv, timeout);
1436 
1437 			if (mtip_check_surprise_removal(dd->pdev) ||
1438 				test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1439 						&dd->dd_flag)) {
1440 				dev_err(&dd->pdev->dev,
1441 					"Internal command [%02X] wait returned due to SR\n",
1442 					fis->command);
1443 				rv = -ENXIO;
1444 				goto exec_ic_exit;
1445 			}
1446 			mtip_device_reset(dd); /* recover from timeout issue */
1447 			rv = -EAGAIN;
1448 			goto exec_ic_exit;
1449 		}
1450 	} else {
1451 		u32 hba_stat, port_stat;
1452 
1453 		/* Spin for <timeout> checking if command still outstanding */
1454 		timeout = jiffies + msecs_to_jiffies(timeout);
1455 		while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1456 				& (1 << MTIP_TAG_INTERNAL))
1457 				&& time_before(jiffies, timeout)) {
1458 			if (mtip_check_surprise_removal(dd->pdev)) {
1459 				rv = -ENXIO;
1460 				goto exec_ic_exit;
1461 			}
1462 			if ((fis->command != ATA_CMD_STANDBYNOW1) &&
1463 				test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1464 						&dd->dd_flag)) {
1465 				rv = -ENXIO;
1466 				goto exec_ic_exit;
1467 			}
1468 			port_stat = readl(port->mmio + PORT_IRQ_STAT);
1469 			if (!port_stat)
1470 				continue;
1471 
1472 			if (port_stat & PORT_IRQ_ERR) {
1473 				dev_err(&dd->pdev->dev,
1474 					"Internal command [%02X] failed\n",
1475 					fis->command);
1476 				mtip_device_reset(dd);
1477 				rv = -EIO;
1478 				goto exec_ic_exit;
1479 			} else {
1480 				writel(port_stat, port->mmio + PORT_IRQ_STAT);
1481 				hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
1482 				if (hba_stat)
1483 					writel(hba_stat,
1484 						dd->mmio + HOST_IRQ_STAT);
1485 			}
1486 			break;
1487 		}
1488 	}
1489 
1490 	if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1491 			& (1 << MTIP_TAG_INTERNAL)) {
1492 		rv = -ENXIO;
1493 		if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
1494 			mtip_device_reset(dd);
1495 			rv = -EAGAIN;
1496 		}
1497 	}
1498 exec_ic_exit:
1499 	/* Clear the allocated and active bits for the internal command. */
1500 	atomic_set(&int_cmd->active, 0);
1501 	release_slot(port, MTIP_TAG_INTERNAL);
1502 	if (rv >= 0 && mtip_pause_ncq(port, fis)) {
1503 		/* NCQ paused */
1504 		return rv;
1505 	}
1506 	clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1507 	wake_up_interruptible(&port->svc_wait);
1508 
1509 	return rv;
1510 }
1511 
1512 /*
1513  * Byte-swap ATA ID strings.
1514  *
1515  * ATA identify data contains strings in byte-swapped 16-bit words.
1516  * They must be swapped (on all architectures) to be usable as C strings.
1517  * This function swaps bytes in-place.
1518  *
1519  * @buf The buffer location of the string
1520  * @len The number of bytes to swap
1521  *
1522  * return value
1523  *	None
1524  */
1525 static inline void ata_swap_string(u16 *buf, unsigned int len)
1526 {
1527 	int i;
1528 	for (i = 0; i < (len/2); i++)
1529 		be16_to_cpus(&buf[i]);
1530 }
1531 
1532 /*
1533  * Request the device identity information.
1534  *
1535  * If a user space buffer is not specified, i.e. is NULL, the
1536  * identify information is still read from the drive and placed
1537  * into the identify data buffer (@e port->identify) in the
1538  * port data structure.
1539  * When the identify buffer contains valid identify information @e
1540  * port->identify_valid is non-zero.
1541  *
1542  * @port	 Pointer to the port structure.
1543  * @user_buffer  A user space buffer where the identify data should be
1544  *                    copied.
1545  *
1546  * return value
1547  *	0	Command completed successfully.
1548  *	-EFAULT An error occurred while coping data to the user buffer.
1549  *	-1	Command failed.
1550  */
1551 static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
1552 {
1553 	int rv = 0;
1554 	struct host_to_dev_fis fis;
1555 
1556 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
1557 		return -EFAULT;
1558 
1559 	/* Build the FIS. */
1560 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1561 	fis.type	= 0x27;
1562 	fis.opts	= 1 << 7;
1563 	fis.command	= ATA_CMD_ID_ATA;
1564 
1565 	/* Set the identify information as invalid. */
1566 	port->identify_valid = 0;
1567 
1568 	/* Clear the identify information. */
1569 	memset(port->identify, 0, sizeof(u16) * ATA_ID_WORDS);
1570 
1571 	/* Execute the command. */
1572 	if (mtip_exec_internal_command(port,
1573 				&fis,
1574 				5,
1575 				port->identify_dma,
1576 				sizeof(u16) * ATA_ID_WORDS,
1577 				0,
1578 				GFP_KERNEL,
1579 				MTIP_INTERNAL_COMMAND_TIMEOUT_MS)
1580 				< 0) {
1581 		rv = -1;
1582 		goto out;
1583 	}
1584 
1585 	/*
1586 	 * Perform any necessary byte-swapping.  Yes, the kernel does in fact
1587 	 * perform field-sensitive swapping on the string fields.
1588 	 * See the kernel use of ata_id_string() for proof of this.
1589 	 */
1590 #ifdef __LITTLE_ENDIAN
1591 	ata_swap_string(port->identify + 27, 40);  /* model string*/
1592 	ata_swap_string(port->identify + 23, 8);   /* firmware string*/
1593 	ata_swap_string(port->identify + 10, 20);  /* serial# string*/
1594 #else
1595 	{
1596 		int i;
1597 		for (i = 0; i < ATA_ID_WORDS; i++)
1598 			port->identify[i] = le16_to_cpu(port->identify[i]);
1599 	}
1600 #endif
1601 
1602 	/* Check security locked state */
1603 	if (port->identify[128] & 0x4)
1604 		set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1605 	else
1606 		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1607 
1608 #ifdef MTIP_TRIM /* Disabling TRIM support temporarily */
1609 	/* Demux ID.DRAT & ID.RZAT to determine trim support */
1610 	if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
1611 		port->dd->trim_supp = true;
1612 	else
1613 #endif
1614 		port->dd->trim_supp = false;
1615 
1616 	/* Set the identify buffer as valid. */
1617 	port->identify_valid = 1;
1618 
1619 	if (user_buffer) {
1620 		if (copy_to_user(
1621 			user_buffer,
1622 			port->identify,
1623 			ATA_ID_WORDS * sizeof(u16))) {
1624 			rv = -EFAULT;
1625 			goto out;
1626 		}
1627 	}
1628 
1629 out:
1630 	return rv;
1631 }
1632 
1633 /*
1634  * Issue a standby immediate command to the device.
1635  *
1636  * @port Pointer to the port structure.
1637  *
1638  * return value
1639  *	0	Command was executed successfully.
1640  *	-1	An error occurred while executing the command.
1641  */
1642 static int mtip_standby_immediate(struct mtip_port *port)
1643 {
1644 	int rv;
1645 	struct host_to_dev_fis	fis;
1646 	unsigned long start;
1647 
1648 	/* Build the FIS. */
1649 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1650 	fis.type	= 0x27;
1651 	fis.opts	= 1 << 7;
1652 	fis.command	= ATA_CMD_STANDBYNOW1;
1653 
1654 	start = jiffies;
1655 	rv = mtip_exec_internal_command(port,
1656 					&fis,
1657 					5,
1658 					0,
1659 					0,
1660 					0,
1661 					GFP_ATOMIC,
1662 					15000);
1663 	dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n",
1664 			jiffies_to_msecs(jiffies - start));
1665 	if (rv)
1666 		dev_warn(&port->dd->pdev->dev,
1667 			"STANDBY IMMEDIATE command failed.\n");
1668 
1669 	return rv;
1670 }
1671 
1672 /*
1673  * Issue a READ LOG EXT command to the device.
1674  *
1675  * @port	pointer to the port structure.
1676  * @page	page number to fetch
1677  * @buffer	pointer to buffer
1678  * @buffer_dma	dma address corresponding to @buffer
1679  * @sectors	page length to fetch, in sectors
1680  *
1681  * return value
1682  *	@rv	return value from mtip_exec_internal_command()
1683  */
1684 static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
1685 				dma_addr_t buffer_dma, unsigned int sectors)
1686 {
1687 	struct host_to_dev_fis fis;
1688 
1689 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1690 	fis.type	= 0x27;
1691 	fis.opts	= 1 << 7;
1692 	fis.command	= ATA_CMD_READ_LOG_EXT;
1693 	fis.sect_count	= sectors & 0xFF;
1694 	fis.sect_cnt_ex	= (sectors >> 8) & 0xFF;
1695 	fis.lba_low	= page;
1696 	fis.lba_mid	= 0;
1697 	fis.device	= ATA_DEVICE_OBS;
1698 
1699 	memset(buffer, 0, sectors * ATA_SECT_SIZE);
1700 
1701 	return mtip_exec_internal_command(port,
1702 					&fis,
1703 					5,
1704 					buffer_dma,
1705 					sectors * ATA_SECT_SIZE,
1706 					0,
1707 					GFP_ATOMIC,
1708 					MTIP_INTERNAL_COMMAND_TIMEOUT_MS);
1709 }
1710 
1711 /*
1712  * Issue a SMART READ DATA command to the device.
1713  *
1714  * @port	pointer to the port structure.
1715  * @buffer	pointer to buffer
1716  * @buffer_dma	dma address corresponding to @buffer
1717  *
1718  * return value
1719  *	@rv	return value from mtip_exec_internal_command()
1720  */
1721 static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer,
1722 					dma_addr_t buffer_dma)
1723 {
1724 	struct host_to_dev_fis fis;
1725 
1726 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1727 	fis.type	= 0x27;
1728 	fis.opts	= 1 << 7;
1729 	fis.command	= ATA_CMD_SMART;
1730 	fis.features	= 0xD0;
1731 	fis.sect_count	= 1;
1732 	fis.lba_mid	= 0x4F;
1733 	fis.lba_hi	= 0xC2;
1734 	fis.device	= ATA_DEVICE_OBS;
1735 
1736 	return mtip_exec_internal_command(port,
1737 					&fis,
1738 					5,
1739 					buffer_dma,
1740 					ATA_SECT_SIZE,
1741 					0,
1742 					GFP_ATOMIC,
1743 					15000);
1744 }
1745 
1746 /*
1747  * Get the value of a smart attribute
1748  *
1749  * @port	pointer to the port structure
1750  * @id		attribute number
1751  * @attrib	pointer to return attrib information corresponding to @id
1752  *
1753  * return value
1754  *	-EINVAL	NULL buffer passed or unsupported attribute @id.
1755  *	-EPERM	Identify data not valid, SMART not supported or not enabled
1756  */
1757 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
1758 						struct smart_attr *attrib)
1759 {
1760 	int rv, i;
1761 	struct smart_attr *pattr;
1762 
1763 	if (!attrib)
1764 		return -EINVAL;
1765 
1766 	if (!port->identify_valid) {
1767 		dev_warn(&port->dd->pdev->dev, "IDENTIFY DATA not valid\n");
1768 		return -EPERM;
1769 	}
1770 	if (!(port->identify[82] & 0x1)) {
1771 		dev_warn(&port->dd->pdev->dev, "SMART not supported\n");
1772 		return -EPERM;
1773 	}
1774 	if (!(port->identify[85] & 0x1)) {
1775 		dev_warn(&port->dd->pdev->dev, "SMART not enabled\n");
1776 		return -EPERM;
1777 	}
1778 
1779 	memset(port->smart_buf, 0, ATA_SECT_SIZE);
1780 	rv = mtip_get_smart_data(port, port->smart_buf, port->smart_buf_dma);
1781 	if (rv) {
1782 		dev_warn(&port->dd->pdev->dev, "Failed to ge SMART data\n");
1783 		return rv;
1784 	}
1785 
1786 	pattr = (struct smart_attr *)(port->smart_buf + 2);
1787 	for (i = 0; i < 29; i++, pattr++)
1788 		if (pattr->attr_id == id) {
1789 			memcpy(attrib, pattr, sizeof(struct smart_attr));
1790 			break;
1791 		}
1792 
1793 	if (i == 29) {
1794 		dev_warn(&port->dd->pdev->dev,
1795 			"Query for invalid SMART attribute ID\n");
1796 		rv = -EINVAL;
1797 	}
1798 
1799 	return rv;
1800 }
1801 
1802 /*
1803  * Trim unused sectors
1804  *
1805  * @dd		pointer to driver_data structure
1806  * @lba		starting lba
1807  * @len		# of 512b sectors to trim
1808  *
1809  * return value
1810  *      -ENOMEM		Out of dma memory
1811  *      -EINVAL		Invalid parameters passed in, trim not supported
1812  *      -EIO		Error submitting trim request to hw
1813  */
1814 static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
1815 				unsigned int len)
1816 {
1817 	int i, rv = 0;
1818 	u64 tlba, tlen, sect_left;
1819 	struct mtip_trim_entry *buf;
1820 	dma_addr_t dma_addr;
1821 	struct host_to_dev_fis fis;
1822 
1823 	if (!len || dd->trim_supp == false)
1824 		return -EINVAL;
1825 
1826 	/* Trim request too big */
1827 	WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES));
1828 
1829 	/* Trim request not aligned on 4k boundary */
1830 	WARN_ON(len % 8 != 0);
1831 
1832 	/* Warn if vu_trim structure is too big */
1833 	WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE);
1834 
1835 	/* Allocate a DMA buffer for the trim structure */
1836 	buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr,
1837 								GFP_KERNEL);
1838 	if (!buf)
1839 		return -ENOMEM;
1840 	memset(buf, 0, ATA_SECT_SIZE);
1841 
1842 	for (i = 0, sect_left = len, tlba = lba;
1843 			i < MTIP_MAX_TRIM_ENTRIES && sect_left;
1844 			i++) {
1845 		tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ?
1846 					MTIP_MAX_TRIM_ENTRY_LEN :
1847 					sect_left);
1848 		buf[i].lba = __force_bit2int cpu_to_le32(tlba);
1849 		buf[i].range = __force_bit2int cpu_to_le16(tlen);
1850 		tlba += tlen;
1851 		sect_left -= tlen;
1852 	}
1853 	WARN_ON(sect_left != 0);
1854 
1855 	/* Build the fis */
1856 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
1857 	fis.type       = 0x27;
1858 	fis.opts       = 1 << 7;
1859 	fis.command    = 0xfb;
1860 	fis.features   = 0x60;
1861 	fis.sect_count = 1;
1862 	fis.device     = ATA_DEVICE_OBS;
1863 
1864 	if (mtip_exec_internal_command(dd->port,
1865 					&fis,
1866 					5,
1867 					dma_addr,
1868 					ATA_SECT_SIZE,
1869 					0,
1870 					GFP_KERNEL,
1871 					MTIP_TRIM_TIMEOUT_MS) < 0)
1872 		rv = -EIO;
1873 
1874 	dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr);
1875 	return rv;
1876 }
1877 
1878 /*
1879  * Get the drive capacity.
1880  *
1881  * @dd      Pointer to the device data structure.
1882  * @sectors Pointer to the variable that will receive the sector count.
1883  *
1884  * return value
1885  *	1 Capacity was returned successfully.
1886  *	0 The identify information is invalid.
1887  */
1888 static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors)
1889 {
1890 	struct mtip_port *port = dd->port;
1891 	u64 total, raw0, raw1, raw2, raw3;
1892 	raw0 = port->identify[100];
1893 	raw1 = port->identify[101];
1894 	raw2 = port->identify[102];
1895 	raw3 = port->identify[103];
1896 	total = raw0 | raw1<<16 | raw2<<32 | raw3<<48;
1897 	*sectors = total;
1898 	return (bool) !!port->identify_valid;
1899 }
1900 
1901 /*
1902  * Display the identify command data.
1903  *
1904  * @port Pointer to the port data structure.
1905  *
1906  * return value
1907  *	None
1908  */
1909 static void mtip_dump_identify(struct mtip_port *port)
1910 {
1911 	sector_t sectors;
1912 	unsigned short revid;
1913 	char cbuf[42];
1914 
1915 	if (!port->identify_valid)
1916 		return;
1917 
1918 	strlcpy(cbuf, (char *)(port->identify+10), 21);
1919 	dev_info(&port->dd->pdev->dev,
1920 		"Serial No.: %s\n", cbuf);
1921 
1922 	strlcpy(cbuf, (char *)(port->identify+23), 9);
1923 	dev_info(&port->dd->pdev->dev,
1924 		"Firmware Ver.: %s\n", cbuf);
1925 
1926 	strlcpy(cbuf, (char *)(port->identify+27), 41);
1927 	dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
1928 
1929 	dev_info(&port->dd->pdev->dev, "Security: %04x %s\n",
1930 		port->identify[128],
1931 		port->identify[128] & 0x4 ? "(LOCKED)" : "");
1932 
1933 	if (mtip_hw_get_capacity(port->dd, &sectors))
1934 		dev_info(&port->dd->pdev->dev,
1935 			"Capacity: %llu sectors (%llu MB)\n",
1936 			 (u64)sectors,
1937 			 ((u64)sectors) * ATA_SECT_SIZE >> 20);
1938 
1939 	pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
1940 	switch (revid & 0xFF) {
1941 	case 0x1:
1942 		strlcpy(cbuf, "A0", 3);
1943 		break;
1944 	case 0x3:
1945 		strlcpy(cbuf, "A2", 3);
1946 		break;
1947 	default:
1948 		strlcpy(cbuf, "?", 2);
1949 		break;
1950 	}
1951 	dev_info(&port->dd->pdev->dev,
1952 		"Card Type: %s\n", cbuf);
1953 }
1954 
1955 /*
1956  * Map the commands scatter list into the command table.
1957  *
1958  * @command Pointer to the command.
1959  * @nents Number of scatter list entries.
1960  *
1961  * return value
1962  *	None
1963  */
1964 static inline void fill_command_sg(struct driver_data *dd,
1965 				struct mtip_cmd *command,
1966 				int nents)
1967 {
1968 	int n;
1969 	unsigned int dma_len;
1970 	struct mtip_cmd_sg *command_sg;
1971 	struct scatterlist *sg = command->sg;
1972 
1973 	command_sg = command->command + AHCI_CMD_TBL_HDR_SZ;
1974 
1975 	for (n = 0; n < nents; n++) {
1976 		dma_len = sg_dma_len(sg);
1977 		if (dma_len > 0x400000)
1978 			dev_err(&dd->pdev->dev,
1979 				"DMA segment length truncated\n");
1980 		command_sg->info = __force_bit2int
1981 			cpu_to_le32((dma_len-1) & 0x3FFFFF);
1982 		command_sg->dba	= __force_bit2int
1983 			cpu_to_le32(sg_dma_address(sg));
1984 		command_sg->dba_upper = __force_bit2int
1985 			cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
1986 		command_sg++;
1987 		sg++;
1988 	}
1989 }
1990 
1991 /*
1992  * @brief Execute a drive command.
1993  *
1994  * return value 0 The command completed successfully.
1995  * return value -1 An error occurred while executing the command.
1996  */
1997 static int exec_drive_task(struct mtip_port *port, u8 *command)
1998 {
1999 	struct host_to_dev_fis	fis;
2000 	struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
2001 
2002 	/* Build the FIS. */
2003 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
2004 	fis.type	= 0x27;
2005 	fis.opts	= 1 << 7;
2006 	fis.command	= command[0];
2007 	fis.features	= command[1];
2008 	fis.sect_count	= command[2];
2009 	fis.sector	= command[3];
2010 	fis.cyl_low	= command[4];
2011 	fis.cyl_hi	= command[5];
2012 	fis.device	= command[6] & ~0x10; /* Clear the dev bit*/
2013 
2014 	dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n",
2015 		__func__,
2016 		command[0],
2017 		command[1],
2018 		command[2],
2019 		command[3],
2020 		command[4],
2021 		command[5],
2022 		command[6]);
2023 
2024 	/* Execute the command. */
2025 	if (mtip_exec_internal_command(port,
2026 				 &fis,
2027 				 5,
2028 				 0,
2029 				 0,
2030 				 0,
2031 				 GFP_KERNEL,
2032 				 MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) {
2033 		return -1;
2034 	}
2035 
2036 	command[0] = reply->command; /* Status*/
2037 	command[1] = reply->features; /* Error*/
2038 	command[4] = reply->cyl_low;
2039 	command[5] = reply->cyl_hi;
2040 
2041 	dbg_printk(MTIP_DRV_NAME " %s: Completion Status: stat %x, err %x , cyl_lo %x cyl_hi %x\n",
2042 		__func__,
2043 		command[0],
2044 		command[1],
2045 		command[4],
2046 		command[5]);
2047 
2048 	return 0;
2049 }
2050 
2051 /*
2052  * @brief Execute a drive command.
2053  *
2054  * @param port Pointer to the port data structure.
2055  * @param command Pointer to the user specified command parameters.
2056  * @param user_buffer Pointer to the user space buffer where read sector
2057  *                   data should be copied.
2058  *
2059  * return value 0 The command completed successfully.
2060  * return value -EFAULT An error occurred while copying the completion
2061  *                 data to the user space buffer.
2062  * return value -1 An error occurred while executing the command.
2063  */
2064 static int exec_drive_command(struct mtip_port *port, u8 *command,
2065 				void __user *user_buffer)
2066 {
2067 	struct host_to_dev_fis	fis;
2068 	struct host_to_dev_fis *reply;
2069 	u8 *buf = NULL;
2070 	dma_addr_t dma_addr = 0;
2071 	int rv = 0, xfer_sz = command[3];
2072 
2073 	if (xfer_sz) {
2074 		if (!user_buffer)
2075 			return -EFAULT;
2076 
2077 		buf = dmam_alloc_coherent(&port->dd->pdev->dev,
2078 				ATA_SECT_SIZE * xfer_sz,
2079 				&dma_addr,
2080 				GFP_KERNEL);
2081 		if (!buf) {
2082 			dev_err(&port->dd->pdev->dev,
2083 				"Memory allocation failed (%d bytes)\n",
2084 				ATA_SECT_SIZE * xfer_sz);
2085 			return -ENOMEM;
2086 		}
2087 		memset(buf, 0, ATA_SECT_SIZE * xfer_sz);
2088 	}
2089 
2090 	/* Build the FIS. */
2091 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
2092 	fis.type	= 0x27;
2093 	fis.opts	= 1 << 7;
2094 	fis.command	= command[0];
2095 	fis.features	= command[2];
2096 	fis.sect_count	= command[3];
2097 	if (fis.command == ATA_CMD_SMART) {
2098 		fis.sector	= command[1];
2099 		fis.cyl_low	= 0x4F;
2100 		fis.cyl_hi	= 0xC2;
2101 	}
2102 
2103 	if (xfer_sz)
2104 		reply = (port->rxfis + RX_FIS_PIO_SETUP);
2105 	else
2106 		reply = (port->rxfis + RX_FIS_D2H_REG);
2107 
2108 	dbg_printk(MTIP_DRV_NAME
2109 		" %s: User Command: cmd %x, sect %x, "
2110 		"feat %x, sectcnt %x\n",
2111 		__func__,
2112 		command[0],
2113 		command[1],
2114 		command[2],
2115 		command[3]);
2116 
2117 	/* Execute the command. */
2118 	if (mtip_exec_internal_command(port,
2119 				&fis,
2120 				 5,
2121 				 (xfer_sz ? dma_addr : 0),
2122 				 (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0),
2123 				 0,
2124 				 GFP_KERNEL,
2125 				 MTIP_IOCTL_COMMAND_TIMEOUT_MS)
2126 				 < 0) {
2127 		rv = -EFAULT;
2128 		goto exit_drive_command;
2129 	}
2130 
2131 	/* Collect the completion status. */
2132 	command[0] = reply->command; /* Status*/
2133 	command[1] = reply->features; /* Error*/
2134 	command[2] = reply->sect_count;
2135 
2136 	dbg_printk(MTIP_DRV_NAME
2137 		" %s: Completion Status: stat %x, "
2138 		"err %x, nsect %x\n",
2139 		__func__,
2140 		command[0],
2141 		command[1],
2142 		command[2]);
2143 
2144 	if (xfer_sz) {
2145 		if (copy_to_user(user_buffer,
2146 				 buf,
2147 				 ATA_SECT_SIZE * command[3])) {
2148 			rv = -EFAULT;
2149 			goto exit_drive_command;
2150 		}
2151 	}
2152 exit_drive_command:
2153 	if (buf)
2154 		dmam_free_coherent(&port->dd->pdev->dev,
2155 				ATA_SECT_SIZE * xfer_sz, buf, dma_addr);
2156 	return rv;
2157 }
2158 
2159 /*
2160  *  Indicates whether a command has a single sector payload.
2161  *
2162  *  @command passed to the device to perform the certain event.
2163  *  @features passed to the device to perform the certain event.
2164  *
2165  *  return value
2166  *	1	command is one that always has a single sector payload,
2167  *		regardless of the value in the Sector Count field.
2168  *      0       otherwise
2169  *
2170  */
2171 static unsigned int implicit_sector(unsigned char command,
2172 				    unsigned char features)
2173 {
2174 	unsigned int rv = 0;
2175 
2176 	/* list of commands that have an implicit sector count of 1 */
2177 	switch (command) {
2178 	case ATA_CMD_SEC_SET_PASS:
2179 	case ATA_CMD_SEC_UNLOCK:
2180 	case ATA_CMD_SEC_ERASE_PREP:
2181 	case ATA_CMD_SEC_ERASE_UNIT:
2182 	case ATA_CMD_SEC_FREEZE_LOCK:
2183 	case ATA_CMD_SEC_DISABLE_PASS:
2184 	case ATA_CMD_PMP_READ:
2185 	case ATA_CMD_PMP_WRITE:
2186 		rv = 1;
2187 		break;
2188 	case ATA_CMD_SET_MAX:
2189 		if (features == ATA_SET_MAX_UNLOCK)
2190 			rv = 1;
2191 		break;
2192 	case ATA_CMD_SMART:
2193 		if ((features == ATA_SMART_READ_VALUES) ||
2194 				(features == ATA_SMART_READ_THRESHOLDS))
2195 			rv = 1;
2196 		break;
2197 	case ATA_CMD_CONF_OVERLAY:
2198 		if ((features == ATA_DCO_IDENTIFY) ||
2199 				(features == ATA_DCO_SET))
2200 			rv = 1;
2201 		break;
2202 	}
2203 	return rv;
2204 }
2205 static void mtip_set_timeout(struct driver_data *dd,
2206 					struct host_to_dev_fis *fis,
2207 					unsigned int *timeout, u8 erasemode)
2208 {
2209 	switch (fis->command) {
2210 	case ATA_CMD_DOWNLOAD_MICRO:
2211 		*timeout = 120000; /* 2 minutes */
2212 		break;
2213 	case ATA_CMD_SEC_ERASE_UNIT:
2214 	case 0xFC:
2215 		if (erasemode)
2216 			*timeout = ((*(dd->port->identify + 90) * 2) * 60000);
2217 		else
2218 			*timeout = ((*(dd->port->identify + 89) * 2) * 60000);
2219 		break;
2220 	case ATA_CMD_STANDBYNOW1:
2221 		*timeout = 120000;  /* 2 minutes */
2222 		break;
2223 	case 0xF7:
2224 	case 0xFA:
2225 		*timeout = 60000;  /* 60 seconds */
2226 		break;
2227 	case ATA_CMD_SMART:
2228 		*timeout = 15000;  /* 15 seconds */
2229 		break;
2230 	default:
2231 		*timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
2232 		break;
2233 	}
2234 }
2235 
2236 /*
2237  * Executes a taskfile
2238  * See ide_taskfile_ioctl() for derivation
2239  */
2240 static int exec_drive_taskfile(struct driver_data *dd,
2241 			       void __user *buf,
2242 			       ide_task_request_t *req_task,
2243 			       int outtotal)
2244 {
2245 	struct host_to_dev_fis	fis;
2246 	struct host_to_dev_fis *reply;
2247 	u8 *outbuf = NULL;
2248 	u8 *inbuf = NULL;
2249 	dma_addr_t outbuf_dma = 0;
2250 	dma_addr_t inbuf_dma = 0;
2251 	dma_addr_t dma_buffer = 0;
2252 	int err = 0;
2253 	unsigned int taskin = 0;
2254 	unsigned int taskout = 0;
2255 	u8 nsect = 0;
2256 	unsigned int timeout;
2257 	unsigned int force_single_sector;
2258 	unsigned int transfer_size;
2259 	unsigned long task_file_data;
2260 	int intotal = outtotal + req_task->out_size;
2261 	int erasemode = 0;
2262 
2263 	taskout = req_task->out_size;
2264 	taskin = req_task->in_size;
2265 	/* 130560 = 512 * 0xFF*/
2266 	if (taskin > 130560 || taskout > 130560) {
2267 		err = -EINVAL;
2268 		goto abort;
2269 	}
2270 
2271 	if (taskout) {
2272 		outbuf = kzalloc(taskout, GFP_KERNEL);
2273 		if (outbuf == NULL) {
2274 			err = -ENOMEM;
2275 			goto abort;
2276 		}
2277 		if (copy_from_user(outbuf, buf + outtotal, taskout)) {
2278 			err = -EFAULT;
2279 			goto abort;
2280 		}
2281 		outbuf_dma = pci_map_single(dd->pdev,
2282 					 outbuf,
2283 					 taskout,
2284 					 DMA_TO_DEVICE);
2285 		if (outbuf_dma == 0) {
2286 			err = -ENOMEM;
2287 			goto abort;
2288 		}
2289 		dma_buffer = outbuf_dma;
2290 	}
2291 
2292 	if (taskin) {
2293 		inbuf = kzalloc(taskin, GFP_KERNEL);
2294 		if (inbuf == NULL) {
2295 			err = -ENOMEM;
2296 			goto abort;
2297 		}
2298 
2299 		if (copy_from_user(inbuf, buf + intotal, taskin)) {
2300 			err = -EFAULT;
2301 			goto abort;
2302 		}
2303 		inbuf_dma = pci_map_single(dd->pdev,
2304 					 inbuf,
2305 					 taskin, DMA_FROM_DEVICE);
2306 		if (inbuf_dma == 0) {
2307 			err = -ENOMEM;
2308 			goto abort;
2309 		}
2310 		dma_buffer = inbuf_dma;
2311 	}
2312 
2313 	/* only supports PIO and non-data commands from this ioctl. */
2314 	switch (req_task->data_phase) {
2315 	case TASKFILE_OUT:
2316 		nsect = taskout / ATA_SECT_SIZE;
2317 		reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
2318 		break;
2319 	case TASKFILE_IN:
2320 		reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
2321 		break;
2322 	case TASKFILE_NO_DATA:
2323 		reply = (dd->port->rxfis + RX_FIS_D2H_REG);
2324 		break;
2325 	default:
2326 		err = -EINVAL;
2327 		goto abort;
2328 	}
2329 
2330 	/* Build the FIS. */
2331 	memset(&fis, 0, sizeof(struct host_to_dev_fis));
2332 
2333 	fis.type	= 0x27;
2334 	fis.opts	= 1 << 7;
2335 	fis.command	= req_task->io_ports[7];
2336 	fis.features	= req_task->io_ports[1];
2337 	fis.sect_count	= req_task->io_ports[2];
2338 	fis.lba_low	= req_task->io_ports[3];
2339 	fis.lba_mid	= req_task->io_ports[4];
2340 	fis.lba_hi	= req_task->io_ports[5];
2341 	 /* Clear the dev bit*/
2342 	fis.device	= req_task->io_ports[6] & ~0x10;
2343 
2344 	if ((req_task->in_flags.all == 0) && (req_task->out_flags.all & 1)) {
2345 		req_task->in_flags.all	=
2346 			IDE_TASKFILE_STD_IN_FLAGS |
2347 			(IDE_HOB_STD_IN_FLAGS << 8);
2348 		fis.lba_low_ex		= req_task->hob_ports[3];
2349 		fis.lba_mid_ex		= req_task->hob_ports[4];
2350 		fis.lba_hi_ex		= req_task->hob_ports[5];
2351 		fis.features_ex		= req_task->hob_ports[1];
2352 		fis.sect_cnt_ex		= req_task->hob_ports[2];
2353 
2354 	} else {
2355 		req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
2356 	}
2357 
2358 	force_single_sector = implicit_sector(fis.command, fis.features);
2359 
2360 	if ((taskin || taskout) && (!fis.sect_count)) {
2361 		if (nsect)
2362 			fis.sect_count = nsect;
2363 		else {
2364 			if (!force_single_sector) {
2365 				dev_warn(&dd->pdev->dev,
2366 					"data movement but "
2367 					"sect_count is 0\n");
2368 					err = -EINVAL;
2369 					goto abort;
2370 			}
2371 		}
2372 	}
2373 
2374 	dbg_printk(MTIP_DRV_NAME
2375 		" %s: cmd %x, feat %x, nsect %x,"
2376 		" sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x,"
2377 		" head/dev %x\n",
2378 		__func__,
2379 		fis.command,
2380 		fis.features,
2381 		fis.sect_count,
2382 		fis.lba_low,
2383 		fis.lba_mid,
2384 		fis.lba_hi,
2385 		fis.device);
2386 
2387 	/* check for erase mode support during secure erase.*/
2388 	if ((fis.command == ATA_CMD_SEC_ERASE_UNIT) && outbuf &&
2389 					(outbuf[0] & MTIP_SEC_ERASE_MODE)) {
2390 		erasemode = 1;
2391 	}
2392 
2393 	mtip_set_timeout(dd, &fis, &timeout, erasemode);
2394 
2395 	/* Determine the correct transfer size.*/
2396 	if (force_single_sector)
2397 		transfer_size = ATA_SECT_SIZE;
2398 	else
2399 		transfer_size = ATA_SECT_SIZE * fis.sect_count;
2400 
2401 	/* Execute the command.*/
2402 	if (mtip_exec_internal_command(dd->port,
2403 				 &fis,
2404 				 5,
2405 				 dma_buffer,
2406 				 transfer_size,
2407 				 0,
2408 				 GFP_KERNEL,
2409 				 timeout) < 0) {
2410 		err = -EIO;
2411 		goto abort;
2412 	}
2413 
2414 	task_file_data = readl(dd->port->mmio+PORT_TFDATA);
2415 
2416 	if ((req_task->data_phase == TASKFILE_IN) && !(task_file_data & 1)) {
2417 		reply = dd->port->rxfis + RX_FIS_PIO_SETUP;
2418 		req_task->io_ports[7] = reply->control;
2419 	} else {
2420 		reply = dd->port->rxfis + RX_FIS_D2H_REG;
2421 		req_task->io_ports[7] = reply->command;
2422 	}
2423 
2424 	/* reclaim the DMA buffers.*/
2425 	if (inbuf_dma)
2426 		pci_unmap_single(dd->pdev, inbuf_dma,
2427 			taskin, DMA_FROM_DEVICE);
2428 	if (outbuf_dma)
2429 		pci_unmap_single(dd->pdev, outbuf_dma,
2430 			taskout, DMA_TO_DEVICE);
2431 	inbuf_dma  = 0;
2432 	outbuf_dma = 0;
2433 
2434 	/* return the ATA registers to the caller.*/
2435 	req_task->io_ports[1] = reply->features;
2436 	req_task->io_ports[2] = reply->sect_count;
2437 	req_task->io_ports[3] = reply->lba_low;
2438 	req_task->io_ports[4] = reply->lba_mid;
2439 	req_task->io_ports[5] = reply->lba_hi;
2440 	req_task->io_ports[6] = reply->device;
2441 
2442 	if (req_task->out_flags.all & 1)  {
2443 
2444 		req_task->hob_ports[3] = reply->lba_low_ex;
2445 		req_task->hob_ports[4] = reply->lba_mid_ex;
2446 		req_task->hob_ports[5] = reply->lba_hi_ex;
2447 		req_task->hob_ports[1] = reply->features_ex;
2448 		req_task->hob_ports[2] = reply->sect_cnt_ex;
2449 	}
2450 	dbg_printk(MTIP_DRV_NAME
2451 		" %s: Completion: stat %x,"
2452 		"err %x, sect_cnt %x, lbalo %x,"
2453 		"lbamid %x, lbahi %x, dev %x\n",
2454 		__func__,
2455 		req_task->io_ports[7],
2456 		req_task->io_ports[1],
2457 		req_task->io_ports[2],
2458 		req_task->io_ports[3],
2459 		req_task->io_ports[4],
2460 		req_task->io_ports[5],
2461 		req_task->io_ports[6]);
2462 
2463 	if (taskout) {
2464 		if (copy_to_user(buf + outtotal, outbuf, taskout)) {
2465 			err = -EFAULT;
2466 			goto abort;
2467 		}
2468 	}
2469 	if (taskin) {
2470 		if (copy_to_user(buf + intotal, inbuf, taskin)) {
2471 			err = -EFAULT;
2472 			goto abort;
2473 		}
2474 	}
2475 abort:
2476 	if (inbuf_dma)
2477 		pci_unmap_single(dd->pdev, inbuf_dma,
2478 					taskin, DMA_FROM_DEVICE);
2479 	if (outbuf_dma)
2480 		pci_unmap_single(dd->pdev, outbuf_dma,
2481 					taskout, DMA_TO_DEVICE);
2482 	kfree(outbuf);
2483 	kfree(inbuf);
2484 
2485 	return err;
2486 }
2487 
2488 /*
2489  * Handle IOCTL calls from the Block Layer.
2490  *
2491  * This function is called by the Block Layer when it receives an IOCTL
2492  * command that it does not understand. If the IOCTL command is not supported
2493  * this function returns -ENOTTY.
2494  *
2495  * @dd  Pointer to the driver data structure.
2496  * @cmd IOCTL command passed from the Block Layer.
2497  * @arg IOCTL argument passed from the Block Layer.
2498  *
2499  * return value
2500  *	0	The IOCTL completed successfully.
2501  *	-ENOTTY The specified command is not supported.
2502  *	-EFAULT An error occurred copying data to a user space buffer.
2503  *	-EIO	An error occurred while executing the command.
2504  */
2505 static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
2506 			 unsigned long arg)
2507 {
2508 	switch (cmd) {
2509 	case HDIO_GET_IDENTITY:
2510 	{
2511 		if (copy_to_user((void __user *)arg, dd->port->identify,
2512 						sizeof(u16) * ATA_ID_WORDS))
2513 			return -EFAULT;
2514 		break;
2515 	}
2516 	case HDIO_DRIVE_CMD:
2517 	{
2518 		u8 drive_command[4];
2519 
2520 		/* Copy the user command info to our buffer. */
2521 		if (copy_from_user(drive_command,
2522 					 (void __user *) arg,
2523 					 sizeof(drive_command)))
2524 			return -EFAULT;
2525 
2526 		/* Execute the drive command. */
2527 		if (exec_drive_command(dd->port,
2528 					 drive_command,
2529 					 (void __user *) (arg+4)))
2530 			return -EIO;
2531 
2532 		/* Copy the status back to the users buffer. */
2533 		if (copy_to_user((void __user *) arg,
2534 					 drive_command,
2535 					 sizeof(drive_command)))
2536 			return -EFAULT;
2537 
2538 		break;
2539 	}
2540 	case HDIO_DRIVE_TASK:
2541 	{
2542 		u8 drive_command[7];
2543 
2544 		/* Copy the user command info to our buffer. */
2545 		if (copy_from_user(drive_command,
2546 					 (void __user *) arg,
2547 					 sizeof(drive_command)))
2548 			return -EFAULT;
2549 
2550 		/* Execute the drive command. */
2551 		if (exec_drive_task(dd->port, drive_command))
2552 			return -EIO;
2553 
2554 		/* Copy the status back to the users buffer. */
2555 		if (copy_to_user((void __user *) arg,
2556 					 drive_command,
2557 					 sizeof(drive_command)))
2558 			return -EFAULT;
2559 
2560 		break;
2561 	}
2562 	case HDIO_DRIVE_TASKFILE: {
2563 		ide_task_request_t req_task;
2564 		int ret, outtotal;
2565 
2566 		if (copy_from_user(&req_task, (void __user *) arg,
2567 					sizeof(req_task)))
2568 			return -EFAULT;
2569 
2570 		outtotal = sizeof(req_task);
2571 
2572 		ret = exec_drive_taskfile(dd, (void __user *) arg,
2573 						&req_task, outtotal);
2574 
2575 		if (copy_to_user((void __user *) arg, &req_task,
2576 							sizeof(req_task)))
2577 			return -EFAULT;
2578 
2579 		return ret;
2580 	}
2581 
2582 	default:
2583 		return -EINVAL;
2584 	}
2585 	return 0;
2586 }
2587 
2588 /*
2589  * Submit an IO to the hw
2590  *
2591  * This function is called by the block layer to issue an io
2592  * to the device. Upon completion, the callback function will
2593  * be called with the data parameter passed as the callback data.
2594  *
2595  * @dd       Pointer to the driver data structure.
2596  * @start    First sector to read.
2597  * @nsect    Number of sectors to read.
2598  * @nents    Number of entries in scatter list for the read command.
2599  * @tag      The tag of this read command.
2600  * @callback Pointer to the function that should be called
2601  *	     when the read completes.
2602  * @data     Callback data passed to the callback function
2603  *	     when the read completes.
2604  * @dir      Direction (read or write)
2605  *
2606  * return value
2607  *	None
2608  */
2609 static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
2610 			      int nsect, int nents, int tag, void *callback,
2611 			      void *data, int dir, int unaligned)
2612 {
2613 	struct host_to_dev_fis	*fis;
2614 	struct mtip_port *port = dd->port;
2615 	struct mtip_cmd *command = &port->commands[tag];
2616 	int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
2617 	u64 start = sector;
2618 
2619 	/* Map the scatter list for DMA access */
2620 	nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
2621 
2622 	command->scatter_ents = nents;
2623 
2624 	command->unaligned = unaligned;
2625 	/*
2626 	 * The number of retries for this command before it is
2627 	 * reported as a failure to the upper layers.
2628 	 */
2629 	command->retries = MTIP_MAX_RETRIES;
2630 
2631 	/* Fill out fis */
2632 	fis = command->command;
2633 	fis->type        = 0x27;
2634 	fis->opts        = 1 << 7;
2635 	fis->command     =
2636 		(dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE);
2637 	fis->lba_low     = start & 0xFF;
2638 	fis->lba_mid     = (start >> 8) & 0xFF;
2639 	fis->lba_hi      = (start >> 16) & 0xFF;
2640 	fis->lba_low_ex  = (start >> 24) & 0xFF;
2641 	fis->lba_mid_ex  = (start >> 32) & 0xFF;
2642 	fis->lba_hi_ex   = (start >> 40) & 0xFF;
2643 	fis->device	 = 1 << 6;
2644 	fis->features    = nsect & 0xFF;
2645 	fis->features_ex = (nsect >> 8) & 0xFF;
2646 	fis->sect_count  = ((tag << 3) | (tag >> 5));
2647 	fis->sect_cnt_ex = 0;
2648 	fis->control     = 0;
2649 	fis->res2        = 0;
2650 	fis->res3        = 0;
2651 	fill_command_sg(dd, command, nents);
2652 
2653 	if (unaligned)
2654 		fis->device |= 1 << 7;
2655 
2656 	/* Populate the command header */
2657 	command->command_header->opts =
2658 			__force_bit2int cpu_to_le32(
2659 				(nents << 16) | 5 | AHCI_CMD_PREFETCH);
2660 	command->command_header->byte_count = 0;
2661 
2662 	/*
2663 	 * Set the completion function and data for the command
2664 	 * within this layer.
2665 	 */
2666 	command->comp_data = dd;
2667 	command->comp_func = mtip_async_complete;
2668 	command->direction = dma_dir;
2669 
2670 	/*
2671 	 * Set the completion function and data for the command passed
2672 	 * from the upper layer.
2673 	 */
2674 	command->async_data = data;
2675 	command->async_callback = callback;
2676 
2677 	/*
2678 	 * To prevent this command from being issued
2679 	 * if an internal command is in progress or error handling is active.
2680 	 */
2681 	if (port->flags & MTIP_PF_PAUSE_IO) {
2682 		set_bit(tag, port->cmds_to_issue);
2683 		set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
2684 		return;
2685 	}
2686 
2687 	/* Issue the command to the hardware */
2688 	mtip_issue_ncq_command(port, tag);
2689 
2690 	return;
2691 }
2692 
2693 /*
2694  * Release a command slot.
2695  *
2696  * @dd  Pointer to the driver data structure.
2697  * @tag Slot tag
2698  *
2699  * return value
2700  *      None
2701  */
2702 static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag,
2703 								int unaligned)
2704 {
2705 	struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal :
2706 							&dd->port->cmd_slot;
2707 	release_slot(dd->port, tag);
2708 	up(sem);
2709 }
2710 
2711 /*
2712  * Obtain a command slot and return its associated scatter list.
2713  *
2714  * @dd  Pointer to the driver data structure.
2715  * @tag Pointer to an int that will receive the allocated command
2716  *            slot tag.
2717  *
2718  * return value
2719  *	Pointer to the scatter list for the allocated command slot
2720  *	or NULL if no command slots are available.
2721  */
2722 static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
2723 						   int *tag, int unaligned)
2724 {
2725 	struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal :
2726 							&dd->port->cmd_slot;
2727 
2728 	/*
2729 	 * It is possible that, even with this semaphore, a thread
2730 	 * may think that no command slots are available. Therefore, we
2731 	 * need to make an attempt to get_slot().
2732 	 */
2733 	down(sem);
2734 	*tag = get_slot(dd->port);
2735 
2736 	if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
2737 		up(sem);
2738 		return NULL;
2739 	}
2740 	if (unlikely(*tag < 0)) {
2741 		up(sem);
2742 		return NULL;
2743 	}
2744 
2745 	return dd->port->commands[*tag].sg;
2746 }
2747 
2748 /*
2749  * Sysfs status dump.
2750  *
2751  * @dev  Pointer to the device structure, passed by the kernrel.
2752  * @attr Pointer to the device_attribute structure passed by the kernel.
2753  * @buf  Pointer to the char buffer that will receive the stats info.
2754  *
2755  * return value
2756  *	The size, in bytes, of the data copied into buf.
2757  */
2758 static ssize_t mtip_hw_show_status(struct device *dev,
2759 				struct device_attribute *attr,
2760 				char *buf)
2761 {
2762 	struct driver_data *dd = dev_to_disk(dev)->private_data;
2763 	int size = 0;
2764 
2765 	if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
2766 		size += sprintf(buf, "%s", "thermal_shutdown\n");
2767 	else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
2768 		size += sprintf(buf, "%s", "write_protect\n");
2769 	else
2770 		size += sprintf(buf, "%s", "online\n");
2771 
2772 	return size;
2773 }
2774 
2775 static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
2776 
/* debugfs entries */
2778 
2779 static ssize_t show_device_status(struct device_driver *drv, char *buf)
2780 {
2781 	int size = 0;
2782 	struct driver_data *dd, *tmp;
2783 	unsigned long flags;
2784 	char id_buf[42];
2785 	u16 status = 0;
2786 
2787 	spin_lock_irqsave(&dev_lock, flags);
2788 	size += sprintf(&buf[size], "Devices Present:\n");
2789 	list_for_each_entry_safe(dd, tmp, &online_list, online_list) {
2790 		if (dd->pdev) {
2791 			if (dd->port &&
2792 			    dd->port->identify &&
2793 			    dd->port->identify_valid) {
2794 				strlcpy(id_buf,
2795 					(char *) (dd->port->identify + 10), 21);
2796 				status = *(dd->port->identify + 141);
2797 			} else {
2798 				memset(id_buf, 0, 42);
2799 				status = 0;
2800 			}
2801 
2802 			if (dd->port &&
2803 			    test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
2804 				size += sprintf(&buf[size],
2805 					" device %s %s (ftl rebuild %d %%)\n",
2806 					dev_name(&dd->pdev->dev),
2807 					id_buf,
2808 					status);
2809 			} else {
2810 				size += sprintf(&buf[size],
2811 					" device %s %s\n",
2812 					dev_name(&dd->pdev->dev),
2813 					id_buf);
2814 			}
2815 		}
2816 	}
2817 
2818 	size += sprintf(&buf[size], "Devices Being Removed:\n");
2819 	list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) {
2820 		if (dd->pdev) {
2821 			if (dd->port &&
2822 			    dd->port->identify &&
2823 			    dd->port->identify_valid) {
2824 				strlcpy(id_buf,
2825 					(char *) (dd->port->identify+10), 21);
2826 				status = *(dd->port->identify + 141);
2827 			} else {
2828 				memset(id_buf, 0, 42);
2829 				status = 0;
2830 			}
2831 
2832 			if (dd->port &&
2833 			    test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
2834 				size += sprintf(&buf[size],
2835 					" device %s %s (ftl rebuild %d %%)\n",
2836 					dev_name(&dd->pdev->dev),
2837 					id_buf,
2838 					status);
2839 			} else {
2840 				size += sprintf(&buf[size],
2841 					" device %s %s\n",
2842 					dev_name(&dd->pdev->dev),
2843 					id_buf);
2844 			}
2845 		}
2846 	}
2847 	spin_unlock_irqrestore(&dev_lock, flags);
2848 
2849 	return size;
2850 }
2851 
2852 static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf,
2853 						size_t len, loff_t *offset)
2854 {
2855 	struct driver_data *dd =  (struct driver_data *)f->private_data;
2856 	int size = *offset;
2857 	char *buf;
2858 	int rv = 0;
2859 
2860 	if (!len || *offset)
2861 		return 0;
2862 
2863 	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2864 	if (!buf) {
2865 		dev_err(&dd->pdev->dev,
2866 			"Memory allocation: status buffer\n");
2867 		return -ENOMEM;
2868 	}
2869 
2870 	size += show_device_status(NULL, buf);
2871 
2872 	*offset = size <= len ? size : len;
2873 	size = copy_to_user(ubuf, buf, *offset);
2874 	if (size)
2875 		rv = -EFAULT;
2876 
2877 	kfree(buf);
2878 	return rv ? rv : *offset;
2879 }
2880 
2881 static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
2882 				  size_t len, loff_t *offset)
2883 {
2884 	struct driver_data *dd =  (struct driver_data *)f->private_data;
2885 	char *buf;
2886 	u32 group_allocated;
2887 	int size = *offset;
2888 	int n, rv = 0;
2889 
2890 	if (!len || size)
2891 		return 0;
2892 
2893 	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2894 	if (!buf) {
2895 		dev_err(&dd->pdev->dev,
2896 			"Memory allocation: register buffer\n");
2897 		return -ENOMEM;
2898 	}
2899 
2900 	size += sprintf(&buf[size], "H/ S ACTive      : [ 0x");
2901 
2902 	for (n = dd->slot_groups-1; n >= 0; n--)
2903 		size += sprintf(&buf[size], "%08X ",
2904 					 readl(dd->port->s_active[n]));
2905 
2906 	size += sprintf(&buf[size], "]\n");
2907 	size += sprintf(&buf[size], "H/ Command Issue : [ 0x");
2908 
2909 	for (n = dd->slot_groups-1; n >= 0; n--)
2910 		size += sprintf(&buf[size], "%08X ",
2911 					readl(dd->port->cmd_issue[n]));
2912 
2913 	size += sprintf(&buf[size], "]\n");
2914 	size += sprintf(&buf[size], "H/ Completed     : [ 0x");
2915 
2916 	for (n = dd->slot_groups-1; n >= 0; n--)
2917 		size += sprintf(&buf[size], "%08X ",
2918 				readl(dd->port->completed[n]));
2919 
2920 	size += sprintf(&buf[size], "]\n");
2921 	size += sprintf(&buf[size], "H/ PORT IRQ STAT : [ 0x%08X ]\n",
2922 				readl(dd->port->mmio + PORT_IRQ_STAT));
2923 	size += sprintf(&buf[size], "H/ HOST IRQ STAT : [ 0x%08X ]\n",
2924 				readl(dd->mmio + HOST_IRQ_STAT));
2925 	size += sprintf(&buf[size], "\n");
2926 
2927 	size += sprintf(&buf[size], "L/ Allocated     : [ 0x");
2928 
2929 	for (n = dd->slot_groups-1; n >= 0; n--) {
2930 		if (sizeof(long) > sizeof(u32))
2931 			group_allocated =
2932 				dd->port->allocated[n/2] >> (32*(n&1));
2933 		else
2934 			group_allocated = dd->port->allocated[n];
2935 		size += sprintf(&buf[size], "%08X ", group_allocated);
2936 	}
2937 	size += sprintf(&buf[size], "]\n");
2938 
2939 	size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
2940 
2941 	for (n = dd->slot_groups-1; n >= 0; n--) {
2942 		if (sizeof(long) > sizeof(u32))
2943 			group_allocated =
2944 				dd->port->cmds_to_issue[n/2] >> (32*(n&1));
2945 		else
2946 			group_allocated = dd->port->cmds_to_issue[n];
2947 		size += sprintf(&buf[size], "%08X ", group_allocated);
2948 	}
2949 	size += sprintf(&buf[size], "]\n");
2950 
2951 	*offset = size <= len ? size : len;
2952 	size = copy_to_user(ubuf, buf, *offset);
2953 	if (size)
2954 		rv = -EFAULT;
2955 
2956 	kfree(buf);
2957 	return rv ? rv : *offset;
2958 }
2959 
2960 static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
2961 				  size_t len, loff_t *offset)
2962 {
2963 	struct driver_data *dd =  (struct driver_data *)f->private_data;
2964 	char *buf;
2965 	int size = *offset;
2966 	int rv = 0;
2967 
2968 	if (!len || size)
2969 		return 0;
2970 
2971 	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2972 	if (!buf) {
2973 		dev_err(&dd->pdev->dev,
2974 			"Memory allocation: flag buffer\n");
2975 		return -ENOMEM;
2976 	}
2977 
2978 	size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
2979 							dd->port->flags);
2980 	size += sprintf(&buf[size], "Flag-dd   : [ %08lX ]\n",
2981 							dd->dd_flag);
2982 
2983 	*offset = size <= len ? size : len;
2984 	size = copy_to_user(ubuf, buf, *offset);
2985 	if (size)
2986 		rv = -EFAULT;
2987 
2988 	kfree(buf);
2989 	return rv ? rv : *offset;
2990 }
2991 
/*
 * File operations for the device status text produced by
 * mtip_hw_read_device_status(). Read-only, seeking is not supported.
 */
static const struct file_operations mtip_device_status_fops = {
	.owner  = THIS_MODULE,
	.open   = simple_open,
	.read   = mtip_hw_read_device_status,
	.llseek = no_llseek,
};

/*
 * File operations for the per-device "registers" debugfs file.
 * Read-only, seeking is not supported.
 */
static const struct file_operations mtip_regs_fops = {
	.owner  = THIS_MODULE,
	.open   = simple_open,
	.read   = mtip_hw_read_registers,
	.llseek = no_llseek,
};

/*
 * File operations for the per-device "flags" debugfs file.
 * Read-only, seeking is not supported.
 */
static const struct file_operations mtip_flags_fops = {
	.owner  = THIS_MODULE,
	.open   = simple_open,
	.read   = mtip_hw_read_flags,
	.llseek = no_llseek,
};
3012 
3013 /*
3014  * Create the sysfs related attributes.
3015  *
3016  * @dd   Pointer to the driver data structure.
3017  * @kobj Pointer to the kobj for the block device.
3018  *
3019  * return value
3020  *	0	Operation completed successfully.
3021  *	-EINVAL Invalid parameter.
3022  */
3023 static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
3024 {
3025 	if (!kobj || !dd)
3026 		return -EINVAL;
3027 
3028 	if (sysfs_create_file(kobj, &dev_attr_status.attr))
3029 		dev_warn(&dd->pdev->dev,
3030 			"Error creating 'status' sysfs entry\n");
3031 	return 0;
3032 }
3033 
3034 /*
3035  * Remove the sysfs related attributes.
3036  *
3037  * @dd   Pointer to the driver data structure.
3038  * @kobj Pointer to the kobj for the block device.
3039  *
3040  * return value
3041  *	0	Operation completed successfully.
3042  *	-EINVAL Invalid parameter.
3043  */
3044 static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
3045 {
3046 	if (!kobj || !dd)
3047 		return -EINVAL;
3048 
3049 	sysfs_remove_file(kobj, &dev_attr_status.attr);
3050 
3051 	return 0;
3052 }
3053 
3054 static int mtip_hw_debugfs_init(struct driver_data *dd)
3055 {
3056 	if (!dfs_parent)
3057 		return -1;
3058 
3059 	dd->dfs_node = debugfs_create_dir(dd->disk->disk_name, dfs_parent);
3060 	if (IS_ERR_OR_NULL(dd->dfs_node)) {
3061 		dev_warn(&dd->pdev->dev,
3062 			"Error creating node %s under debugfs\n",
3063 						dd->disk->disk_name);
3064 		dd->dfs_node = NULL;
3065 		return -1;
3066 	}
3067 
3068 	debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
3069 							&mtip_flags_fops);
3070 	debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
3071 							&mtip_regs_fops);
3072 
3073 	return 0;
3074 }
3075 
3076 static void mtip_hw_debugfs_exit(struct driver_data *dd)
3077 {
3078 	if (dd->dfs_node)
3079 		debugfs_remove_recursive(dd->dfs_node);
3080 }
3081 
/*
 * Tear down what is still attached to a driver data structure and free it.
 *
 * @dd Pointer to the driver data structure. It is freed when 0 is
 *     returned and must not be used afterwards.
 *
 * return value
 *	0	Everything was released and @dd was freed.
 *	-2	dd->bdev still has holders; nothing was released.
 */
static int mtip_free_orphan(struct driver_data *dd)
{
	struct kobject *kobj;

	if (dd->bdev) {
		/* Still held by someone: leave everything in place. */
		if (dd->bdev->bd_holders >= 1)
			return -2;

		bdput(dd->bdev);
		dd->bdev = NULL;
	}

	mtip_hw_debugfs_exit(dd);

	/* Give the device index back to the ida. */
	spin_lock(&rssd_index_lock);
	ida_remove(&rssd_index_ida, dd->index);
	spin_unlock(&rssd_index_lock);

	if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) &&
			test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
		/*
		 * Init never completed and the rebuild failed: only the
		 * disk reference is dropped on this path.
		 */
		put_disk(dd->disk);
	} else {
		if (dd->disk) {
			/* Remove the sysfs entry before deleting the disk. */
			kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
			if (kobj) {
				mtip_hw_sysfs_exit(dd, kobj);
				kobject_put(kobj);
			}
			del_gendisk(dd->disk);
			dd->disk = NULL;
		}
		if (dd->queue) {
			/* Detach the driver data before the queue goes away. */
			dd->queue->queuedata = NULL;
			blk_cleanup_queue(dd->queue);
			dd->queue = NULL;
		}
	}
	kfree(dd);
	return 0;
}
3122 
3123 /*
3124  * Perform any init/resume time hardware setup
3125  *
3126  * @dd Pointer to the driver data structure.
3127  *
3128  * return value
3129  *	None
3130  */
3131 static inline void hba_setup(struct driver_data *dd)
3132 {
3133 	u32 hwdata;
3134 	hwdata = readl(dd->mmio + HOST_HSORG);
3135 
3136 	/* interrupt bug workaround: use only 1 IS bit.*/
3137 	writel(hwdata |
3138 		HSORG_DISABLE_SLOTGRP_INTR |
3139 		HSORG_DISABLE_SLOTGRP_PXIS,
3140 		dd->mmio + HOST_HSORG);
3141 }
3142 
3143 static int mtip_device_unaligned_constrained(struct driver_data *dd)
3144 {
3145 	return (dd->pdev->device == P420M_DEVICE_ID ? 1 : 0);
3146 }
3147 
3148 /*
3149  * Detect the details of the product, and store anything needed
3150  * into the driver data structure.  This includes product type and
3151  * version and number of slot groups.
3152  *
3153  * @dd Pointer to the driver data structure.
3154  *
3155  * return value
3156  *	None
3157  */
3158 static void mtip_detect_product(struct driver_data *dd)
3159 {
3160 	u32 hwdata;
3161 	unsigned int rev, slotgroups;
3162 
3163 	/*
3164 	 * HBA base + 0xFC [15:0] - vendor-specific hardware interface
3165 	 * info register:
3166 	 * [15:8] hardware/software interface rev#
3167 	 * [   3] asic-style interface
3168 	 * [ 2:0] number of slot groups, minus 1 (only valid for asic-style).
3169 	 */
3170 	hwdata = readl(dd->mmio + HOST_HSORG);
3171 
3172 	dd->product_type = MTIP_PRODUCT_UNKNOWN;
3173 	dd->slot_groups = 1;
3174 
3175 	if (hwdata & 0x8) {
3176 		dd->product_type = MTIP_PRODUCT_ASICFPGA;
3177 		rev = (hwdata & HSORG_HWREV) >> 8;
3178 		slotgroups = (hwdata & HSORG_SLOTGROUPS) + 1;
3179 		dev_info(&dd->pdev->dev,
3180 			"ASIC-FPGA design, HS rev 0x%x, "
3181 			"%i slot groups [%i slots]\n",
3182 			 rev,
3183 			 slotgroups,
3184 			 slotgroups * 32);
3185 
3186 		if (slotgroups > MTIP_MAX_SLOT_GROUPS) {
3187 			dev_warn(&dd->pdev->dev,
3188 				"Warning: driver only supports "
3189 				"%i slot groups.\n", MTIP_MAX_SLOT_GROUPS);
3190 			slotgroups = MTIP_MAX_SLOT_GROUPS;
3191 		}
3192 		dd->slot_groups = slotgroups;
3193 		return;
3194 	}
3195 
3196 	dev_warn(&dd->pdev->dev, "Unrecognized product id\n");
3197 }
3198 
3199 /*
3200  * Blocking wait for FTL rebuild to complete
3201  *
3202  * @dd Pointer to the DRIVER_DATA structure.
3203  *
3204  * return value
3205  *	0	FTL rebuild completed successfully
3206  *	-EFAULT FTL rebuild error/timeout/interruption
3207  */
3208 static int mtip_ftl_rebuild_poll(struct driver_data *dd)
3209 {
3210 	unsigned long timeout, cnt = 0, start;
3211 
3212 	dev_warn(&dd->pdev->dev,
3213 		"FTL rebuild in progress. Polling for completion.\n");
3214 
3215 	start = jiffies;
3216 	timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);
3217 
3218 	do {
3219 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
3220 				&dd->dd_flag)))
3221 			return -EFAULT;
3222 		if (mtip_check_surprise_removal(dd->pdev))
3223 			return -EFAULT;
3224 
3225 		if (mtip_get_identify(dd->port, NULL) < 0)
3226 			return -EFAULT;
3227 
3228 		if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
3229 			MTIP_FTL_REBUILD_MAGIC) {
3230 			ssleep(1);
3231 			/* Print message every 3 minutes */
3232 			if (cnt++ >= 180) {
3233 				dev_warn(&dd->pdev->dev,
3234 				"FTL rebuild in progress (%d secs).\n",
3235 				jiffies_to_msecs(jiffies - start) / 1000);
3236 				cnt = 0;
3237 			}
3238 		} else {
3239 			dev_warn(&dd->pdev->dev,
3240 				"FTL rebuild complete (%d secs).\n",
3241 			jiffies_to_msecs(jiffies - start) / 1000);
3242 			mtip_block_initialize(dd);
3243 			return 0;
3244 		}
3245 		ssleep(10);
3246 	} while (time_before(jiffies, timeout));
3247 
3248 	/* Check for timeout */
3249 	dev_err(&dd->pdev->dev,
3250 		"Timed out waiting for FTL rebuild to complete (%d secs).\n",
3251 		jiffies_to_msecs(jiffies - start) / 1000);
3252 	return -EFAULT;
3253 }
3254 
3255 /*
3256  * service thread to issue queued commands
3257  *
3258  * @data Pointer to the driver data structure.
3259  *
3260  * return value
3261  *	0
3262  */
3263 
3264 static int mtip_service_thread(void *data)
3265 {
3266 	struct driver_data *dd = (struct driver_data *)data;
3267 	unsigned long slot, slot_start, slot_wrap;
3268 	unsigned int num_cmd_slots = dd->slot_groups * 32;
3269 	struct mtip_port *port = dd->port;
3270 	int ret;
3271 
3272 	while (1) {
3273 		/*
3274 		 * the condition is to check neither an internal command is
3275 		 * is in progress nor error handling is active
3276 		 */
3277 		wait_event_interruptible(port->svc_wait, (port->flags) &&
3278 			!(port->flags & MTIP_PF_PAUSE_IO));
3279 
3280 		if (kthread_should_stop())
3281 			goto st_out;
3282 
3283 		set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
3284 
3285 		/* If I am an orphan, start self cleanup */
3286 		if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags))
3287 			break;
3288 
3289 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
3290 				&dd->dd_flag)))
3291 			goto st_out;
3292 
3293 		if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
3294 			slot = 1;
3295 			/* used to restrict the loop to one iteration */
3296 			slot_start = num_cmd_slots;
3297 			slot_wrap = 0;
3298 			while (1) {
3299 				slot = find_next_bit(port->cmds_to_issue,
3300 						num_cmd_slots, slot);
3301 				if (slot_wrap == 1) {
3302 					if ((slot_start >= slot) ||
3303 						(slot >= num_cmd_slots))
3304 						break;
3305 				}
3306 				if (unlikely(slot_start == num_cmd_slots))
3307 					slot_start = slot;
3308 
3309 				if (unlikely(slot == num_cmd_slots)) {
3310 					slot = 1;
3311 					slot_wrap = 1;
3312 					continue;
3313 				}
3314 
3315 				/* Issue the command to the hardware */
3316 				mtip_issue_ncq_command(port, slot);
3317 
3318 				clear_bit(slot, port->cmds_to_issue);
3319 			}
3320 
3321 			clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
3322 		} else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
3323 			if (mtip_ftl_rebuild_poll(dd) < 0)
3324 				set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
3325 							&dd->dd_flag);
3326 			clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
3327 		}
3328 		clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
3329 
3330 		if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
3331 			goto st_out;
3332 	}
3333 
3334 	/* wait for pci remove to exit */
3335 	while (1) {
3336 		if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag))
3337 			break;
3338 		msleep_interruptible(1000);
3339 		if (kthread_should_stop())
3340 			goto st_out;
3341 	}
3342 
3343 	while (1) {
3344 		ret = mtip_free_orphan(dd);
3345 		if (!ret) {
3346 			/* NOTE: All data structures are invalid, do not
3347 			 * access any here */
3348 			return 0;
3349 		}
3350 		msleep_interruptible(1000);
3351 		if (kthread_should_stop())
3352 			goto st_out;
3353 	}
3354 st_out:
3355 	return 0;
3356 }
3357 
3358 /*
3359  * DMA region teardown
3360  *
3361  * @dd Pointer to driver_data structure
3362  *
3363  * return value
3364  *      None
3365  */
3366 static void mtip_dma_free(struct driver_data *dd)
3367 {
3368 	int i;
3369 	struct mtip_port *port = dd->port;
3370 
3371 	if (port->block1)
3372 		dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3373 					port->block1, port->block1_dma);
3374 
3375 	if (port->command_list) {
3376 		dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
3377 				port->command_list, port->command_list_dma);
3378 	}
3379 
3380 	for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
3381 		if (port->commands[i].command)
3382 			dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
3383 				port->commands[i].command,
3384 				port->commands[i].command_dma);
3385 	}
3386 }
3387 
3388 /*
3389  * DMA region setup
3390  *
3391  * @dd Pointer to driver_data structure
3392  *
3393  * return value
3394  *      -ENOMEM Not enough free DMA region space to initialize driver
3395  */
3396 static int mtip_dma_alloc(struct driver_data *dd)
3397 {
3398 	struct mtip_port *port = dd->port;
3399 	int i, rv = 0;
3400 	u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
3401 
3402 	/* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
3403 	port->block1 =
3404 		dmam_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3405 					&port->block1_dma, GFP_KERNEL);
3406 	if (!port->block1)
3407 		return -ENOMEM;
3408 	memset(port->block1, 0, BLOCK_DMA_ALLOC_SZ);
3409 
3410 	/* Allocate dma memory for command list */
3411 	port->command_list =
3412 		dmam_alloc_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
3413 					&port->command_list_dma, GFP_KERNEL);
3414 	if (!port->command_list) {
3415 		dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3416 					port->block1, port->block1_dma);
3417 		port->block1 = NULL;
3418 		port->block1_dma = 0;
3419 		return -ENOMEM;
3420 	}
3421 	memset(port->command_list, 0, AHCI_CMD_TBL_SZ);
3422 
3423 	/* Setup all pointers into first DMA region */
3424 	port->rxfis         = port->block1 + AHCI_RX_FIS_OFFSET;
3425 	port->rxfis_dma     = port->block1_dma + AHCI_RX_FIS_OFFSET;
3426 	port->identify      = port->block1 + AHCI_IDFY_OFFSET;
3427 	port->identify_dma  = port->block1_dma + AHCI_IDFY_OFFSET;
3428 	port->log_buf       = port->block1 + AHCI_SECTBUF_OFFSET;
3429 	port->log_buf_dma   = port->block1_dma + AHCI_SECTBUF_OFFSET;
3430 	port->smart_buf     = port->block1 + AHCI_SMARTBUF_OFFSET;
3431 	port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET;
3432 
3433 	/* Setup per command SGL DMA region */
3434 
3435 	/* Point the command headers at the command tables */
3436 	for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
3437 		port->commands[i].command =
3438 			dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
3439 				&port->commands[i].command_dma, GFP_KERNEL);
3440 		if (!port->commands[i].command) {
3441 			rv = -ENOMEM;
3442 			mtip_dma_free(dd);
3443 			return rv;
3444 		}
3445 		memset(port->commands[i].command, 0, CMD_DMA_ALLOC_SZ);
3446 
3447 		port->commands[i].command_header = port->command_list +
3448 					(sizeof(struct mtip_cmd_hdr) * i);
3449 		port->commands[i].command_header_dma =
3450 					dd->port->command_list_dma +
3451 					(sizeof(struct mtip_cmd_hdr) * i);
3452 
3453 		if (host_cap_64)
3454 			port->commands[i].command_header->ctbau =
3455 				__force_bit2int cpu_to_le32(
3456 				(port->commands[i].command_dma >> 16) >> 16);
3457 
3458 		port->commands[i].command_header->ctba =
3459 				__force_bit2int cpu_to_le32(
3460 				port->commands[i].command_dma & 0xFFFFFFFF);
3461 
3462 		sg_init_table(port->commands[i].sg, MTIP_MAX_SG);
3463 
3464 		/* Mark command as currently inactive */
3465 		atomic_set(&dd->port->commands[i].active, 0);
3466 	}
3467 	return 0;
3468 }
3469 
3470 /*
3471  * Called once for each card.
3472  *
3473  * @dd Pointer to the driver data structure.
3474  *
3475  * return value
3476  *	0 on success, else an error code.
3477  */
3478 static int mtip_hw_init(struct driver_data *dd)
3479 {
3480 	int i;
3481 	int rv;
3482 	unsigned int num_command_slots;
3483 	unsigned long timeout, timetaken;
3484 	unsigned char *buf;
3485 	struct smart_attr attr242;
3486 
3487 	dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
3488 
3489 	mtip_detect_product(dd);
3490 	if (dd->product_type == MTIP_PRODUCT_UNKNOWN) {
3491 		rv = -EIO;
3492 		goto out1;
3493 	}
3494 	num_command_slots = dd->slot_groups * 32;
3495 
3496 	hba_setup(dd);
3497 
3498 	dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL,
3499 				dd->numa_node);
3500 	if (!dd->port) {
3501 		dev_err(&dd->pdev->dev,
3502 			"Memory allocation: port structure\n");
3503 		return -ENOMEM;
3504 	}
3505 
3506 	/* Continue workqueue setup */
3507 	for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
3508 		dd->work[i].port = dd->port;
3509 
3510 	/* Enable unaligned IO constraints for some devices */
3511 	if (mtip_device_unaligned_constrained(dd))
3512 		dd->unal_qdepth = MTIP_MAX_UNALIGNED_SLOTS;
3513 	else
3514 		dd->unal_qdepth = 0;
3515 
3516 	/* Counting semaphore to track command slot usage */
3517 	sema_init(&dd->port->cmd_slot, num_command_slots - 1 - dd->unal_qdepth);
3518 	sema_init(&dd->port->cmd_slot_unal, dd->unal_qdepth);
3519 
3520 	/* Spinlock to prevent concurrent issue */
3521 	for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
3522 		spin_lock_init(&dd->port->cmd_issue_lock[i]);
3523 
3524 	/* Set the port mmio base address. */
3525 	dd->port->mmio	= dd->mmio + PORT_OFFSET;
3526 	dd->port->dd	= dd;
3527 
3528 	/* DMA allocations */
3529 	rv = mtip_dma_alloc(dd);
3530 	if (rv < 0)
3531 		goto out1;
3532 
3533 	/* Setup the pointers to the extended s_active and CI registers. */
3534 	for (i = 0; i < dd->slot_groups; i++) {
3535 		dd->port->s_active[i] =
3536 			dd->port->mmio + i*0x80 + PORT_SCR_ACT;
3537 		dd->port->cmd_issue[i] =
3538 			dd->port->mmio + i*0x80 + PORT_COMMAND_ISSUE;
3539 		dd->port->completed[i] =
3540 			dd->port->mmio + i*0x80 + PORT_SDBV;
3541 	}
3542 
3543 	timetaken = jiffies;
3544 	timeout = jiffies + msecs_to_jiffies(30000);
3545 	while (((readl(dd->port->mmio + PORT_SCR_STAT) & 0x0F) != 0x03) &&
3546 		 time_before(jiffies, timeout)) {
3547 		mdelay(100);
3548 	}
3549 	if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
3550 		timetaken = jiffies - timetaken;
3551 		dev_warn(&dd->pdev->dev,
3552 			"Surprise removal detected at %u ms\n",
3553 			jiffies_to_msecs(timetaken));
3554 		rv = -ENODEV;
3555 		goto out2 ;
3556 	}
3557 	if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
3558 		timetaken = jiffies - timetaken;
3559 		dev_warn(&dd->pdev->dev,
3560 			"Removal detected at %u ms\n",
3561 			jiffies_to_msecs(timetaken));
3562 		rv = -EFAULT;
3563 		goto out2;
3564 	}
3565 
3566 	/* Conditionally reset the HBA. */
3567 	if (!(readl(dd->mmio + HOST_CAP) & HOST_CAP_NZDMA)) {
3568 		if (mtip_hba_reset(dd) < 0) {
3569 			dev_err(&dd->pdev->dev,
3570 				"Card did not reset within timeout\n");
3571 			rv = -EIO;
3572 			goto out2;
3573 		}
3574 	} else {
3575 		/* Clear any pending interrupts on the HBA */
3576 		writel(readl(dd->mmio + HOST_IRQ_STAT),
3577 			dd->mmio + HOST_IRQ_STAT);
3578 	}
3579 
3580 	mtip_init_port(dd->port);
3581 	mtip_start_port(dd->port);
3582 
3583 	/* Setup the ISR and enable interrupts. */
3584 	rv = devm_request_irq(&dd->pdev->dev,
3585 				dd->pdev->irq,
3586 				mtip_irq_handler,
3587 				IRQF_SHARED,
3588 				dev_driver_string(&dd->pdev->dev),
3589 				dd);
3590 
3591 	if (rv) {
3592 		dev_err(&dd->pdev->dev,
3593 			"Unable to allocate IRQ %d\n", dd->pdev->irq);
3594 		goto out2;
3595 	}
3596 	irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding));
3597 
3598 	/* Enable interrupts on the HBA. */
3599 	writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
3600 					dd->mmio + HOST_CTL);
3601 
3602 	init_timer(&dd->port->cmd_timer);
3603 	init_waitqueue_head(&dd->port->svc_wait);
3604 
3605 	dd->port->cmd_timer.data = (unsigned long int) dd->port;
3606 	dd->port->cmd_timer.function = mtip_timeout_function;
3607 	mod_timer(&dd->port->cmd_timer,
3608 		jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
3609 
3610 
3611 	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
3612 		rv = -EFAULT;
3613 		goto out3;
3614 	}
3615 
3616 	if (mtip_get_identify(dd->port, NULL) < 0) {
3617 		rv = -EFAULT;
3618 		goto out3;
3619 	}
3620 	mtip_dump_identify(dd->port);
3621 
3622 	if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
3623 		MTIP_FTL_REBUILD_MAGIC) {
3624 		set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
3625 		return MTIP_FTL_REBUILD_MAGIC;
3626 	}
3627 
3628 	/* check write protect, over temp and rebuild statuses */
3629 	rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
3630 				dd->port->log_buf,
3631 				dd->port->log_buf_dma, 1);
3632 	if (rv) {
3633 		dev_warn(&dd->pdev->dev,
3634 			"Error in READ LOG EXT (10h) command\n");
3635 		/* non-critical error, don't fail the load */
3636 	} else {
3637 		buf = (unsigned char *)dd->port->log_buf;
3638 		if (buf[259] & 0x1) {
3639 			dev_info(&dd->pdev->dev,
3640 				"Write protect bit is set.\n");
3641 			set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
3642 		}
3643 		if (buf[288] == 0xF7) {
3644 			dev_info(&dd->pdev->dev,
3645 				"Exceeded Tmax, drive in thermal shutdown.\n");
3646 			set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
3647 		}
3648 		if (buf[288] == 0xBF) {
3649 			dev_info(&dd->pdev->dev,
3650 				"Drive is in security locked state.\n");
3651 			set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
3652 		}
3653 	}
3654 
3655 	/* get write protect progess */
3656 	memset(&attr242, 0, sizeof(struct smart_attr));
3657 	if (mtip_get_smart_attr(dd->port, 242, &attr242))
3658 		dev_warn(&dd->pdev->dev,
3659 				"Unable to check write protect progress\n");
3660 	else
3661 		dev_info(&dd->pdev->dev,
3662 				"Write protect progress: %u%% (%u blocks)\n",
3663 				attr242.cur, le32_to_cpu(attr242.data));
3664 	return rv;
3665 
3666 out3:
3667 	del_timer_sync(&dd->port->cmd_timer);
3668 
3669 	/* Disable interrupts on the HBA. */
3670 	writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3671 			dd->mmio + HOST_CTL);
3672 
3673 	/* Release the IRQ. */
3674 	irq_set_affinity_hint(dd->pdev->irq, NULL);
3675 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
3676 
3677 out2:
3678 	mtip_deinit_port(dd->port);
3679 	mtip_dma_free(dd);
3680 
3681 out1:
3682 	/* Free the memory allocated for the for structure. */
3683 	kfree(dd->port);
3684 
3685 	return rv;
3686 }
3687 
3688 /*
3689  * Called to deinitialize an interface.
3690  *
3691  * @dd Pointer to the driver data structure.
3692  *
3693  * return value
3694  *	0
3695  */
3696 static int mtip_hw_exit(struct driver_data *dd)
3697 {
3698 	/*
3699 	 * Send standby immediate (E0h) to the drive so that it
3700 	 * saves its state.
3701 	 */
3702 	if (!dd->sr) {
3703 		if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
3704 		    !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
3705 			if (mtip_standby_immediate(dd->port))
3706 				dev_warn(&dd->pdev->dev,
3707 					"STANDBY IMMEDIATE failed\n");
3708 
3709 		/* de-initialize the port. */
3710 		mtip_deinit_port(dd->port);
3711 
3712 		/* Disable interrupts on the HBA. */
3713 		writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3714 				dd->mmio + HOST_CTL);
3715 	}
3716 
3717 	del_timer_sync(&dd->port->cmd_timer);
3718 
3719 	/* Release the IRQ. */
3720 	irq_set_affinity_hint(dd->pdev->irq, NULL);
3721 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
3722 
3723 	/* Free dma regions */
3724 	mtip_dma_free(dd);
3725 
3726 	/* Free the memory allocated for the for structure. */
3727 	kfree(dd->port);
3728 	dd->port = NULL;
3729 
3730 	return 0;
3731 }
3732 
3733 /*
3734  * Issue a Standby Immediate command to the device.
3735  *
3736  * This function is called by the Block Layer just before the
3737  * system powers off during a shutdown.
3738  *
3739  * @dd Pointer to the driver data structure.
3740  *
3741  * return value
3742  *	0
3743  */
3744 static int mtip_hw_shutdown(struct driver_data *dd)
3745 {
3746 	/*
3747 	 * Send standby immediate (E0h) to the drive so that it
3748 	 * saves its state.
3749 	 */
3750 	if (!dd->sr && dd->port)
3751 		mtip_standby_immediate(dd->port);
3752 
3753 	return 0;
3754 }
3755 
3756 /*
3757  * Suspend function
3758  *
3759  * This function is called by the Block Layer just before the
3760  * system hibernates.
3761  *
3762  * @dd Pointer to the driver data structure.
3763  *
3764  * return value
3765  *	0	Suspend was successful
3766  *	-EFAULT Suspend was not successful
3767  */
3768 static int mtip_hw_suspend(struct driver_data *dd)
3769 {
3770 	/*
3771 	 * Send standby immediate (E0h) to the drive
3772 	 * so that it saves its state.
3773 	 */
3774 	if (mtip_standby_immediate(dd->port) != 0) {
3775 		dev_err(&dd->pdev->dev,
3776 			"Failed standby-immediate command\n");
3777 		return -EFAULT;
3778 	}
3779 
3780 	/* Disable interrupts on the HBA.*/
3781 	writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3782 			dd->mmio + HOST_CTL);
3783 	mtip_deinit_port(dd->port);
3784 
3785 	return 0;
3786 }
3787 
3788 /*
3789  * Resume function
3790  *
3791  * This function is called by the Block Layer as the
3792  * system resumes.
3793  *
3794  * @dd Pointer to the driver data structure.
3795  *
3796  * return value
3797  *	0	Resume was successful
3798  *      -EFAULT Resume was not successful
3799  */
3800 static int mtip_hw_resume(struct driver_data *dd)
3801 {
3802 	/* Perform any needed hardware setup steps */
3803 	hba_setup(dd);
3804 
3805 	/* Reset the HBA */
3806 	if (mtip_hba_reset(dd) != 0) {
3807 		dev_err(&dd->pdev->dev,
3808 			"Unable to reset the HBA\n");
3809 		return -EFAULT;
3810 	}
3811 
3812 	/*
3813 	 * Enable the port, DMA engine, and FIS reception specific
3814 	 * h/w in controller.
3815 	 */
3816 	mtip_init_port(dd->port);
3817 	mtip_start_port(dd->port);
3818 
3819 	/* Enable interrupts on the HBA.*/
3820 	writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
3821 			dd->mmio + HOST_CTL);
3822 
3823 	return 0;
3824 }
3825 
/*
 * Helper function for reusing disk name
 * upon hot insertion.
 *
 * Writes "<prefix><letters>" into @buf, where <letters> encodes @index
 * in bijective base 26: 0 -> "a", 25 -> "z", 26 -> "aa", 701 -> "zz",
 * 702 -> "aaa", and so on.
 *
 * @prefix Name prefix (NUL-terminated), e.g. "rssd".
 * @index  Zero-based device index; must not be negative.
 * @buf    Output buffer that receives the NUL-terminated name.
 * @buflen Size of @buf in bytes.
 *
 * return value
 *	0        Name written to @buf.
 *	-EINVAL  @index is negative, or @buf cannot hold the prefix, at
 *	         least one letter and the terminating NUL. @buf may have
 *	         been partially written within its bounds in this case.
 */
static int rssd_disk_name_format(char *prefix,
				 int index,
				 char *buf,
				 int buflen)
{
	const int base = 'z' - 'a' + 1;
	const size_t prefix_len = strlen(prefix);
	char *begin;
	char *end;
	char *p;

	/*
	 * Reject inputs the digit loop below cannot handle safely: a
	 * negative index yields non-letter characters, and a buffer that is
	 * not larger than the prefix puts 'begin' at or past the end of the
	 * buffer, so the "p == begin" overflow guard would never trigger.
	 */
	if (index < 0 || buflen <= 0 || (size_t)buflen <= prefix_len)
		return -EINVAL;

	begin = buf + prefix_len;
	end = buf + buflen;

	/* Build the letters right-to-left at the tail of the buffer. */
	p = end - 1;
	*p = '\0';
	do {
		/* No room left between the prefix and the letters */
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % base);
		index = (index / base) - 1;
	} while (index >= 0);

	/* Slide the letters (and NUL) up against the prefix position. */
	memmove(begin, p, end - p);
	memcpy(buf, prefix, prefix_len);

	return 0;
}
3856 
3857 /*
3858  * Block layer IOCTL handler.
3859  *
3860  * @dev Pointer to the block_device structure.
3861  * @mode ignored
3862  * @cmd IOCTL command passed from the user application.
3863  * @arg Argument passed from the user application.
3864  *
3865  * return value
3866  *	0        IOCTL completed successfully.
3867  *	-ENOTTY  IOCTL not supported or invalid driver data
3868  *                 structure pointer.
3869  */
3870 static int mtip_block_ioctl(struct block_device *dev,
3871 			    fmode_t mode,
3872 			    unsigned cmd,
3873 			    unsigned long arg)
3874 {
3875 	struct driver_data *dd = dev->bd_disk->private_data;
3876 
3877 	if (!capable(CAP_SYS_ADMIN))
3878 		return -EACCES;
3879 
3880 	if (!dd)
3881 		return -ENOTTY;
3882 
3883 	if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
3884 		return -ENOTTY;
3885 
3886 	switch (cmd) {
3887 	case BLKFLSBUF:
3888 		return -ENOTTY;
3889 	default:
3890 		return mtip_hw_ioctl(dd, cmd, arg);
3891 	}
3892 }
3893 
#ifdef CONFIG_COMPAT
/*
 * Block layer compat IOCTL handler.
 *
 * Same policy as the native handler, except that HDIO_DRIVE_TASKFILE is
 * translated here: the compat request is copied into a native
 * ide_task_request_t (everything but the two trailing size fields in one
 * copy, then out_size and in_size individually), executed, and copied
 * back to user space the same way.
 *
 * @dev Pointer to the block_device structure.
 * @mode ignored
 * @cmd IOCTL command passed from the user application.
 * @arg Argument passed from the user application.
 *
 * return value
 *	0        IOCTL completed successfully.
 *	-EACCES  Caller lacks CAP_SYS_ADMIN.
 *	-EFAULT  A copy from or to user space failed.
 *	-ENOTTY  IOCTL not supported, invalid driver data
 *                 structure pointer, or device removal pending.
 */
static int mtip_block_compat_ioctl(struct block_device *dev,
			    fmode_t mode,
			    unsigned cmd,
			    unsigned long arg)
{
	struct driver_data *dd = dev->bd_disk->private_data;
	struct mtip_compat_ide_task_request_s __user *ureq;
	ide_task_request_t task;
	int compat_size, total_out, rc;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (!dd ||
	    unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
		return -ENOTTY;

	if (cmd == BLKFLSBUF)
		return -ENOTTY;

	/* Everything except the taskfile ioctl needs no translation */
	if (cmd != HDIO_DRIVE_TASKFILE)
		return mtip_hw_ioctl(dd, cmd, arg);

	ureq = (struct mtip_compat_ide_task_request_s __user *) arg;
	compat_size = sizeof(struct mtip_compat_ide_task_request_s);

	/* Leading part of the request: all but the two size fields */
	if (copy_from_user(&task, (void __user *) arg,
			   compat_size - (2 * sizeof(compat_long_t))))
		return -EFAULT;

	/* The two size fields are fetched one at a time */
	if (get_user(task.out_size, &ureq->out_size))
		return -EFAULT;

	if (get_user(task.in_size, &ureq->in_size))
		return -EFAULT;

	total_out = sizeof(struct mtip_compat_ide_task_request_s);

	rc = exec_drive_taskfile(dd, (void __user *) arg,
				 &task, total_out);

	/* Hand the (possibly updated) request back in the same layout */
	if (copy_to_user((void __user *) arg, &task,
			 compat_size - (2 * sizeof(compat_long_t))))
		return -EFAULT;

	if (put_user(task.out_size, &ureq->out_size))
		return -EFAULT;

	if (put_user(task.in_size, &ureq->in_size))
		return -EFAULT;

	return rc;
}
#endif
3971 
3972 /*
3973  * Obtain the geometry of the device.
3974  *
3975  * You may think that this function is obsolete, but some applications,
3976  * fdisk for example still used CHS values. This function describes the
3977  * device as having 224 heads and 56 sectors per cylinder. These values are
3978  * chosen so that each cylinder is aligned on a 4KB boundary. Since a
3979  * partition is described in terms of a start and end cylinder this means
3980  * that each partition is also 4KB aligned. Non-aligned partitions adversely
3981  * affects performance.
3982  *
3983  * @dev Pointer to the block_device strucutre.
3984  * @geo Pointer to a hd_geometry structure.
3985  *
3986  * return value
3987  *	0       Operation completed successfully.
3988  *	-ENOTTY An error occurred while reading the drive capacity.
3989  */
3990 static int mtip_block_getgeo(struct block_device *dev,
3991 				struct hd_geometry *geo)
3992 {
3993 	struct driver_data *dd = dev->bd_disk->private_data;
3994 	sector_t capacity;
3995 
3996 	if (!dd)
3997 		return -ENOTTY;
3998 
3999 	if (!(mtip_hw_get_capacity(dd, &capacity))) {
4000 		dev_warn(&dd->pdev->dev,
4001 			"Could not get drive capacity.\n");
4002 		return -ENOTTY;
4003 	}
4004 
4005 	geo->heads = 224;
4006 	geo->sectors = 56;
4007 	sector_div(capacity, (geo->heads * geo->sectors));
4008 	geo->cylinders = capacity;
4009 	return 0;
4010 }
4011 
4012 /*
4013  * Block device operation function.
4014  *
4015  * This structure contains pointers to the functions required by the block
4016  * layer.
4017  */
4018 static const struct block_device_operations mtip_block_ops = {
4019 	.ioctl		= mtip_block_ioctl,
4020 #ifdef CONFIG_COMPAT
4021 	.compat_ioctl	= mtip_block_compat_ioctl,
4022 #endif
4023 	.getgeo		= mtip_block_getgeo,
4024 	.owner		= THIS_MODULE
4025 };
4026 
4027 /*
4028  * Block layer make request function.
4029  *
4030  * This function is called by the kernel to process a BIO for
4031  * the P320 device.
4032  *
4033  * @queue Pointer to the request queue. Unused other than to obtain
4034  *              the driver data structure.
4035  * @bio   Pointer to the BIO.
4036  *
4037  */
4038 static void mtip_make_request(struct request_queue *queue, struct bio *bio)
4039 {
4040 	struct driver_data *dd = queue->queuedata;
4041 	struct scatterlist *sg;
4042 	struct bio_vec bvec;
4043 	struct bvec_iter iter;
4044 	int nents = 0;
4045 	int tag = 0, unaligned = 0;
4046 
4047 	if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
4048 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
4049 							&dd->dd_flag))) {
4050 			bio_endio(bio, -ENXIO);
4051 			return;
4052 		}
4053 		if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) {
4054 			bio_endio(bio, -ENODATA);
4055 			return;
4056 		}
4057 		if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT,
4058 							&dd->dd_flag) &&
4059 				bio_data_dir(bio))) {
4060 			bio_endio(bio, -ENODATA);
4061 			return;
4062 		}
4063 		if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) {
4064 			bio_endio(bio, -ENODATA);
4065 			return;
4066 		}
4067 		if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
4068 			bio_endio(bio, -ENXIO);
4069 			return;
4070 		}
4071 	}
4072 
4073 	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
4074 		bio_endio(bio, mtip_send_trim(dd, bio->bi_iter.bi_sector,
4075 						bio_sectors(bio)));
4076 		return;
4077 	}
4078 
4079 	if (unlikely(!bio_has_data(bio))) {
4080 		blk_queue_flush(queue, 0);
4081 		bio_endio(bio, 0);
4082 		return;
4083 	}
4084 
4085 	if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 &&
4086 							dd->unal_qdepth) {
4087 		if (bio->bi_iter.bi_sector % 8 != 0)
4088 			/* Unaligned on 4k boundaries */
4089 			unaligned = 1;
4090 		else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */
4091 			unaligned = 1;
4092 	}
4093 
4094 	sg = mtip_hw_get_scatterlist(dd, &tag, unaligned);
4095 	if (likely(sg != NULL)) {
4096 		blk_queue_bounce(queue, &bio);
4097 
4098 		if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) {
4099 			dev_warn(&dd->pdev->dev,
4100 				"Maximum number of SGL entries exceeded\n");
4101 			bio_io_error(bio);
4102 			mtip_hw_release_scatterlist(dd, tag, unaligned);
4103 			return;
4104 		}
4105 
4106 		/* Create the scatter list for this bio. */
4107 		bio_for_each_segment(bvec, bio, iter) {
4108 			sg_set_page(&sg[nents],
4109 					bvec.bv_page,
4110 					bvec.bv_len,
4111 					bvec.bv_offset);
4112 			nents++;
4113 		}
4114 
4115 		/* Issue the read/write. */
4116 		mtip_hw_submit_io(dd,
4117 				bio->bi_iter.bi_sector,
4118 				bio_sectors(bio),
4119 				nents,
4120 				tag,
4121 				bio_endio,
4122 				bio,
4123 				bio_data_dir(bio),
4124 				unaligned);
4125 	} else
4126 		bio_io_error(bio);
4127 }
4128 
4129 /*
4130  * Block layer initialization function.
4131  *
4132  * This function is called once by the PCI layer for each P320
4133  * device that is connected to the system.
4134  *
4135  * @dd Pointer to the driver data structure.
4136  *
4137  * return value
4138  *	0 on success else an error code.
4139  */
4140 static int mtip_block_initialize(struct driver_data *dd)
4141 {
4142 	int rv = 0, wait_for_rebuild = 0;
4143 	sector_t capacity;
4144 	unsigned int index = 0;
4145 	struct kobject *kobj;
4146 	unsigned char thd_name[16];
4147 
4148 	if (dd->disk)
4149 		goto skip_create_disk; /* hw init done, before rebuild */
4150 
4151 	/* Initialize the protocol layer. */
4152 	wait_for_rebuild = mtip_hw_init(dd);
4153 	if (wait_for_rebuild < 0) {
4154 		dev_err(&dd->pdev->dev,
4155 			"Protocol layer initialization failed\n");
4156 		rv = -EINVAL;
4157 		goto protocol_init_error;
4158 	}
4159 
4160 	dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node);
4161 	if (dd->disk  == NULL) {
4162 		dev_err(&dd->pdev->dev,
4163 			"Unable to allocate gendisk structure\n");
4164 		rv = -EINVAL;
4165 		goto alloc_disk_error;
4166 	}
4167 
4168 	/* Generate the disk name, implemented same as in sd.c */
4169 	do {
4170 		if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL))
4171 			goto ida_get_error;
4172 
4173 		spin_lock(&rssd_index_lock);
4174 		rv = ida_get_new(&rssd_index_ida, &index);
4175 		spin_unlock(&rssd_index_lock);
4176 	} while (rv == -EAGAIN);
4177 
4178 	if (rv)
4179 		goto ida_get_error;
4180 
4181 	rv = rssd_disk_name_format("rssd",
4182 				index,
4183 				dd->disk->disk_name,
4184 				DISK_NAME_LEN);
4185 	if (rv)
4186 		goto disk_index_error;
4187 
4188 	dd->disk->driverfs_dev	= &dd->pdev->dev;
4189 	dd->disk->major		= dd->major;
4190 	dd->disk->first_minor	= dd->instance * MTIP_MAX_MINORS;
4191 	dd->disk->fops		= &mtip_block_ops;
4192 	dd->disk->private_data	= dd;
4193 	dd->index		= index;
4194 
4195 	mtip_hw_debugfs_init(dd);
4196 
4197 	/*
4198 	 * if rebuild pending, start the service thread, and delay the block
4199 	 * queue creation and add_disk()
4200 	 */
4201 	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
4202 		goto start_service_thread;
4203 
4204 skip_create_disk:
4205 	/* Allocate the request queue. */
4206 	dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node);
4207 	if (dd->queue == NULL) {
4208 		dev_err(&dd->pdev->dev,
4209 			"Unable to allocate request queue\n");
4210 		rv = -ENOMEM;
4211 		goto block_queue_alloc_init_error;
4212 	}
4213 
4214 	/* Attach our request function to the request queue. */
4215 	blk_queue_make_request(dd->queue, mtip_make_request);
4216 
4217 	dd->disk->queue		= dd->queue;
4218 	dd->queue->queuedata	= dd;
4219 
4220 	/* Set device limits. */
4221 	set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
4222 	blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
4223 	blk_queue_physical_block_size(dd->queue, 4096);
4224 	blk_queue_max_hw_sectors(dd->queue, 0xffff);
4225 	blk_queue_max_segment_size(dd->queue, 0x400000);
4226 	blk_queue_io_min(dd->queue, 4096);
4227 	blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask);
4228 
4229 	/*
4230 	 * write back cache is not supported in the device. FUA depends on
4231 	 * write back cache support, hence setting flush support to zero.
4232 	 */
4233 	blk_queue_flush(dd->queue, 0);
4234 
4235 	/* Signal trim support */
4236 	if (dd->trim_supp == true) {
4237 		set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags);
4238 		dd->queue->limits.discard_granularity = 4096;
4239 		blk_queue_max_discard_sectors(dd->queue,
4240 			MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
4241 		dd->queue->limits.discard_zeroes_data = 0;
4242 	}
4243 
4244 	/* Set the capacity of the device in 512 byte sectors. */
4245 	if (!(mtip_hw_get_capacity(dd, &capacity))) {
4246 		dev_warn(&dd->pdev->dev,
4247 			"Could not read drive capacity\n");
4248 		rv = -EIO;
4249 		goto read_capacity_error;
4250 	}
4251 	set_capacity(dd->disk, capacity);
4252 
4253 	/* Enable the block device and add it to /dev */
4254 	add_disk(dd->disk);
4255 
4256 	dd->bdev = bdget_disk(dd->disk, 0);
4257 	/*
4258 	 * Now that the disk is active, initialize any sysfs attributes
4259 	 * managed by the protocol layer.
4260 	 */
4261 	kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
4262 	if (kobj) {
4263 		mtip_hw_sysfs_init(dd, kobj);
4264 		kobject_put(kobj);
4265 	}
4266 
4267 	if (dd->mtip_svc_handler) {
4268 		set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
4269 		return rv; /* service thread created for handling rebuild */
4270 	}
4271 
4272 start_service_thread:
4273 	sprintf(thd_name, "mtip_svc_thd_%02d", index);
4274 	dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
4275 						dd, dd->numa_node, "%s",
4276 						thd_name);
4277 
4278 	if (IS_ERR(dd->mtip_svc_handler)) {
4279 		dev_err(&dd->pdev->dev, "service thread failed to start\n");
4280 		dd->mtip_svc_handler = NULL;
4281 		rv = -EFAULT;
4282 		goto kthread_run_error;
4283 	}
4284 	wake_up_process(dd->mtip_svc_handler);
4285 	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
4286 		rv = wait_for_rebuild;
4287 
4288 	return rv;
4289 
4290 kthread_run_error:
4291 	bdput(dd->bdev);
4292 	dd->bdev = NULL;
4293 
4294 	/* Delete our gendisk. This also removes the device from /dev */
4295 	del_gendisk(dd->disk);
4296 
4297 read_capacity_error:
4298 	blk_cleanup_queue(dd->queue);
4299 
4300 block_queue_alloc_init_error:
4301 	mtip_hw_debugfs_exit(dd);
4302 disk_index_error:
4303 	spin_lock(&rssd_index_lock);
4304 	ida_remove(&rssd_index_ida, index);
4305 	spin_unlock(&rssd_index_lock);
4306 
4307 ida_get_error:
4308 	put_disk(dd->disk);
4309 
4310 alloc_disk_error:
4311 	mtip_hw_exit(dd); /* De-initialize the protocol layer. */
4312 
4313 protocol_init_error:
4314 	return rv;
4315 }
4316 
4317 /*
4318  * Block layer deinitialization function.
4319  *
4320  * Called by the PCI layer as each P320 device is removed.
4321  *
4322  * @dd Pointer to the driver data structure.
4323  *
4324  * return value
4325  *	0
4326  */
4327 static int mtip_block_remove(struct driver_data *dd)
4328 {
4329 	struct kobject *kobj;
4330 
4331 	if (!dd->sr) {
4332 		mtip_hw_debugfs_exit(dd);
4333 
4334 		if (dd->mtip_svc_handler) {
4335 			set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
4336 			wake_up_interruptible(&dd->port->svc_wait);
4337 			kthread_stop(dd->mtip_svc_handler);
4338 		}
4339 
4340 		/* Clean up the sysfs attributes, if created */
4341 		if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
4342 			kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
4343 			if (kobj) {
4344 				mtip_hw_sysfs_exit(dd, kobj);
4345 				kobject_put(kobj);
4346 			}
4347 		}
4348 		/*
4349 		 * Delete our gendisk structure. This also removes the device
4350 		 * from /dev
4351 		 */
4352 		if (dd->bdev) {
4353 			bdput(dd->bdev);
4354 			dd->bdev = NULL;
4355 		}
4356 		if (dd->disk) {
4357 			if (dd->disk->queue) {
4358 				del_gendisk(dd->disk);
4359 				blk_cleanup_queue(dd->queue);
4360 				dd->queue = NULL;
4361 			} else
4362 				put_disk(dd->disk);
4363 		}
4364 		dd->disk  = NULL;
4365 
4366 		spin_lock(&rssd_index_lock);
4367 		ida_remove(&rssd_index_ida, dd->index);
4368 		spin_unlock(&rssd_index_lock);
4369 	} else {
4370 		dev_info(&dd->pdev->dev, "device %s surprise removal\n",
4371 						dd->disk->disk_name);
4372 	}
4373 
4374 	/* De-initialize the protocol layer. */
4375 	mtip_hw_exit(dd);
4376 
4377 	return 0;
4378 }
4379 
4380 /*
4381  * Function called by the PCI layer when just before the
4382  * machine shuts down.
4383  *
4384  * If a protocol layer shutdown function is present it will be called
4385  * by this function.
4386  *
4387  * @dd Pointer to the driver data structure.
4388  *
4389  * return value
4390  *	0
4391  */
4392 static int mtip_block_shutdown(struct driver_data *dd)
4393 {
4394 	/* Delete our gendisk structure, and cleanup the blk queue. */
4395 	if (dd->disk) {
4396 		dev_info(&dd->pdev->dev,
4397 			"Shutting down %s ...\n", dd->disk->disk_name);
4398 
4399 		if (dd->disk->queue) {
4400 			del_gendisk(dd->disk);
4401 			blk_cleanup_queue(dd->queue);
4402 		} else
4403 			put_disk(dd->disk);
4404 		dd->disk  = NULL;
4405 		dd->queue = NULL;
4406 	}
4407 
4408 	spin_lock(&rssd_index_lock);
4409 	ida_remove(&rssd_index_ida, dd->index);
4410 	spin_unlock(&rssd_index_lock);
4411 
4412 	mtip_hw_shutdown(dd);
4413 	return 0;
4414 }
4415 
4416 static int mtip_block_suspend(struct driver_data *dd)
4417 {
4418 	dev_info(&dd->pdev->dev,
4419 		"Suspending %s ...\n", dd->disk->disk_name);
4420 	mtip_hw_suspend(dd);
4421 	return 0;
4422 }
4423 
4424 static int mtip_block_resume(struct driver_data *dd)
4425 {
4426 	dev_info(&dd->pdev->dev, "Resuming %s ...\n",
4427 		dd->disk->disk_name);
4428 	mtip_hw_resume(dd);
4429 	return 0;
4430 }
4431 
4432 static void drop_cpu(int cpu)
4433 {
4434 	cpu_use[cpu]--;
4435 }
4436 
4437 static int get_least_used_cpu_on_node(int node)
4438 {
4439 	int cpu, least_used_cpu, least_cnt;
4440 	const struct cpumask *node_mask;
4441 
4442 	node_mask = cpumask_of_node(node);
4443 	least_used_cpu = cpumask_first(node_mask);
4444 	least_cnt = cpu_use[least_used_cpu];
4445 	cpu = least_used_cpu;
4446 
4447 	for_each_cpu(cpu, node_mask) {
4448 		if (cpu_use[cpu] < least_cnt) {
4449 			least_used_cpu = cpu;
4450 			least_cnt = cpu_use[cpu];
4451 		}
4452 	}
4453 	cpu_use[least_used_cpu]++;
4454 	return least_used_cpu;
4455 }
4456 
4457 /* Helper for selecting a node in round robin mode */
4458 static inline int mtip_get_next_rr_node(void)
4459 {
4460 	static int next_node = -1;
4461 
4462 	if (next_node == -1) {
4463 		next_node = first_online_node;
4464 		return next_node;
4465 	}
4466 
4467 	next_node = next_online_node(next_node);
4468 	if (next_node == MAX_NUMNODES)
4469 		next_node = first_online_node;
4470 	return next_node;
4471 }
4472 
/*
 * Instantiate DEFINE_HANDLER for indices 0 through 7.
 * NOTE(review): presumably each expansion defines the mtip_workq_sdbfN
 * work function that mtip_pci_probe() hands to INIT_WORK() -- confirm
 * against the DEFINE_HANDLER macro definition.
 */
static DEFINE_HANDLER(0);
static DEFINE_HANDLER(1);
static DEFINE_HANDLER(2);
static DEFINE_HANDLER(3);
static DEFINE_HANDLER(4);
static DEFINE_HANDLER(5);
static DEFINE_HANDLER(6);
static DEFINE_HANDLER(7);
4481 
4482 /*
4483  * Called for each supported PCI device detected.
4484  *
4485  * This function allocates the private data structure, enables the
4486  * PCI device and then calls the block layer initialization function.
4487  *
4488  * return value
4489  *	0 on success else an error code.
4490  */
4491 static int mtip_pci_probe(struct pci_dev *pdev,
4492 			const struct pci_device_id *ent)
4493 {
4494 	int rv = 0;
4495 	struct driver_data *dd = NULL;
4496 	char cpu_list[256];
4497 	const struct cpumask *node_mask;
4498 	int cpu, i = 0, j = 0;
4499 	int my_node = NUMA_NO_NODE;
4500 	unsigned long flags;
4501 
4502 	/* Allocate memory for this devices private data. */
4503 	my_node = pcibus_to_node(pdev->bus);
4504 	if (my_node != NUMA_NO_NODE) {
4505 		if (!node_online(my_node))
4506 			my_node = mtip_get_next_rr_node();
4507 	} else {
4508 		dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n");
4509 		my_node = mtip_get_next_rr_node();
4510 	}
4511 	dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n",
4512 		my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev),
4513 		cpu_to_node(raw_smp_processor_id()), raw_smp_processor_id());
4514 
4515 	dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node);
4516 	if (dd == NULL) {
4517 		dev_err(&pdev->dev,
4518 			"Unable to allocate memory for driver data\n");
4519 		return -ENOMEM;
4520 	}
4521 
4522 	/* Attach the private data to this PCI device.  */
4523 	pci_set_drvdata(pdev, dd);
4524 
4525 	rv = pcim_enable_device(pdev);
4526 	if (rv < 0) {
4527 		dev_err(&pdev->dev, "Unable to enable device\n");
4528 		goto iomap_err;
4529 	}
4530 
4531 	/* Map BAR5 to memory. */
4532 	rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME);
4533 	if (rv < 0) {
4534 		dev_err(&pdev->dev, "Unable to map regions\n");
4535 		goto iomap_err;
4536 	}
4537 
4538 	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
4539 		rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
4540 
4541 		if (rv) {
4542 			rv = pci_set_consistent_dma_mask(pdev,
4543 						DMA_BIT_MASK(32));
4544 			if (rv) {
4545 				dev_warn(&pdev->dev,
4546 					"64-bit DMA enable failed\n");
4547 				goto setmask_err;
4548 			}
4549 		}
4550 	}
4551 
4552 	/* Copy the info we may need later into the private data structure. */
4553 	dd->major	= mtip_major;
4554 	dd->instance	= instance;
4555 	dd->pdev	= pdev;
4556 	dd->numa_node	= my_node;
4557 
4558 	INIT_LIST_HEAD(&dd->online_list);
4559 	INIT_LIST_HEAD(&dd->remove_list);
4560 
4561 	memset(dd->workq_name, 0, 32);
4562 	snprintf(dd->workq_name, 31, "mtipq%d", dd->instance);
4563 
4564 	dd->isr_workq = create_workqueue(dd->workq_name);
4565 	if (!dd->isr_workq) {
4566 		dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
4567 		rv = -ENOMEM;
4568 		goto block_initialize_err;
4569 	}
4570 
4571 	memset(cpu_list, 0, sizeof(cpu_list));
4572 
4573 	node_mask = cpumask_of_node(dd->numa_node);
4574 	if (!cpumask_empty(node_mask)) {
4575 		for_each_cpu(cpu, node_mask)
4576 		{
4577 			snprintf(&cpu_list[j], 256 - j, "%d ", cpu);
4578 			j = strlen(cpu_list);
4579 		}
4580 
4581 		dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n",
4582 			dd->numa_node,
4583 			topology_physical_package_id(cpumask_first(node_mask)),
4584 			nr_cpus_node(dd->numa_node),
4585 			cpu_list);
4586 	} else
4587 		dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n");
4588 
4589 	dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node);
4590 	dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n",
4591 		cpu_to_node(dd->isr_binding), dd->isr_binding);
4592 
4593 	/* first worker context always runs in ISR */
4594 	dd->work[0].cpu_binding = dd->isr_binding;
4595 	dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
4596 	dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
4597 	dd->work[3].cpu_binding = dd->work[0].cpu_binding;
4598 	dd->work[4].cpu_binding = dd->work[1].cpu_binding;
4599 	dd->work[5].cpu_binding = dd->work[2].cpu_binding;
4600 	dd->work[6].cpu_binding = dd->work[2].cpu_binding;
4601 	dd->work[7].cpu_binding = dd->work[1].cpu_binding;
4602 
4603 	/* Log the bindings */
4604 	for_each_present_cpu(cpu) {
4605 		memset(cpu_list, 0, sizeof(cpu_list));
4606 		for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) {
4607 			if (dd->work[i].cpu_binding == cpu) {
4608 				snprintf(&cpu_list[j], 256 - j, "%d ", i);
4609 				j = strlen(cpu_list);
4610 			}
4611 		}
4612 		if (j)
4613 			dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list);
4614 	}
4615 
4616 	INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0);
4617 	INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1);
4618 	INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2);
4619 	INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3);
4620 	INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4);
4621 	INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5);
4622 	INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6);
4623 	INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7);
4624 
4625 	pci_set_master(pdev);
4626 	rv = pci_enable_msi(pdev);
4627 	if (rv) {
4628 		dev_warn(&pdev->dev,
4629 			"Unable to enable MSI interrupt.\n");
4630 		goto msi_initialize_err;
4631 	}
4632 
4633 	/* Initialize the block layer. */
4634 	rv = mtip_block_initialize(dd);
4635 	if (rv < 0) {
4636 		dev_err(&pdev->dev,
4637 			"Unable to initialize block layer\n");
4638 		goto block_initialize_err;
4639 	}
4640 
4641 	/*
4642 	 * Increment the instance count so that each device has a unique
4643 	 * instance number.
4644 	 */
4645 	instance++;
4646 	if (rv != MTIP_FTL_REBUILD_MAGIC)
4647 		set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
4648 	else
4649 		rv = 0; /* device in rebuild state, return 0 from probe */
4650 
4651 	/* Add to online list even if in ftl rebuild */
4652 	spin_lock_irqsave(&dev_lock, flags);
4653 	list_add(&dd->online_list, &online_list);
4654 	spin_unlock_irqrestore(&dev_lock, flags);
4655 
4656 	goto done;
4657 
4658 block_initialize_err:
4659 	pci_disable_msi(pdev);
4660 
4661 msi_initialize_err:
4662 	if (dd->isr_workq) {
4663 		flush_workqueue(dd->isr_workq);
4664 		destroy_workqueue(dd->isr_workq);
4665 		drop_cpu(dd->work[0].cpu_binding);
4666 		drop_cpu(dd->work[1].cpu_binding);
4667 		drop_cpu(dd->work[2].cpu_binding);
4668 	}
4669 setmask_err:
4670 	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
4671 
4672 iomap_err:
4673 	kfree(dd);
4674 	pci_set_drvdata(pdev, NULL);
4675 	return rv;
4676 done:
4677 	return rv;
4678 }
4679 
4680 /*
4681  * Called for each probed device when the device is removed or the
4682  * driver is unloaded.
4683  *
4684  * return value
4685  *	None
4686  */
4687 static void mtip_pci_remove(struct pci_dev *pdev)
4688 {
4689 	struct driver_data *dd = pci_get_drvdata(pdev);
4690 	unsigned long flags, to;
4691 
4692 	set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
4693 
4694 	spin_lock_irqsave(&dev_lock, flags);
4695 	list_del_init(&dd->online_list);
4696 	list_add(&dd->remove_list, &removing_list);
4697 	spin_unlock_irqrestore(&dev_lock, flags);
4698 
4699 	mtip_check_surprise_removal(pdev);
4700 	synchronize_irq(dd->pdev->irq);
4701 
4702 	/* Spin until workers are done */
4703 	to = jiffies + msecs_to_jiffies(4000);
4704 	do {
4705 		msleep(20);
4706 	} while (atomic_read(&dd->irq_workers_active) != 0 &&
4707 		time_before(jiffies, to));
4708 
4709 	if (atomic_read(&dd->irq_workers_active) != 0) {
4710 		dev_warn(&dd->pdev->dev,
4711 			"Completion workers still active!\n");
4712 	}
4713 	/* Cleanup the outstanding commands */
4714 	mtip_command_cleanup(dd);
4715 
4716 	/* Clean up the block layer. */
4717 	mtip_block_remove(dd);
4718 
4719 	if (dd->isr_workq) {
4720 		flush_workqueue(dd->isr_workq);
4721 		destroy_workqueue(dd->isr_workq);
4722 		drop_cpu(dd->work[0].cpu_binding);
4723 		drop_cpu(dd->work[1].cpu_binding);
4724 		drop_cpu(dd->work[2].cpu_binding);
4725 	}
4726 
4727 	pci_disable_msi(pdev);
4728 
4729 	spin_lock_irqsave(&dev_lock, flags);
4730 	list_del_init(&dd->remove_list);
4731 	spin_unlock_irqrestore(&dev_lock, flags);
4732 
4733 	if (!dd->sr)
4734 		kfree(dd);
4735 	else
4736 		set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
4737 
4738 	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
4739 	pci_set_drvdata(pdev, NULL);
4740 	pci_dev_put(pdev);
4741 
4742 }
4743 
4744 /*
4745  * Called for each probed device when the device is suspended.
4746  *
4747  * return value
4748  *	0  Success
4749  *	<0 Error
4750  */
4751 static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
4752 {
4753 	int rv = 0;
4754 	struct driver_data *dd = pci_get_drvdata(pdev);
4755 
4756 	if (!dd) {
4757 		dev_err(&pdev->dev,
4758 			"Driver private datastructure is NULL\n");
4759 		return -EFAULT;
4760 	}
4761 
4762 	set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
4763 
4764 	/* Disable ports & interrupts then send standby immediate */
4765 	rv = mtip_block_suspend(dd);
4766 	if (rv < 0) {
4767 		dev_err(&pdev->dev,
4768 			"Failed to suspend controller\n");
4769 		return rv;
4770 	}
4771 
4772 	/*
4773 	 * Save the pci config space to pdev structure &
4774 	 * disable the device
4775 	 */
4776 	pci_save_state(pdev);
4777 	pci_disable_device(pdev);
4778 
4779 	/* Move to Low power state*/
4780 	pci_set_power_state(pdev, PCI_D3hot);
4781 
4782 	return rv;
4783 }
4784 
4785 /*
4786  * Called for each probed device when the device is resumed.
4787  *
4788  * return value
4789  *      0  Success
4790  *      <0 Error
4791  */
4792 static int mtip_pci_resume(struct pci_dev *pdev)
4793 {
4794 	int rv = 0;
4795 	struct driver_data *dd;
4796 
4797 	dd = pci_get_drvdata(pdev);
4798 	if (!dd) {
4799 		dev_err(&pdev->dev,
4800 			"Driver private datastructure is NULL\n");
4801 		return -EFAULT;
4802 	}
4803 
4804 	/* Move the device to active State */
4805 	pci_set_power_state(pdev, PCI_D0);
4806 
4807 	/* Restore PCI configuration space */
4808 	pci_restore_state(pdev);
4809 
4810 	/* Enable the PCI device*/
4811 	rv = pcim_enable_device(pdev);
4812 	if (rv < 0) {
4813 		dev_err(&pdev->dev,
4814 			"Failed to enable card during resume\n");
4815 		goto err;
4816 	}
4817 	pci_set_master(pdev);
4818 
4819 	/*
4820 	 * Calls hbaReset, initPort, & startPort function
4821 	 * then enables interrupts
4822 	 */
4823 	rv = mtip_block_resume(dd);
4824 	if (rv < 0)
4825 		dev_err(&pdev->dev, "Unable to resume\n");
4826 
4827 err:
4828 	clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
4829 
4830 	return rv;
4831 }
4832 
/*
 * Shutdown routine
 *
 * Shuts down the block layer of the device, provided driver data is
 * attached to it.
 *
 * @pdev Pointer to the PCI device being shut down.
 *
 * return value
 *      None
 */
static void mtip_pci_shutdown(struct pci_dev *pdev)
{
	struct driver_data *dd = pci_get_drvdata(pdev);

	if (!dd)
		return;

	mtip_block_shutdown(dd);
}
4845 
4846 /* Table of device ids supported by this driver. */
4847 static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = {
4848 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320H_DEVICE_ID) },
4849 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320M_DEVICE_ID) },
4850 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320S_DEVICE_ID) },
4851 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P325M_DEVICE_ID) },
4852 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420H_DEVICE_ID) },
4853 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420M_DEVICE_ID) },
4854 	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P425M_DEVICE_ID) },
4855 	{ 0 }
4856 };
4857 
4858 /* Structure that describes the PCI driver functions. */
4859 static struct pci_driver mtip_pci_driver = {
4860 	.name			= MTIP_DRV_NAME,
4861 	.id_table		= mtip_pci_tbl,
4862 	.probe			= mtip_pci_probe,
4863 	.remove			= mtip_pci_remove,
4864 	.suspend		= mtip_pci_suspend,
4865 	.resume			= mtip_pci_resume,
4866 	.shutdown		= mtip_pci_shutdown,
4867 };
4868 
4869 MODULE_DEVICE_TABLE(pci, mtip_pci_tbl);
4870 
4871 /*
4872  * Module initialization function.
4873  *
4874  * Called once when the module is loaded. This function allocates a major
4875  * block device number to the Cyclone devices and registers the PCI layer
4876  * of the driver.
4877  *
4878  * Return value
4879  *      0 on success else error code.
4880  */
4881 static int __init mtip_init(void)
4882 {
4883 	int error;
4884 
4885 	pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
4886 
4887 	spin_lock_init(&dev_lock);
4888 
4889 	INIT_LIST_HEAD(&online_list);
4890 	INIT_LIST_HEAD(&removing_list);
4891 
4892 	/* Allocate a major block device number to use with this driver. */
4893 	error = register_blkdev(0, MTIP_DRV_NAME);
4894 	if (error <= 0) {
4895 		pr_err("Unable to register block device (%d)\n",
4896 		error);
4897 		return -EBUSY;
4898 	}
4899 	mtip_major = error;
4900 
4901 	dfs_parent = debugfs_create_dir("rssd", NULL);
4902 	if (IS_ERR_OR_NULL(dfs_parent)) {
4903 		pr_warn("Error creating debugfs parent\n");
4904 		dfs_parent = NULL;
4905 	}
4906 	if (dfs_parent) {
4907 		dfs_device_status = debugfs_create_file("device_status",
4908 					S_IRUGO, dfs_parent, NULL,
4909 					&mtip_device_status_fops);
4910 		if (IS_ERR_OR_NULL(dfs_device_status)) {
4911 			pr_err("Error creating device_status node\n");
4912 			dfs_device_status = NULL;
4913 		}
4914 	}
4915 
4916 	/* Register our PCI operations. */
4917 	error = pci_register_driver(&mtip_pci_driver);
4918 	if (error) {
4919 		debugfs_remove(dfs_parent);
4920 		unregister_blkdev(mtip_major, MTIP_DRV_NAME);
4921 	}
4922 
4923 	return error;
4924 }
4925 
4926 /*
4927  * Module de-initialization function.
4928  *
4929  * Called once when the module is unloaded. This function deallocates
4930  * the major block device number allocated by mtip_init() and
4931  * unregisters the PCI layer of the driver.
4932  *
4933  * Return value
4934  *      none
4935  */
4936 static void __exit mtip_exit(void)
4937 {
4938 	debugfs_remove_recursive(dfs_parent);
4939 
4940 	/* Release the allocated major block device number. */
4941 	unregister_blkdev(mtip_major, MTIP_DRV_NAME);
4942 
4943 	/* Unregister the PCI driver. */
4944 	pci_unregister_driver(&mtip_pci_driver);
4945 }
4946 
/* Module metadata. */
MODULE_AUTHOR("Micron Technology, Inc");
MODULE_DESCRIPTION("Micron RealSSD PCIe Block Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(MTIP_DRV_VERSION);

/* Register mtip_init() and mtip_exit() as the module load/unload hooks. */
module_init(mtip_init);
module_exit(mtip_exit);
4954