xref: /openbmc/qemu/hw/ide/ahci.c (revision 91bfcdb0)
1 /*
2  * QEMU AHCI Emulation
3  *
4  * Copyright (c) 2010 qiaochong@loongson.cn
5  * Copyright (c) 2010 Roland Elek <elek.roland@gmail.com>
6  * Copyright (c) 2010 Sebastian Herbszt <herbszt@gmx.de>
7  * Copyright (c) 2010 Alexander Graf <agraf@suse.de>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23 
24 #include <hw/hw.h>
25 #include <hw/pci/msi.h>
26 #include <hw/i386/pc.h>
27 #include <hw/pci/pci.h>
28 
29 #include "qemu/error-report.h"
30 #include "sysemu/block-backend.h"
31 #include "sysemu/dma.h"
32 #include "internal.h"
33 #include <hw/ide/pci.h>
34 #include <hw/ide/ahci.h>
35 
/* Compile-time switch: set to a non-zero value to trace AHCI activity. */
#define DEBUG_AHCI 0

/*
 * Trace helper.  When DEBUG_AHCI is non-zero this prints
 * "ahci: <function>: [<port>] " followed by the printf-style message to
 * stderr.  With DEBUG_AHCI set to 0 the guarded branch never runs, so the
 * arguments are not evaluated, but they are still seen by the compiler.
 */
#define DPRINTF(port, fmt, ...)                                     \
    do {                                                            \
        if (DEBUG_AHCI) {                                           \
            fprintf(stderr, "ahci: %s: [%d] ", __func__, (port));   \
            fprintf(stderr, fmt, ## __VA_ARGS__);                   \
        }                                                           \
    } while (0)
45 
46 static void check_cmd(AHCIState *s, int port);
47 static int handle_cmd(AHCIState *s, int port, uint8_t slot);
48 static void ahci_reset_port(AHCIState *s, int port);
49 static bool ahci_write_fis_d2h(AHCIDevice *ad);
50 static void ahci_init_d2h(AHCIDevice *ad);
51 static int ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit);
52 static bool ahci_map_clb_address(AHCIDevice *ad);
53 static bool ahci_map_fis_address(AHCIDevice *ad);
54 static void ahci_unmap_clb_address(AHCIDevice *ad);
55 static void ahci_unmap_fis_address(AHCIDevice *ad);
56 
57 
58 static uint32_t  ahci_port_read(AHCIState *s, int port, int offset)
59 {
60     uint32_t val;
61     AHCIPortRegs *pr;
62     pr = &s->dev[port].port_regs;
63 
64     switch (offset) {
65     case PORT_LST_ADDR:
66         val = pr->lst_addr;
67         break;
68     case PORT_LST_ADDR_HI:
69         val = pr->lst_addr_hi;
70         break;
71     case PORT_FIS_ADDR:
72         val = pr->fis_addr;
73         break;
74     case PORT_FIS_ADDR_HI:
75         val = pr->fis_addr_hi;
76         break;
77     case PORT_IRQ_STAT:
78         val = pr->irq_stat;
79         break;
80     case PORT_IRQ_MASK:
81         val = pr->irq_mask;
82         break;
83     case PORT_CMD:
84         val = pr->cmd;
85         break;
86     case PORT_TFDATA:
87         val = pr->tfdata;
88         break;
89     case PORT_SIG:
90         val = pr->sig;
91         break;
92     case PORT_SCR_STAT:
93         if (s->dev[port].port.ifs[0].blk) {
94             val = SATA_SCR_SSTATUS_DET_DEV_PRESENT_PHY_UP |
95                   SATA_SCR_SSTATUS_SPD_GEN1 | SATA_SCR_SSTATUS_IPM_ACTIVE;
96         } else {
97             val = SATA_SCR_SSTATUS_DET_NODEV;
98         }
99         break;
100     case PORT_SCR_CTL:
101         val = pr->scr_ctl;
102         break;
103     case PORT_SCR_ERR:
104         val = pr->scr_err;
105         break;
106     case PORT_SCR_ACT:
107         val = pr->scr_act;
108         break;
109     case PORT_CMD_ISSUE:
110         val = pr->cmd_issue;
111         break;
112     case PORT_RESERVED:
113     default:
114         val = 0;
115     }
116     DPRINTF(port, "offset: 0x%x val: 0x%x\n", offset, val);
117     return val;
118 
119 }
120 
121 static void ahci_irq_raise(AHCIState *s, AHCIDevice *dev)
122 {
123     DeviceState *dev_state = s->container;
124     PCIDevice *pci_dev = (PCIDevice *) object_dynamic_cast(OBJECT(dev_state),
125                                                            TYPE_PCI_DEVICE);
126 
127     DPRINTF(0, "raise irq\n");
128 
129     if (pci_dev && msi_enabled(pci_dev)) {
130         msi_notify(pci_dev, 0);
131     } else {
132         qemu_irq_raise(s->irq);
133     }
134 }
135 
136 static void ahci_irq_lower(AHCIState *s, AHCIDevice *dev)
137 {
138     DeviceState *dev_state = s->container;
139     PCIDevice *pci_dev = (PCIDevice *) object_dynamic_cast(OBJECT(dev_state),
140                                                            TYPE_PCI_DEVICE);
141 
142     DPRINTF(0, "lower irq\n");
143 
144     if (!pci_dev || !msi_enabled(pci_dev)) {
145         qemu_irq_lower(s->irq);
146     }
147 }
148 
149 static void ahci_check_irq(AHCIState *s)
150 {
151     int i;
152 
153     DPRINTF(-1, "check irq %#x\n", s->control_regs.irqstatus);
154 
155     s->control_regs.irqstatus = 0;
156     for (i = 0; i < s->ports; i++) {
157         AHCIPortRegs *pr = &s->dev[i].port_regs;
158         if (pr->irq_stat & pr->irq_mask) {
159             s->control_regs.irqstatus |= (1 << i);
160         }
161     }
162 
163     if (s->control_regs.irqstatus &&
164         (s->control_regs.ghc & HOST_CTL_IRQ_EN)) {
165             ahci_irq_raise(s, NULL);
166     } else {
167         ahci_irq_lower(s, NULL);
168     }
169 }
170 
171 static void ahci_trigger_irq(AHCIState *s, AHCIDevice *d,
172                              int irq_type)
173 {
174     DPRINTF(d->port_no, "trigger irq %#x -> %x\n",
175             irq_type, d->port_regs.irq_mask & irq_type);
176 
177     d->port_regs.irq_stat |= irq_type;
178     ahci_check_irq(s);
179 }
180 
181 static void map_page(AddressSpace *as, uint8_t **ptr, uint64_t addr,
182                      uint32_t wanted)
183 {
184     hwaddr len = wanted;
185 
186     if (*ptr) {
187         dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
188     }
189 
190     *ptr = dma_memory_map(as, addr, &len, DMA_DIRECTION_FROM_DEVICE);
191     if (len < wanted) {
192         dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
193         *ptr = NULL;
194     }
195 }
196 
197 /**
198  * Check the cmd register to see if we should start or stop
199  * the DMA or FIS RX engines.
200  *
201  * @ad: Device to engage.
202  * @allow_stop: Allow device to transition from started to stopped?
203  *   'no' is useful for migration post_load, which does not expect a transition.
204  *
205  * @return 0 on success, -1 on error.
206  */
207 static int ahci_cond_start_engines(AHCIDevice *ad, bool allow_stop)
208 {
209     AHCIPortRegs *pr = &ad->port_regs;
210 
211     if (pr->cmd & PORT_CMD_START) {
212         if (ahci_map_clb_address(ad)) {
213             pr->cmd |= PORT_CMD_LIST_ON;
214         } else {
215             error_report("AHCI: Failed to start DMA engine: "
216                          "bad command list buffer address");
217             return -1;
218         }
219     } else if (pr->cmd & PORT_CMD_LIST_ON) {
220         if (allow_stop) {
221             ahci_unmap_clb_address(ad);
222             pr->cmd = pr->cmd & ~(PORT_CMD_LIST_ON);
223         } else {
224             error_report("AHCI: DMA engine should be off, "
225                          "but appears to still be running");
226             return -1;
227         }
228     }
229 
230     if (pr->cmd & PORT_CMD_FIS_RX) {
231         if (ahci_map_fis_address(ad)) {
232             pr->cmd |= PORT_CMD_FIS_ON;
233         } else {
234             error_report("AHCI: Failed to start FIS receive engine: "
235                          "bad FIS receive buffer address");
236             return -1;
237         }
238     } else if (pr->cmd & PORT_CMD_FIS_ON) {
239         if (allow_stop) {
240             ahci_unmap_fis_address(ad);
241             pr->cmd = pr->cmd & ~(PORT_CMD_FIS_ON);
242         } else {
243             error_report("AHCI: FIS receive engine should be off, "
244                          "but appears to still be running");
245             return -1;
246         }
247     }
248 
249     return 0;
250 }
251 
252 static void  ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
253 {
254     AHCIPortRegs *pr = &s->dev[port].port_regs;
255 
256     DPRINTF(port, "offset: 0x%x val: 0x%x\n", offset, val);
257     switch (offset) {
258         case PORT_LST_ADDR:
259             pr->lst_addr = val;
260             break;
261         case PORT_LST_ADDR_HI:
262             pr->lst_addr_hi = val;
263             break;
264         case PORT_FIS_ADDR:
265             pr->fis_addr = val;
266             break;
267         case PORT_FIS_ADDR_HI:
268             pr->fis_addr_hi = val;
269             break;
270         case PORT_IRQ_STAT:
271             pr->irq_stat &= ~val;
272             ahci_check_irq(s);
273             break;
274         case PORT_IRQ_MASK:
275             pr->irq_mask = val & 0xfdc000ff;
276             ahci_check_irq(s);
277             break;
278         case PORT_CMD:
279             /* Block any Read-only fields from being set;
280              * including LIST_ON and FIS_ON.
281              * The spec requires to set ICC bits to zero after the ICC change
282              * is done. We don't support ICC state changes, therefore always
283              * force the ICC bits to zero.
284              */
285             pr->cmd = (pr->cmd & PORT_CMD_RO_MASK) |
286                       (val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK));
287 
288             /* Check FIS RX and CLB engines, allow transition to false: */
289             ahci_cond_start_engines(&s->dev[port], true);
290 
291             /* XXX usually the FIS would be pending on the bus here and
292                    issuing deferred until the OS enables FIS receival.
293                    Instead, we only submit it once - which works in most
294                    cases, but is a hack. */
295             if ((pr->cmd & PORT_CMD_FIS_ON) &&
296                 !s->dev[port].init_d2h_sent) {
297                 ahci_init_d2h(&s->dev[port]);
298             }
299 
300             check_cmd(s, port);
301             break;
302         case PORT_TFDATA:
303             /* Read Only. */
304             break;
305         case PORT_SIG:
306             /* Read Only */
307             break;
308         case PORT_SCR_STAT:
309             /* Read Only */
310             break;
311         case PORT_SCR_CTL:
312             if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) &&
313                 ((val & AHCI_SCR_SCTL_DET) == 0)) {
314                 ahci_reset_port(s, port);
315             }
316             pr->scr_ctl = val;
317             break;
318         case PORT_SCR_ERR:
319             pr->scr_err &= ~val;
320             break;
321         case PORT_SCR_ACT:
322             /* RW1 */
323             pr->scr_act |= val;
324             break;
325         case PORT_CMD_ISSUE:
326             pr->cmd_issue |= val;
327             check_cmd(s, port);
328             break;
329         default:
330             break;
331     }
332 }
333 
334 static uint64_t ahci_mem_read_32(void *opaque, hwaddr addr)
335 {
336     AHCIState *s = opaque;
337     uint32_t val = 0;
338 
339     if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
340         switch (addr) {
341         case HOST_CAP:
342             val = s->control_regs.cap;
343             break;
344         case HOST_CTL:
345             val = s->control_regs.ghc;
346             break;
347         case HOST_IRQ_STAT:
348             val = s->control_regs.irqstatus;
349             break;
350         case HOST_PORTS_IMPL:
351             val = s->control_regs.impl;
352             break;
353         case HOST_VERSION:
354             val = s->control_regs.version;
355             break;
356         }
357 
358         DPRINTF(-1, "(addr 0x%08X), val 0x%08X\n", (unsigned) addr, val);
359     } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
360                (addr < (AHCI_PORT_REGS_START_ADDR +
361                 (s->ports * AHCI_PORT_ADDR_OFFSET_LEN)))) {
362         val = ahci_port_read(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
363                              addr & AHCI_PORT_ADDR_OFFSET_MASK);
364     }
365 
366     return val;
367 }
368 
369 
370 /**
371  * AHCI 1.3 section 3 ("HBA Memory Registers")
372  * Support unaligned 8/16/32 bit reads, and 64 bit aligned reads.
373  * Caller is responsible for masking unwanted higher order bytes.
374  */
375 static uint64_t ahci_mem_read(void *opaque, hwaddr addr, unsigned size)
376 {
377     hwaddr aligned = addr & ~0x3;
378     int ofst = addr - aligned;
379     uint64_t lo = ahci_mem_read_32(opaque, aligned);
380     uint64_t hi;
381 
382     /* if < 8 byte read does not cross 4 byte boundary */
383     if (ofst + size <= 4) {
384         return lo >> (ofst * 8);
385     }
386     g_assert_cmpint(size, >, 1);
387 
388     /* If the 64bit read is unaligned, we will produce undefined
389      * results. AHCI does not support unaligned 64bit reads. */
390     hi = ahci_mem_read_32(opaque, aligned + 4);
391     return (hi << 32 | lo) >> (ofst * 8);
392 }
393 
394 
395 static void ahci_mem_write(void *opaque, hwaddr addr,
396                            uint64_t val, unsigned size)
397 {
398     AHCIState *s = opaque;
399 
400     /* Only aligned reads are allowed on AHCI */
401     if (addr & 3) {
402         fprintf(stderr, "ahci: Mis-aligned write to addr 0x"
403                 TARGET_FMT_plx "\n", addr);
404         return;
405     }
406 
407     if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
408         DPRINTF(-1, "(addr 0x%08X), val 0x%08"PRIX64"\n", (unsigned) addr, val);
409 
410         switch (addr) {
411             case HOST_CAP: /* R/WO, RO */
412                 /* FIXME handle R/WO */
413                 break;
414             case HOST_CTL: /* R/W */
415                 if (val & HOST_CTL_RESET) {
416                     DPRINTF(-1, "HBA Reset\n");
417                     ahci_reset(s);
418                 } else {
419                     s->control_regs.ghc = (val & 0x3) | HOST_CTL_AHCI_EN;
420                     ahci_check_irq(s);
421                 }
422                 break;
423             case HOST_IRQ_STAT: /* R/WC, RO */
424                 s->control_regs.irqstatus &= ~val;
425                 ahci_check_irq(s);
426                 break;
427             case HOST_PORTS_IMPL: /* R/WO, RO */
428                 /* FIXME handle R/WO */
429                 break;
430             case HOST_VERSION: /* RO */
431                 /* FIXME report write? */
432                 break;
433             default:
434                 DPRINTF(-1, "write to unknown register 0x%x\n", (unsigned)addr);
435         }
436     } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
437                (addr < (AHCI_PORT_REGS_START_ADDR +
438                 (s->ports * AHCI_PORT_ADDR_OFFSET_LEN)))) {
439         ahci_port_write(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
440                         addr & AHCI_PORT_ADDR_OFFSET_MASK, val);
441     }
442 
443 }
444 
445 static const MemoryRegionOps ahci_mem_ops = {
446     .read = ahci_mem_read,
447     .write = ahci_mem_write,
448     .endianness = DEVICE_LITTLE_ENDIAN,
449 };
450 
451 static uint64_t ahci_idp_read(void *opaque, hwaddr addr,
452                               unsigned size)
453 {
454     AHCIState *s = opaque;
455 
456     if (addr == s->idp_offset) {
457         /* index register */
458         return s->idp_index;
459     } else if (addr == s->idp_offset + 4) {
460         /* data register - do memory read at location selected by index */
461         return ahci_mem_read(opaque, s->idp_index, size);
462     } else {
463         return 0;
464     }
465 }
466 
467 static void ahci_idp_write(void *opaque, hwaddr addr,
468                            uint64_t val, unsigned size)
469 {
470     AHCIState *s = opaque;
471 
472     if (addr == s->idp_offset) {
473         /* index register - mask off reserved bits */
474         s->idp_index = (uint32_t)val & ((AHCI_MEM_BAR_SIZE - 1) & ~3);
475     } else if (addr == s->idp_offset + 4) {
476         /* data register - do memory write at location selected by index */
477         ahci_mem_write(opaque, s->idp_index, val, size);
478     }
479 }
480 
481 static const MemoryRegionOps ahci_idp_ops = {
482     .read = ahci_idp_read,
483     .write = ahci_idp_write,
484     .endianness = DEVICE_LITTLE_ENDIAN,
485 };
486 
487 
488 static void ahci_reg_init(AHCIState *s)
489 {
490     int i;
491 
492     s->control_regs.cap = (s->ports - 1) |
493                           (AHCI_NUM_COMMAND_SLOTS << 8) |
494                           (AHCI_SUPPORTED_SPEED_GEN1 << AHCI_SUPPORTED_SPEED) |
495                           HOST_CAP_NCQ | HOST_CAP_AHCI;
496 
497     s->control_regs.impl = (1 << s->ports) - 1;
498 
499     s->control_regs.version = AHCI_VERSION_1_0;
500 
501     for (i = 0; i < s->ports; i++) {
502         s->dev[i].port_state = STATE_RUN;
503     }
504 }
505 
506 static void check_cmd(AHCIState *s, int port)
507 {
508     AHCIPortRegs *pr = &s->dev[port].port_regs;
509     uint8_t slot;
510 
511     if ((pr->cmd & PORT_CMD_START) && pr->cmd_issue) {
512         for (slot = 0; (slot < 32) && pr->cmd_issue; slot++) {
513             if ((pr->cmd_issue & (1U << slot)) &&
514                 !handle_cmd(s, port, slot)) {
515                 pr->cmd_issue &= ~(1U << slot);
516             }
517         }
518     }
519 }
520 
521 static void ahci_check_cmd_bh(void *opaque)
522 {
523     AHCIDevice *ad = opaque;
524 
525     qemu_bh_delete(ad->check_bh);
526     ad->check_bh = NULL;
527 
528     if ((ad->busy_slot != -1) &&
529         !(ad->port.ifs[0].status & (BUSY_STAT|DRQ_STAT))) {
530         /* no longer busy */
531         ad->port_regs.cmd_issue &= ~(1 << ad->busy_slot);
532         ad->busy_slot = -1;
533     }
534 
535     check_cmd(ad->hba, ad->port_no);
536 }
537 
538 static void ahci_init_d2h(AHCIDevice *ad)
539 {
540     IDEState *ide_state = &ad->port.ifs[0];
541     AHCIPortRegs *pr = &ad->port_regs;
542 
543     if (ad->init_d2h_sent) {
544         return;
545     }
546 
547     if (ahci_write_fis_d2h(ad)) {
548         ad->init_d2h_sent = true;
549         /* We're emulating receiving the first Reg H2D Fis from the device;
550          * Update the SIG register, but otherwise proceed as normal. */
551         pr->sig = ((uint32_t)ide_state->hcyl << 24) |
552             (ide_state->lcyl << 16) |
553             (ide_state->sector << 8) |
554             (ide_state->nsector & 0xFF);
555     }
556 }
557 
558 static void ahci_set_signature(AHCIDevice *ad, uint32_t sig)
559 {
560     IDEState *s = &ad->port.ifs[0];
561     s->hcyl = sig >> 24 & 0xFF;
562     s->lcyl = sig >> 16 & 0xFF;
563     s->sector = sig >> 8 & 0xFF;
564     s->nsector = sig & 0xFF;
565 
566     DPRINTF(ad->port_no, "set hcyl:lcyl:sect:nsect = 0x%08x\n", sig);
567 }
568 
569 static void ahci_reset_port(AHCIState *s, int port)
570 {
571     AHCIDevice *d = &s->dev[port];
572     AHCIPortRegs *pr = &d->port_regs;
573     IDEState *ide_state = &d->port.ifs[0];
574     int i;
575 
576     DPRINTF(port, "reset port\n");
577 
578     ide_bus_reset(&d->port);
579     ide_state->ncq_queues = AHCI_MAX_CMDS;
580 
581     pr->scr_stat = 0;
582     pr->scr_err = 0;
583     pr->scr_act = 0;
584     pr->tfdata = 0x7F;
585     pr->sig = 0xFFFFFFFF;
586     d->busy_slot = -1;
587     d->init_d2h_sent = false;
588 
589     ide_state = &s->dev[port].port.ifs[0];
590     if (!ide_state->blk) {
591         return;
592     }
593 
594     /* reset ncq queue */
595     for (i = 0; i < AHCI_MAX_CMDS; i++) {
596         NCQTransferState *ncq_tfs = &s->dev[port].ncq_tfs[i];
597         ncq_tfs->halt = false;
598         if (!ncq_tfs->used) {
599             continue;
600         }
601 
602         if (ncq_tfs->aiocb) {
603             blk_aio_cancel(ncq_tfs->aiocb);
604             ncq_tfs->aiocb = NULL;
605         }
606 
607         /* Maybe we just finished the request thanks to blk_aio_cancel() */
608         if (!ncq_tfs->used) {
609             continue;
610         }
611 
612         qemu_sglist_destroy(&ncq_tfs->sglist);
613         ncq_tfs->used = 0;
614     }
615 
616     s->dev[port].port_state = STATE_RUN;
617     if (ide_state->drive_kind == IDE_CD) {
618         ahci_set_signature(d, SATA_SIGNATURE_CDROM);\
619         ide_state->status = SEEK_STAT | WRERR_STAT | READY_STAT;
620     } else {
621         ahci_set_signature(d, SATA_SIGNATURE_DISK);
622         ide_state->status = SEEK_STAT | WRERR_STAT;
623     }
624 
625     ide_state->error = 1;
626     ahci_init_d2h(d);
627 }
628 
/*
 * Hex-dump a FIS to stderr, 16 bytes per row, each row prefixed with its
 * starting offset.  Compiled to an empty function unless DEBUG_AHCI is
 * non-zero.
 *
 * @fis: bytes to dump.
 * @cmd_len: number of bytes to dump.
 */
static void debug_print_fis(uint8_t *fis, int cmd_len)
{
#if DEBUG_AHCI
    int pos = 0;

    fprintf(stderr, "fis:");
    while (pos < cmd_len) {
        if (!(pos & 0xf)) {
            /* start a new row */
            fprintf(stderr, "\n%02x:", pos);
        }
        fprintf(stderr, "%02x ", fis[pos]);
        pos++;
    }
    fprintf(stderr, "\n");
#endif
}
644 
645 static bool ahci_map_fis_address(AHCIDevice *ad)
646 {
647     AHCIPortRegs *pr = &ad->port_regs;
648     map_page(ad->hba->as, &ad->res_fis,
649              ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
650     return ad->res_fis != NULL;
651 }
652 
653 static void ahci_unmap_fis_address(AHCIDevice *ad)
654 {
655     dma_memory_unmap(ad->hba->as, ad->res_fis, 256,
656                      DMA_DIRECTION_FROM_DEVICE, 256);
657     ad->res_fis = NULL;
658 }
659 
660 static bool ahci_map_clb_address(AHCIDevice *ad)
661 {
662     AHCIPortRegs *pr = &ad->port_regs;
663     ad->cur_cmd = NULL;
664     map_page(ad->hba->as, &ad->lst,
665              ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
666     return ad->lst != NULL;
667 }
668 
669 static void ahci_unmap_clb_address(AHCIDevice *ad)
670 {
671     dma_memory_unmap(ad->hba->as, ad->lst, 1024,
672                      DMA_DIRECTION_FROM_DEVICE, 1024);
673     ad->lst = NULL;
674 }
675 
676 static void ahci_write_fis_sdb(AHCIState *s, NCQTransferState *ncq_tfs)
677 {
678     AHCIDevice *ad = ncq_tfs->drive;
679     AHCIPortRegs *pr = &ad->port_regs;
680     IDEState *ide_state;
681     SDBFIS *sdb_fis;
682 
683     if (!ad->res_fis ||
684         !(pr->cmd & PORT_CMD_FIS_RX)) {
685         return;
686     }
687 
688     sdb_fis = (SDBFIS *)&ad->res_fis[RES_FIS_SDBFIS];
689     ide_state = &ad->port.ifs[0];
690 
691     sdb_fis->type = SATA_FIS_TYPE_SDB;
692     /* Interrupt pending & Notification bit */
693     sdb_fis->flags = 0x40; /* Interrupt bit, always 1 for NCQ */
694     sdb_fis->status = ide_state->status & 0x77;
695     sdb_fis->error = ide_state->error;
696     /* update SAct field in SDB_FIS */
697     sdb_fis->payload = cpu_to_le32(ad->finished);
698 
699     /* Update shadow registers (except BSY 0x80 and DRQ 0x08) */
700     pr->tfdata = (ad->port.ifs[0].error << 8) |
701         (ad->port.ifs[0].status & 0x77) |
702         (pr->tfdata & 0x88);
703     pr->scr_act &= ~ad->finished;
704     ad->finished = 0;
705 
706     /* Trigger IRQ if interrupt bit is set (which currently, it always is) */
707     if (sdb_fis->flags & 0x40) {
708         ahci_trigger_irq(s, ad, PORT_IRQ_SDB_FIS);
709     }
710 }
711 
712 static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len)
713 {
714     AHCIPortRegs *pr = &ad->port_regs;
715     uint8_t *pio_fis;
716     IDEState *s = &ad->port.ifs[0];
717 
718     if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
719         return;
720     }
721 
722     pio_fis = &ad->res_fis[RES_FIS_PSFIS];
723 
724     pio_fis[0] = SATA_FIS_TYPE_PIO_SETUP;
725     pio_fis[1] = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0);
726     pio_fis[2] = s->status;
727     pio_fis[3] = s->error;
728 
729     pio_fis[4] = s->sector;
730     pio_fis[5] = s->lcyl;
731     pio_fis[6] = s->hcyl;
732     pio_fis[7] = s->select;
733     pio_fis[8] = s->hob_sector;
734     pio_fis[9] = s->hob_lcyl;
735     pio_fis[10] = s->hob_hcyl;
736     pio_fis[11] = 0;
737     pio_fis[12] = s->nsector & 0xFF;
738     pio_fis[13] = (s->nsector >> 8) & 0xFF;
739     pio_fis[14] = 0;
740     pio_fis[15] = s->status;
741     pio_fis[16] = len & 255;
742     pio_fis[17] = len >> 8;
743     pio_fis[18] = 0;
744     pio_fis[19] = 0;
745 
746     /* Update shadow registers: */
747     pr->tfdata = (ad->port.ifs[0].error << 8) |
748         ad->port.ifs[0].status;
749 
750     if (pio_fis[2] & ERR_STAT) {
751         ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR);
752     }
753 
754     ahci_trigger_irq(ad->hba, ad, PORT_IRQ_PIOS_FIS);
755 }
756 
757 static bool ahci_write_fis_d2h(AHCIDevice *ad)
758 {
759     AHCIPortRegs *pr = &ad->port_regs;
760     uint8_t *d2h_fis;
761     int i;
762     IDEState *s = &ad->port.ifs[0];
763 
764     if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
765         return false;
766     }
767 
768     d2h_fis = &ad->res_fis[RES_FIS_RFIS];
769 
770     d2h_fis[0] = SATA_FIS_TYPE_REGISTER_D2H;
771     d2h_fis[1] = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0);
772     d2h_fis[2] = s->status;
773     d2h_fis[3] = s->error;
774 
775     d2h_fis[4] = s->sector;
776     d2h_fis[5] = s->lcyl;
777     d2h_fis[6] = s->hcyl;
778     d2h_fis[7] = s->select;
779     d2h_fis[8] = s->hob_sector;
780     d2h_fis[9] = s->hob_lcyl;
781     d2h_fis[10] = s->hob_hcyl;
782     d2h_fis[11] = 0;
783     d2h_fis[12] = s->nsector & 0xFF;
784     d2h_fis[13] = (s->nsector >> 8) & 0xFF;
785     for (i = 14; i < 20; i++) {
786         d2h_fis[i] = 0;
787     }
788 
789     /* Update shadow registers: */
790     pr->tfdata = (ad->port.ifs[0].error << 8) |
791         ad->port.ifs[0].status;
792 
793     if (d2h_fis[2] & ERR_STAT) {
794         ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR);
795     }
796 
797     ahci_trigger_irq(ad->hba, ad, PORT_IRQ_D2H_REG_FIS);
798     return true;
799 }
800 
801 static int prdt_tbl_entry_size(const AHCI_SG *tbl)
802 {
803     /* flags_size is zero-based */
804     return (le32_to_cpu(tbl->flags_size) & AHCI_PRDT_SIZE_MASK) + 1;
805 }
806 
807 static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist,
808                                 AHCICmdHdr *cmd, int64_t limit, int32_t offset)
809 {
810     uint16_t opts = le16_to_cpu(cmd->opts);
811     uint16_t prdtl = le16_to_cpu(cmd->prdtl);
812     uint64_t cfis_addr = le64_to_cpu(cmd->tbl_addr);
813     uint64_t prdt_addr = cfis_addr + 0x80;
814     dma_addr_t prdt_len = (prdtl * sizeof(AHCI_SG));
815     dma_addr_t real_prdt_len = prdt_len;
816     uint8_t *prdt;
817     int i;
818     int r = 0;
819     uint64_t sum = 0;
820     int off_idx = -1;
821     int64_t off_pos = -1;
822     int tbl_entry_size;
823     IDEBus *bus = &ad->port;
824     BusState *qbus = BUS(bus);
825 
826     /*
827      * Note: AHCI PRDT can describe up to 256GiB. SATA/ATA only support
828      * transactions of up to 32MiB as of ATA8-ACS3 rev 1b, assuming a
829      * 512 byte sector size. We limit the PRDT in this implementation to
830      * a reasonably large 2GiB, which can accommodate the maximum transfer
831      * request for sector sizes up to 32K.
832      */
833 
834     if (!prdtl) {
835         DPRINTF(ad->port_no, "no sg list given by guest: 0x%08x\n", opts);
836         return -1;
837     }
838 
839     /* map PRDT */
840     if (!(prdt = dma_memory_map(ad->hba->as, prdt_addr, &prdt_len,
841                                 DMA_DIRECTION_TO_DEVICE))){
842         DPRINTF(ad->port_no, "map failed\n");
843         return -1;
844     }
845 
846     if (prdt_len < real_prdt_len) {
847         DPRINTF(ad->port_no, "mapped less than expected\n");
848         r = -1;
849         goto out;
850     }
851 
852     /* Get entries in the PRDT, init a qemu sglist accordingly */
853     if (prdtl > 0) {
854         AHCI_SG *tbl = (AHCI_SG *)prdt;
855         sum = 0;
856         for (i = 0; i < prdtl; i++) {
857             tbl_entry_size = prdt_tbl_entry_size(&tbl[i]);
858             if (offset < (sum + tbl_entry_size)) {
859                 off_idx = i;
860                 off_pos = offset - sum;
861                 break;
862             }
863             sum += tbl_entry_size;
864         }
865         if ((off_idx == -1) || (off_pos < 0) || (off_pos > tbl_entry_size)) {
866             DPRINTF(ad->port_no, "%s: Incorrect offset! "
867                             "off_idx: %d, off_pos: %"PRId64"\n",
868                             __func__, off_idx, off_pos);
869             r = -1;
870             goto out;
871         }
872 
873         qemu_sglist_init(sglist, qbus->parent, (prdtl - off_idx),
874                          ad->hba->as);
875         qemu_sglist_add(sglist, le64_to_cpu(tbl[off_idx].addr) + off_pos,
876                         MIN(prdt_tbl_entry_size(&tbl[off_idx]) - off_pos,
877                             limit));
878 
879         for (i = off_idx + 1; i < prdtl && sglist->size < limit; i++) {
880             qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
881                             MIN(prdt_tbl_entry_size(&tbl[i]),
882                                 limit - sglist->size));
883             if (sglist->size > INT32_MAX) {
884                 error_report("AHCI Physical Region Descriptor Table describes "
885                              "more than 2 GiB.");
886                 qemu_sglist_destroy(sglist);
887                 r = -1;
888                 goto out;
889             }
890         }
891     }
892 
893 out:
894     dma_memory_unmap(ad->hba->as, prdt, prdt_len,
895                      DMA_DIRECTION_TO_DEVICE, prdt_len);
896     return r;
897 }
898 
899 static void ncq_err(NCQTransferState *ncq_tfs)
900 {
901     IDEState *ide_state = &ncq_tfs->drive->port.ifs[0];
902 
903     ide_state->error = ABRT_ERR;
904     ide_state->status = READY_STAT | ERR_STAT;
905     ncq_tfs->drive->port_regs.scr_err |= (1 << ncq_tfs->tag);
906 }
907 
908 static void ncq_finish(NCQTransferState *ncq_tfs)
909 {
910     /* If we didn't error out, set our finished bit. Errored commands
911      * do not get a bit set for the SDB FIS ACT register, nor do they
912      * clear the outstanding bit in scr_act (PxSACT). */
913     if (!(ncq_tfs->drive->port_regs.scr_err & (1 << ncq_tfs->tag))) {
914         ncq_tfs->drive->finished |= (1 << ncq_tfs->tag);
915     }
916 
917     ahci_write_fis_sdb(ncq_tfs->drive->hba, ncq_tfs);
918 
919     DPRINTF(ncq_tfs->drive->port_no, "NCQ transfer tag %d finished\n",
920             ncq_tfs->tag);
921 
922     block_acct_done(blk_get_stats(ncq_tfs->drive->port.ifs[0].blk),
923                     &ncq_tfs->acct);
924     qemu_sglist_destroy(&ncq_tfs->sglist);
925     ncq_tfs->used = 0;
926 }
927 
928 static void ncq_cb(void *opaque, int ret)
929 {
930     NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
931     IDEState *ide_state = &ncq_tfs->drive->port.ifs[0];
932 
933     if (ret == -ECANCELED) {
934         return;
935     }
936 
937     if (ret < 0) {
938         bool is_read = ncq_tfs->cmd == READ_FPDMA_QUEUED;
939         BlockErrorAction action = blk_get_error_action(ide_state->blk,
940                                                        is_read, -ret);
941         if (action == BLOCK_ERROR_ACTION_STOP) {
942             ncq_tfs->halt = true;
943             ide_state->bus->error_status = IDE_RETRY_HBA;
944         } else if (action == BLOCK_ERROR_ACTION_REPORT) {
945             ncq_err(ncq_tfs);
946         }
947         blk_error_action(ide_state->blk, action, is_read, -ret);
948     } else {
949         ide_state->status = READY_STAT | SEEK_STAT;
950     }
951 
952     if (!ncq_tfs->halt) {
953         ncq_finish(ncq_tfs);
954     }
955 }
956 
957 static int is_ncq(uint8_t ata_cmd)
958 {
959     /* Based on SATA 3.2 section 13.6.3.2 */
960     switch (ata_cmd) {
961     case READ_FPDMA_QUEUED:
962     case WRITE_FPDMA_QUEUED:
963     case NCQ_NON_DATA:
964     case RECEIVE_FPDMA_QUEUED:
965     case SEND_FPDMA_QUEUED:
966         return 1;
967     default:
968         return 0;
969     }
970 }
971 
972 static void execute_ncq_command(NCQTransferState *ncq_tfs)
973 {
974     AHCIDevice *ad = ncq_tfs->drive;
975     IDEState *ide_state = &ad->port.ifs[0];
976     int port = ad->port_no;
977 
978     g_assert(is_ncq(ncq_tfs->cmd));
979     ncq_tfs->halt = false;
980 
981     switch (ncq_tfs->cmd) {
982     case READ_FPDMA_QUEUED:
983         DPRINTF(port, "NCQ reading %d sectors from LBA %"PRId64", tag %d\n",
984                 ncq_tfs->sector_count, ncq_tfs->lba, ncq_tfs->tag);
985 
986         DPRINTF(port, "tag %d aio read %"PRId64"\n",
987                 ncq_tfs->tag, ncq_tfs->lba);
988 
989         dma_acct_start(ide_state->blk, &ncq_tfs->acct,
990                        &ncq_tfs->sglist, BLOCK_ACCT_READ);
991         ncq_tfs->aiocb = dma_blk_read(ide_state->blk, &ncq_tfs->sglist,
992                                       ncq_tfs->lba, ncq_cb, ncq_tfs);
993         break;
994     case WRITE_FPDMA_QUEUED:
995         DPRINTF(port, "NCQ writing %d sectors to LBA %"PRId64", tag %d\n",
996                 ncq_tfs->sector_count, ncq_tfs->lba, ncq_tfs->tag);
997 
998         DPRINTF(port, "tag %d aio write %"PRId64"\n",
999                 ncq_tfs->tag, ncq_tfs->lba);
1000 
1001         dma_acct_start(ide_state->blk, &ncq_tfs->acct,
1002                        &ncq_tfs->sglist, BLOCK_ACCT_WRITE);
1003         ncq_tfs->aiocb = dma_blk_write(ide_state->blk, &ncq_tfs->sglist,
1004                                        ncq_tfs->lba, ncq_cb, ncq_tfs);
1005         break;
1006     default:
1007         DPRINTF(port, "error: unsupported NCQ command (0x%02x) received\n",
1008                 ncq_tfs->cmd);
1009         qemu_sglist_destroy(&ncq_tfs->sglist);
1010         ncq_err(ncq_tfs);
1011     }
1012 }
1013 
1014 
1015 static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
1016                                 uint8_t slot)
1017 {
1018     AHCIDevice *ad = &s->dev[port];
1019     IDEState *ide_state = &ad->port.ifs[0];
1020     NCQFrame *ncq_fis = (NCQFrame*)cmd_fis;
1021     uint8_t tag = ncq_fis->tag >> 3;
1022     NCQTransferState *ncq_tfs = &ad->ncq_tfs[tag];
1023     size_t size;
1024 
1025     g_assert(is_ncq(ncq_fis->command));
1026     if (ncq_tfs->used) {
1027         /* error - already in use */
1028         fprintf(stderr, "%s: tag %d already used\n", __FUNCTION__, tag);
1029         return;
1030     }
1031 
1032     ncq_tfs->used = 1;
1033     ncq_tfs->drive = ad;
1034     ncq_tfs->slot = slot;
1035     ncq_tfs->cmdh = &((AHCICmdHdr *)ad->lst)[slot];
1036     ncq_tfs->cmd = ncq_fis->command;
1037     ncq_tfs->lba = ((uint64_t)ncq_fis->lba5 << 40) |
1038                    ((uint64_t)ncq_fis->lba4 << 32) |
1039                    ((uint64_t)ncq_fis->lba3 << 24) |
1040                    ((uint64_t)ncq_fis->lba2 << 16) |
1041                    ((uint64_t)ncq_fis->lba1 << 8) |
1042                    (uint64_t)ncq_fis->lba0;
1043     ncq_tfs->tag = tag;
1044 
1045     /* Sanity-check the NCQ packet */
1046     if (tag != slot) {
1047         DPRINTF(port, "Warn: NCQ slot (%d) did not match the given tag (%d)\n",
1048                 slot, tag);
1049     }
1050 
1051     if (ncq_fis->aux0 || ncq_fis->aux1 || ncq_fis->aux2 || ncq_fis->aux3) {
1052         DPRINTF(port, "Warn: Attempt to use NCQ auxiliary fields.\n");
1053     }
1054     if (ncq_fis->prio || ncq_fis->icc) {
1055         DPRINTF(port, "Warn: Unsupported attempt to use PRIO/ICC fields\n");
1056     }
1057     if (ncq_fis->fua & NCQ_FIS_FUA_MASK) {
1058         DPRINTF(port, "Warn: Unsupported attempt to use Force Unit Access\n");
1059     }
1060     if (ncq_fis->tag & NCQ_FIS_RARC_MASK) {
1061         DPRINTF(port, "Warn: Unsupported attempt to use Rebuild Assist\n");
1062     }
1063 
1064     ncq_tfs->sector_count = ((ncq_fis->sector_count_high << 8) |
1065                              ncq_fis->sector_count_low);
1066     if (!ncq_tfs->sector_count) {
1067         ncq_tfs->sector_count = 0x10000;
1068     }
1069     size = ncq_tfs->sector_count * 512;
1070     ahci_populate_sglist(ad, &ncq_tfs->sglist, ncq_tfs->cmdh, size, 0);
1071 
1072     if (ncq_tfs->sglist.size < size) {
1073         error_report("ahci: PRDT length for NCQ command (0x%zx) "
1074                      "is smaller than the requested size (0x%zx)",
1075                      ncq_tfs->sglist.size, size);
1076         qemu_sglist_destroy(&ncq_tfs->sglist);
1077         ncq_err(ncq_tfs);
1078         ahci_trigger_irq(ad->hba, ad, PORT_IRQ_OVERFLOW);
1079         return;
1080     } else if (ncq_tfs->sglist.size != size) {
1081         DPRINTF(port, "Warn: PRDTL (0x%zx)"
1082                 " does not match requested size (0x%zx)",
1083                 ncq_tfs->sglist.size, size);
1084     }
1085 
1086     DPRINTF(port, "NCQ transfer LBA from %"PRId64" to %"PRId64", "
1087             "drive max %"PRId64"\n",
1088             ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 1,
1089             ide_state->nb_sectors - 1);
1090 
1091     execute_ncq_command(ncq_tfs);
1092 }
1093 
1094 static AHCICmdHdr *get_cmd_header(AHCIState *s, uint8_t port, uint8_t slot)
1095 {
1096     if (port >= s->ports || slot >= AHCI_MAX_CMDS) {
1097         return NULL;
1098     }
1099 
1100     return s->dev[port].lst ? &((AHCICmdHdr *)s->dev[port].lst)[slot] : NULL;
1101 }
1102 
1103 static void handle_reg_h2d_fis(AHCIState *s, int port,
1104                                uint8_t slot, uint8_t *cmd_fis)
1105 {
1106     IDEState *ide_state = &s->dev[port].port.ifs[0];
1107     AHCICmdHdr *cmd = get_cmd_header(s, port, slot);
1108     uint16_t opts = le16_to_cpu(cmd->opts);
1109 
1110     if (cmd_fis[1] & 0x0F) {
1111         DPRINTF(port, "Port Multiplier not supported."
1112                 " cmd_fis[0]=%02x cmd_fis[1]=%02x cmd_fis[2]=%02x\n",
1113                 cmd_fis[0], cmd_fis[1], cmd_fis[2]);
1114         return;
1115     }
1116 
1117     if (cmd_fis[1] & 0x70) {
1118         DPRINTF(port, "Reserved flags set in H2D Register FIS."
1119                 " cmd_fis[0]=%02x cmd_fis[1]=%02x cmd_fis[2]=%02x\n",
1120                 cmd_fis[0], cmd_fis[1], cmd_fis[2]);
1121         return;
1122     }
1123 
1124     if (!(cmd_fis[1] & SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER)) {
1125         switch (s->dev[port].port_state) {
1126         case STATE_RUN:
1127             if (cmd_fis[15] & ATA_SRST) {
1128                 s->dev[port].port_state = STATE_RESET;
1129             }
1130             break;
1131         case STATE_RESET:
1132             if (!(cmd_fis[15] & ATA_SRST)) {
1133                 ahci_reset_port(s, port);
1134             }
1135             break;
1136         }
1137         return;
1138     }
1139 
1140     /* Check for NCQ command */
1141     if (is_ncq(cmd_fis[2])) {
1142         process_ncq_command(s, port, cmd_fis, slot);
1143         return;
1144     }
1145 
1146     /* Decompose the FIS:
1147      * AHCI does not interpret FIS packets, it only forwards them.
1148      * SATA 1.0 describes how to decode LBA28 and CHS FIS packets.
1149      * Later specifications, e.g, SATA 3.2, describe LBA48 FIS packets.
1150      *
1151      * ATA4 describes sector number for LBA28/CHS commands.
1152      * ATA6 describes sector number for LBA48 commands.
1153      * ATA8 deprecates CHS fully, describing only LBA28/48.
1154      *
1155      * We dutifully convert the FIS into IDE registers, and allow the
1156      * core layer to interpret them as needed. */
1157     ide_state->feature = cmd_fis[3];
1158     ide_state->sector = cmd_fis[4];      /* LBA 7:0 */
1159     ide_state->lcyl = cmd_fis[5];        /* LBA 15:8  */
1160     ide_state->hcyl = cmd_fis[6];        /* LBA 23:16 */
1161     ide_state->select = cmd_fis[7];      /* LBA 27:24 (LBA28) */
1162     ide_state->hob_sector = cmd_fis[8];  /* LBA 31:24 */
1163     ide_state->hob_lcyl = cmd_fis[9];    /* LBA 39:32 */
1164     ide_state->hob_hcyl = cmd_fis[10];   /* LBA 47:40 */
1165     ide_state->hob_feature = cmd_fis[11];
1166     ide_state->nsector = (int64_t)((cmd_fis[13] << 8) | cmd_fis[12]);
1167     /* 14, 16, 17, 18, 19: Reserved (SATA 1.0) */
1168     /* 15: Only valid when UPDATE_COMMAND not set. */
1169 
1170     /* Copy the ACMD field (ATAPI packet, if any) from the AHCI command
1171      * table to ide_state->io_buffer */
1172     if (opts & AHCI_CMD_ATAPI) {
1173         memcpy(ide_state->io_buffer, &cmd_fis[AHCI_COMMAND_TABLE_ACMD], 0x10);
1174         debug_print_fis(ide_state->io_buffer, 0x10);
1175         s->dev[port].done_atapi_packet = false;
1176         /* XXX send PIO setup FIS */
1177     }
1178 
1179     ide_state->error = 0;
1180 
1181     /* Reset transferred byte counter */
1182     cmd->status = 0;
1183 
1184     /* We're ready to process the command in FIS byte 2. */
1185     ide_exec_cmd(&s->dev[port].port, cmd_fis[2]);
1186 }
1187 
1188 static int handle_cmd(AHCIState *s, int port, uint8_t slot)
1189 {
1190     IDEState *ide_state;
1191     uint64_t tbl_addr;
1192     AHCICmdHdr *cmd;
1193     uint8_t *cmd_fis;
1194     dma_addr_t cmd_len;
1195 
1196     if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
1197         /* Engine currently busy, try again later */
1198         DPRINTF(port, "engine busy\n");
1199         return -1;
1200     }
1201 
1202     if (!s->dev[port].lst) {
1203         DPRINTF(port, "error: lst not given but cmd handled");
1204         return -1;
1205     }
1206     cmd = get_cmd_header(s, port, slot);
1207     /* remember current slot handle for later */
1208     s->dev[port].cur_cmd = cmd;
1209 
1210     /* The device we are working for */
1211     ide_state = &s->dev[port].port.ifs[0];
1212     if (!ide_state->blk) {
1213         DPRINTF(port, "error: guest accessed unused port");
1214         return -1;
1215     }
1216 
1217     tbl_addr = le64_to_cpu(cmd->tbl_addr);
1218     cmd_len = 0x80;
1219     cmd_fis = dma_memory_map(s->as, tbl_addr, &cmd_len,
1220                              DMA_DIRECTION_FROM_DEVICE);
1221     if (!cmd_fis) {
1222         DPRINTF(port, "error: guest passed us an invalid cmd fis\n");
1223         return -1;
1224     } else if (cmd_len != 0x80) {
1225         ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_HBUS_ERR);
1226         DPRINTF(port, "error: dma_memory_map failed: "
1227                 "(len(%02"PRIx64") != 0x80)\n",
1228                 cmd_len);
1229         goto out;
1230     }
1231     debug_print_fis(cmd_fis, 0x80);
1232 
1233     switch (cmd_fis[0]) {
1234         case SATA_FIS_TYPE_REGISTER_H2D:
1235             handle_reg_h2d_fis(s, port, slot, cmd_fis);
1236             break;
1237         default:
1238             DPRINTF(port, "unknown command cmd_fis[0]=%02x cmd_fis[1]=%02x "
1239                           "cmd_fis[2]=%02x\n", cmd_fis[0], cmd_fis[1],
1240                           cmd_fis[2]);
1241             break;
1242     }
1243 
1244 out:
1245     dma_memory_unmap(s->as, cmd_fis, cmd_len, DMA_DIRECTION_FROM_DEVICE,
1246                      cmd_len);
1247 
1248     if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
1249         /* async command, complete later */
1250         s->dev[port].busy_slot = slot;
1251         return -1;
1252     }
1253 
1254     /* done handling the command */
1255     return 0;
1256 }
1257 
1258 /* DMA dev <-> ram */
1259 static void ahci_start_transfer(IDEDMA *dma)
1260 {
1261     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1262     IDEState *s = &ad->port.ifs[0];
1263     uint32_t size = (uint32_t)(s->data_end - s->data_ptr);
1264     /* write == ram -> device */
1265     uint16_t opts = le16_to_cpu(ad->cur_cmd->opts);
1266     int is_write = opts & AHCI_CMD_WRITE;
1267     int is_atapi = opts & AHCI_CMD_ATAPI;
1268     int has_sglist = 0;
1269 
1270     if (is_atapi && !ad->done_atapi_packet) {
1271         /* already prepopulated iobuffer */
1272         ad->done_atapi_packet = true;
1273         size = 0;
1274         goto out;
1275     }
1276 
1277     if (ahci_dma_prepare_buf(dma, size)) {
1278         has_sglist = 1;
1279     }
1280 
1281     DPRINTF(ad->port_no, "%sing %d bytes on %s w/%s sglist\n",
1282             is_write ? "writ" : "read", size, is_atapi ? "atapi" : "ata",
1283             has_sglist ? "" : "o");
1284 
1285     if (has_sglist && size) {
1286         if (is_write) {
1287             dma_buf_write(s->data_ptr, size, &s->sg);
1288         } else {
1289             dma_buf_read(s->data_ptr, size, &s->sg);
1290         }
1291     }
1292 
1293 out:
1294     /* declare that we processed everything */
1295     s->data_ptr = s->data_end;
1296 
1297     /* Update number of transferred bytes, destroy sglist */
1298     dma_buf_commit(s, size);
1299 
1300     s->end_transfer_func(s);
1301 
1302     if (!(s->status & DRQ_STAT)) {
1303         /* done with PIO send/receive */
1304         ahci_write_fis_pio(ad, le32_to_cpu(ad->cur_cmd->status));
1305     }
1306 }
1307 
1308 static void ahci_start_dma(IDEDMA *dma, IDEState *s,
1309                            BlockCompletionFunc *dma_cb)
1310 {
1311     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1312     DPRINTF(ad->port_no, "\n");
1313     s->io_buffer_offset = 0;
1314     dma_cb(s, 0);
1315 }
1316 
1317 static void ahci_restart_dma(IDEDMA *dma)
1318 {
1319     /* Nothing to do, ahci_start_dma already resets s->io_buffer_offset.  */
1320 }
1321 
1322 /**
1323  * IDE/PIO restarts are handled by the core layer, but NCQ commands
1324  * need an extra kick from the AHCI HBA.
1325  */
1326 static void ahci_restart(IDEDMA *dma)
1327 {
1328     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1329     int i;
1330 
1331     for (i = 0; i < AHCI_MAX_CMDS; i++) {
1332         NCQTransferState *ncq_tfs = &ad->ncq_tfs[i];
1333         if (ncq_tfs->halt) {
1334             execute_ncq_command(ncq_tfs);
1335         }
1336     }
1337 }
1338 
1339 /**
1340  * Called in DMA and PIO R/W chains to read the PRDT.
1341  * Not shared with NCQ pathways.
1342  */
1343 static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit)
1344 {
1345     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1346     IDEState *s = &ad->port.ifs[0];
1347 
1348     if (ahci_populate_sglist(ad, &s->sg, ad->cur_cmd,
1349                              limit, s->io_buffer_offset) == -1) {
1350         DPRINTF(ad->port_no, "ahci_dma_prepare_buf failed.\n");
1351         return -1;
1352     }
1353     s->io_buffer_size = s->sg.size;
1354 
1355     DPRINTF(ad->port_no, "len=%#x\n", s->io_buffer_size);
1356     return s->io_buffer_size;
1357 }
1358 
1359 /**
1360  * Updates the command header with a bytes-read value.
1361  * Called via dma_buf_commit, for both DMA and PIO paths.
1362  * sglist destruction is handled within dma_buf_commit.
1363  */
1364 static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes)
1365 {
1366     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1367 
1368     tx_bytes += le32_to_cpu(ad->cur_cmd->status);
1369     ad->cur_cmd->status = cpu_to_le32(tx_bytes);
1370 }
1371 
1372 static int ahci_dma_rw_buf(IDEDMA *dma, int is_write)
1373 {
1374     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1375     IDEState *s = &ad->port.ifs[0];
1376     uint8_t *p = s->io_buffer + s->io_buffer_index;
1377     int l = s->io_buffer_size - s->io_buffer_index;
1378 
1379     if (ahci_populate_sglist(ad, &s->sg, ad->cur_cmd, l, s->io_buffer_offset)) {
1380         return 0;
1381     }
1382 
1383     if (is_write) {
1384         dma_buf_read(p, l, &s->sg);
1385     } else {
1386         dma_buf_write(p, l, &s->sg);
1387     }
1388 
1389     /* free sglist, update byte count */
1390     dma_buf_commit(s, l);
1391 
1392     s->io_buffer_index += l;
1393 
1394     DPRINTF(ad->port_no, "len=%#x\n", l);
1395 
1396     return 1;
1397 }
1398 
1399 static void ahci_cmd_done(IDEDMA *dma)
1400 {
1401     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1402 
1403     DPRINTF(ad->port_no, "cmd done\n");
1404 
1405     /* update d2h status */
1406     ahci_write_fis_d2h(ad);
1407 
1408     if (!ad->check_bh) {
1409         /* maybe we still have something to process, check later */
1410         ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
1411         qemu_bh_schedule(ad->check_bh);
1412     }
1413 }
1414 
/**
 * Handler for the per-port IRQ lines allocated in ahci_init().
 * Intentionally empty: changes on these lines are ignored.
 */
static void ahci_irq_set(void *opaque, int n, int level)
{
    /* nothing to do */
}
1418 
1419 static const IDEDMAOps ahci_dma_ops = {
1420     .start_dma = ahci_start_dma,
1421     .restart = ahci_restart,
1422     .restart_dma = ahci_restart_dma,
1423     .start_transfer = ahci_start_transfer,
1424     .prepare_buf = ahci_dma_prepare_buf,
1425     .commit_buf = ahci_commit_buf,
1426     .rw_buf = ahci_dma_rw_buf,
1427     .cmd_done = ahci_cmd_done,
1428 };
1429 
1430 void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports)
1431 {
1432     qemu_irq *irqs;
1433     int i;
1434 
1435     s->as = as;
1436     s->ports = ports;
1437     s->dev = g_new0(AHCIDevice, ports);
1438     s->container = qdev;
1439     ahci_reg_init(s);
1440     /* XXX BAR size should be 1k, but that breaks, so bump it to 4k for now */
1441     memory_region_init_io(&s->mem, OBJECT(qdev), &ahci_mem_ops, s,
1442                           "ahci", AHCI_MEM_BAR_SIZE);
1443     memory_region_init_io(&s->idp, OBJECT(qdev), &ahci_idp_ops, s,
1444                           "ahci-idp", 32);
1445 
1446     irqs = qemu_allocate_irqs(ahci_irq_set, s, s->ports);
1447 
1448     for (i = 0; i < s->ports; i++) {
1449         AHCIDevice *ad = &s->dev[i];
1450 
1451         ide_bus_new(&ad->port, sizeof(ad->port), qdev, i, 1);
1452         ide_init2(&ad->port, irqs[i]);
1453 
1454         ad->hba = s;
1455         ad->port_no = i;
1456         ad->port.dma = &ad->dma;
1457         ad->port.dma->ops = &ahci_dma_ops;
1458         ide_register_restart_cb(&ad->port);
1459     }
1460 }
1461 
1462 void ahci_uninit(AHCIState *s)
1463 {
1464     g_free(s->dev);
1465 }
1466 
1467 void ahci_reset(AHCIState *s)
1468 {
1469     AHCIPortRegs *pr;
1470     int i;
1471 
1472     s->control_regs.irqstatus = 0;
1473     /* AHCI Enable (AE)
1474      * The implementation of this bit is dependent upon the value of the
1475      * CAP.SAM bit. If CAP.SAM is '0', then GHC.AE shall be read-write and
1476      * shall have a reset value of '0'. If CAP.SAM is '1', then AE shall be
1477      * read-only and shall have a reset value of '1'.
1478      *
1479      * We set HOST_CAP_AHCI so we must enable AHCI at reset.
1480      */
1481     s->control_regs.ghc = HOST_CTL_AHCI_EN;
1482 
1483     for (i = 0; i < s->ports; i++) {
1484         pr = &s->dev[i].port_regs;
1485         pr->irq_stat = 0;
1486         pr->irq_mask = 0;
1487         pr->scr_ctl = 0;
1488         pr->cmd = PORT_CMD_SPIN_UP | PORT_CMD_POWER_ON;
1489         ahci_reset_port(s, i);
1490     }
1491 }
1492 
1493 static const VMStateDescription vmstate_ncq_tfs = {
1494     .name = "ncq state",
1495     .version_id = 1,
1496     .fields = (VMStateField[]) {
1497         VMSTATE_UINT32(sector_count, NCQTransferState),
1498         VMSTATE_UINT64(lba, NCQTransferState),
1499         VMSTATE_UINT8(tag, NCQTransferState),
1500         VMSTATE_UINT8(cmd, NCQTransferState),
1501         VMSTATE_UINT8(slot, NCQTransferState),
1502         VMSTATE_BOOL(used, NCQTransferState),
1503         VMSTATE_BOOL(halt, NCQTransferState),
1504         VMSTATE_END_OF_LIST()
1505     },
1506 };
1507 
1508 static const VMStateDescription vmstate_ahci_device = {
1509     .name = "ahci port",
1510     .version_id = 1,
1511     .fields = (VMStateField[]) {
1512         VMSTATE_IDE_BUS(port, AHCIDevice),
1513         VMSTATE_IDE_DRIVE(port.ifs[0], AHCIDevice),
1514         VMSTATE_UINT32(port_state, AHCIDevice),
1515         VMSTATE_UINT32(finished, AHCIDevice),
1516         VMSTATE_UINT32(port_regs.lst_addr, AHCIDevice),
1517         VMSTATE_UINT32(port_regs.lst_addr_hi, AHCIDevice),
1518         VMSTATE_UINT32(port_regs.fis_addr, AHCIDevice),
1519         VMSTATE_UINT32(port_regs.fis_addr_hi, AHCIDevice),
1520         VMSTATE_UINT32(port_regs.irq_stat, AHCIDevice),
1521         VMSTATE_UINT32(port_regs.irq_mask, AHCIDevice),
1522         VMSTATE_UINT32(port_regs.cmd, AHCIDevice),
1523         VMSTATE_UINT32(port_regs.tfdata, AHCIDevice),
1524         VMSTATE_UINT32(port_regs.sig, AHCIDevice),
1525         VMSTATE_UINT32(port_regs.scr_stat, AHCIDevice),
1526         VMSTATE_UINT32(port_regs.scr_ctl, AHCIDevice),
1527         VMSTATE_UINT32(port_regs.scr_err, AHCIDevice),
1528         VMSTATE_UINT32(port_regs.scr_act, AHCIDevice),
1529         VMSTATE_UINT32(port_regs.cmd_issue, AHCIDevice),
1530         VMSTATE_BOOL(done_atapi_packet, AHCIDevice),
1531         VMSTATE_INT32(busy_slot, AHCIDevice),
1532         VMSTATE_BOOL(init_d2h_sent, AHCIDevice),
1533         VMSTATE_STRUCT_ARRAY(ncq_tfs, AHCIDevice, AHCI_MAX_CMDS,
1534                              1, vmstate_ncq_tfs, NCQTransferState),
1535         VMSTATE_END_OF_LIST()
1536     },
1537 };
1538 
1539 static int ahci_state_post_load(void *opaque, int version_id)
1540 {
1541     int i, j;
1542     struct AHCIDevice *ad;
1543     NCQTransferState *ncq_tfs;
1544     AHCIState *s = opaque;
1545 
1546     for (i = 0; i < s->ports; i++) {
1547         ad = &s->dev[i];
1548 
1549         /* Only remap the CLB address if appropriate, disallowing a state
1550          * transition from 'on' to 'off' it should be consistent here. */
1551         if (ahci_cond_start_engines(ad, false) != 0) {
1552             return -1;
1553         }
1554 
1555         for (j = 0; j < AHCI_MAX_CMDS; j++) {
1556             ncq_tfs = &ad->ncq_tfs[j];
1557             ncq_tfs->drive = ad;
1558 
1559             if (ncq_tfs->used != ncq_tfs->halt) {
1560                 return -1;
1561             }
1562             if (!ncq_tfs->halt) {
1563                 continue;
1564             }
1565             if (!is_ncq(ncq_tfs->cmd)) {
1566                 return -1;
1567             }
1568             if (ncq_tfs->slot != ncq_tfs->tag) {
1569                 return -1;
1570             }
1571             /* If ncq_tfs->halt is justly set, the engine should be engaged,
1572              * and the command list buffer should be mapped. */
1573             ncq_tfs->cmdh = get_cmd_header(s, i, ncq_tfs->slot);
1574             if (!ncq_tfs->cmdh) {
1575                 return -1;
1576             }
1577             ahci_populate_sglist(ncq_tfs->drive, &ncq_tfs->sglist,
1578                                  ncq_tfs->cmdh, ncq_tfs->sector_count * 512,
1579                                  0);
1580             if (ncq_tfs->sector_count != ncq_tfs->sglist.size >> 9) {
1581                 return -1;
1582             }
1583         }
1584 
1585 
1586         /*
1587          * If an error is present, ad->busy_slot will be valid and not -1.
1588          * In this case, an operation is waiting to resume and will re-check
1589          * for additional AHCI commands to execute upon completion.
1590          *
1591          * In the case where no error was present, busy_slot will be -1,
1592          * and we should check to see if there are additional commands waiting.
1593          */
1594         if (ad->busy_slot == -1) {
1595             check_cmd(s, i);
1596         } else {
1597             /* We are in the middle of a command, and may need to access
1598              * the command header in guest memory again. */
1599             if (ad->busy_slot < 0 || ad->busy_slot >= AHCI_MAX_CMDS) {
1600                 return -1;
1601             }
1602             ad->cur_cmd = get_cmd_header(s, i, ad->busy_slot);
1603         }
1604     }
1605 
1606     return 0;
1607 }
1608 
1609 const VMStateDescription vmstate_ahci = {
1610     .name = "ahci",
1611     .version_id = 1,
1612     .post_load = ahci_state_post_load,
1613     .fields = (VMStateField[]) {
1614         VMSTATE_STRUCT_VARRAY_POINTER_INT32(dev, AHCIState, ports,
1615                                      vmstate_ahci_device, AHCIDevice),
1616         VMSTATE_UINT32(control_regs.cap, AHCIState),
1617         VMSTATE_UINT32(control_regs.ghc, AHCIState),
1618         VMSTATE_UINT32(control_regs.irqstatus, AHCIState),
1619         VMSTATE_UINT32(control_regs.impl, AHCIState),
1620         VMSTATE_UINT32(control_regs.version, AHCIState),
1621         VMSTATE_UINT32(idp_index, AHCIState),
1622         VMSTATE_INT32_EQUAL(ports, AHCIState),
1623         VMSTATE_END_OF_LIST()
1624     },
1625 };
1626 
1627 static const VMStateDescription vmstate_sysbus_ahci = {
1628     .name = "sysbus-ahci",
1629     .fields = (VMStateField[]) {
1630         VMSTATE_AHCI(ahci, SysbusAHCIState),
1631         VMSTATE_END_OF_LIST()
1632     },
1633 };
1634 
1635 static void sysbus_ahci_reset(DeviceState *dev)
1636 {
1637     SysbusAHCIState *s = SYSBUS_AHCI(dev);
1638 
1639     ahci_reset(&s->ahci);
1640 }
1641 
1642 static void sysbus_ahci_realize(DeviceState *dev, Error **errp)
1643 {
1644     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
1645     SysbusAHCIState *s = SYSBUS_AHCI(dev);
1646 
1647     ahci_init(&s->ahci, dev, &address_space_memory, s->num_ports);
1648 
1649     sysbus_init_mmio(sbd, &s->ahci.mem);
1650     sysbus_init_irq(sbd, &s->ahci.irq);
1651 }
1652 
1653 static Property sysbus_ahci_properties[] = {
1654     DEFINE_PROP_UINT32("num-ports", SysbusAHCIState, num_ports, 1),
1655     DEFINE_PROP_END_OF_LIST(),
1656 };
1657 
1658 static void sysbus_ahci_class_init(ObjectClass *klass, void *data)
1659 {
1660     DeviceClass *dc = DEVICE_CLASS(klass);
1661 
1662     dc->realize = sysbus_ahci_realize;
1663     dc->vmsd = &vmstate_sysbus_ahci;
1664     dc->props = sysbus_ahci_properties;
1665     dc->reset = sysbus_ahci_reset;
1666     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
1667 }
1668 
1669 static const TypeInfo sysbus_ahci_info = {
1670     .name          = TYPE_SYSBUS_AHCI,
1671     .parent        = TYPE_SYS_BUS_DEVICE,
1672     .instance_size = sizeof(SysbusAHCIState),
1673     .class_init    = sysbus_ahci_class_init,
1674 };
1675 
1676 static void sysbus_ahci_register_types(void)
1677 {
1678     type_register_static(&sysbus_ahci_info);
1679 }
1680 
1681 type_init(sysbus_ahci_register_types)
1682 
1683 void ahci_ide_create_devs(PCIDevice *dev, DriveInfo **hd)
1684 {
1685     AHCIPCIState *d = ICH_AHCI(dev);
1686     AHCIState *ahci = &d->ahci;
1687     int i;
1688 
1689     for (i = 0; i < ahci->ports; i++) {
1690         if (hd[i] == NULL) {
1691             continue;
1692         }
1693         ide_create_drive(&ahci->dev[i].port, 0, hd[i]);
1694     }
1695 
1696 }
1697