xref: /openbmc/qemu/hw/ide/ahci.c (revision bb639f82)
1 /*
2  * QEMU AHCI Emulation
3  *
4  * Copyright (c) 2010 qiaochong@loongson.cn
5  * Copyright (c) 2010 Roland Elek <elek.roland@gmail.com>
6  * Copyright (c) 2010 Sebastian Herbszt <herbszt@gmx.de>
7  * Copyright (c) 2010 Alexander Graf <agraf@suse.de>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23 
24 #include <hw/hw.h>
25 #include <hw/pci/msi.h>
26 #include <hw/i386/pc.h>
27 #include <hw/pci/pci.h>
28 
29 #include "qemu/error-report.h"
30 #include "sysemu/block-backend.h"
31 #include "sysemu/dma.h"
32 #include "internal.h"
33 #include <hw/ide/pci.h>
34 #include <hw/ide/ahci.h>
35 
36 #define DEBUG_AHCI 0
37 
/*
 * Debug trace helper. When DEBUG_AHCI is non-zero, prints the name of the
 * calling function and the given port number to stderr, followed by the
 * printf-style message. The guard is a plain 'if', so the arguments are
 * still compiled when tracing is disabled.
 */
#define DPRINTF(port, fmt, ...) \
do { \
    if (DEBUG_AHCI) { \
        fprintf(stderr, "ahci: %s: [%d] ", __func__, port); \
        fprintf(stderr, fmt, ## __VA_ARGS__); \
    } \
} while (0)
45 
46 static void check_cmd(AHCIState *s, int port);
47 static int handle_cmd(AHCIState *s, int port, uint8_t slot);
48 static void ahci_reset_port(AHCIState *s, int port);
49 static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis);
50 static void ahci_init_d2h(AHCIDevice *ad);
51 static int ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit);
52 static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes);
53 static bool ahci_map_clb_address(AHCIDevice *ad);
54 static bool ahci_map_fis_address(AHCIDevice *ad);
55 static void ahci_unmap_clb_address(AHCIDevice *ad);
56 static void ahci_unmap_fis_address(AHCIDevice *ad);
57 
58 
59 static uint32_t  ahci_port_read(AHCIState *s, int port, int offset)
60 {
61     uint32_t val;
62     AHCIPortRegs *pr;
63     pr = &s->dev[port].port_regs;
64 
65     switch (offset) {
66     case PORT_LST_ADDR:
67         val = pr->lst_addr;
68         break;
69     case PORT_LST_ADDR_HI:
70         val = pr->lst_addr_hi;
71         break;
72     case PORT_FIS_ADDR:
73         val = pr->fis_addr;
74         break;
75     case PORT_FIS_ADDR_HI:
76         val = pr->fis_addr_hi;
77         break;
78     case PORT_IRQ_STAT:
79         val = pr->irq_stat;
80         break;
81     case PORT_IRQ_MASK:
82         val = pr->irq_mask;
83         break;
84     case PORT_CMD:
85         val = pr->cmd;
86         break;
87     case PORT_TFDATA:
88         val = pr->tfdata;
89         break;
90     case PORT_SIG:
91         val = pr->sig;
92         break;
93     case PORT_SCR_STAT:
94         if (s->dev[port].port.ifs[0].blk) {
95             val = SATA_SCR_SSTATUS_DET_DEV_PRESENT_PHY_UP |
96                   SATA_SCR_SSTATUS_SPD_GEN1 | SATA_SCR_SSTATUS_IPM_ACTIVE;
97         } else {
98             val = SATA_SCR_SSTATUS_DET_NODEV;
99         }
100         break;
101     case PORT_SCR_CTL:
102         val = pr->scr_ctl;
103         break;
104     case PORT_SCR_ERR:
105         val = pr->scr_err;
106         break;
107     case PORT_SCR_ACT:
108         val = pr->scr_act;
109         break;
110     case PORT_CMD_ISSUE:
111         val = pr->cmd_issue;
112         break;
113     case PORT_RESERVED:
114     default:
115         val = 0;
116     }
117     DPRINTF(port, "offset: 0x%x val: 0x%x\n", offset, val);
118     return val;
119 
120 }
121 
122 static void ahci_irq_raise(AHCIState *s, AHCIDevice *dev)
123 {
124     DeviceState *dev_state = s->container;
125     PCIDevice *pci_dev = (PCIDevice *) object_dynamic_cast(OBJECT(dev_state),
126                                                            TYPE_PCI_DEVICE);
127 
128     DPRINTF(0, "raise irq\n");
129 
130     if (pci_dev && msi_enabled(pci_dev)) {
131         msi_notify(pci_dev, 0);
132     } else {
133         qemu_irq_raise(s->irq);
134     }
135 }
136 
137 static void ahci_irq_lower(AHCIState *s, AHCIDevice *dev)
138 {
139     DeviceState *dev_state = s->container;
140     PCIDevice *pci_dev = (PCIDevice *) object_dynamic_cast(OBJECT(dev_state),
141                                                            TYPE_PCI_DEVICE);
142 
143     DPRINTF(0, "lower irq\n");
144 
145     if (!pci_dev || !msi_enabled(pci_dev)) {
146         qemu_irq_lower(s->irq);
147     }
148 }
149 
150 static void ahci_check_irq(AHCIState *s)
151 {
152     int i;
153 
154     DPRINTF(-1, "check irq %#x\n", s->control_regs.irqstatus);
155 
156     s->control_regs.irqstatus = 0;
157     for (i = 0; i < s->ports; i++) {
158         AHCIPortRegs *pr = &s->dev[i].port_regs;
159         if (pr->irq_stat & pr->irq_mask) {
160             s->control_regs.irqstatus |= (1 << i);
161         }
162     }
163 
164     if (s->control_regs.irqstatus &&
165         (s->control_regs.ghc & HOST_CTL_IRQ_EN)) {
166             ahci_irq_raise(s, NULL);
167     } else {
168         ahci_irq_lower(s, NULL);
169     }
170 }
171 
172 static void ahci_trigger_irq(AHCIState *s, AHCIDevice *d,
173                              int irq_type)
174 {
175     DPRINTF(d->port_no, "trigger irq %#x -> %x\n",
176             irq_type, d->port_regs.irq_mask & irq_type);
177 
178     d->port_regs.irq_stat |= irq_type;
179     ahci_check_irq(s);
180 }
181 
182 static void map_page(AddressSpace *as, uint8_t **ptr, uint64_t addr,
183                      uint32_t wanted)
184 {
185     hwaddr len = wanted;
186 
187     if (*ptr) {
188         dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
189     }
190 
191     *ptr = dma_memory_map(as, addr, &len, DMA_DIRECTION_FROM_DEVICE);
192     if (len < wanted) {
193         dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
194         *ptr = NULL;
195     }
196 }
197 
198 /**
199  * Check the cmd register to see if we should start or stop
200  * the DMA or FIS RX engines.
201  *
202  * @ad: Device to engage.
203  * @allow_stop: Allow device to transition from started to stopped?
204  *   'no' is useful for migration post_load, which does not expect a transition.
205  *
206  * @return 0 on success, -1 on error.
207  */
208 static int ahci_cond_start_engines(AHCIDevice *ad, bool allow_stop)
209 {
210     AHCIPortRegs *pr = &ad->port_regs;
211 
212     if (pr->cmd & PORT_CMD_START) {
213         if (ahci_map_clb_address(ad)) {
214             pr->cmd |= PORT_CMD_LIST_ON;
215         } else {
216             error_report("AHCI: Failed to start DMA engine: "
217                          "bad command list buffer address");
218             return -1;
219         }
220     } else if (pr->cmd & PORT_CMD_LIST_ON) {
221         if (allow_stop) {
222             ahci_unmap_clb_address(ad);
223             pr->cmd = pr->cmd & ~(PORT_CMD_LIST_ON);
224         } else {
225             error_report("AHCI: DMA engine should be off, "
226                          "but appears to still be running");
227             return -1;
228         }
229     }
230 
231     if (pr->cmd & PORT_CMD_FIS_RX) {
232         if (ahci_map_fis_address(ad)) {
233             pr->cmd |= PORT_CMD_FIS_ON;
234         } else {
235             error_report("AHCI: Failed to start FIS receive engine: "
236                          "bad FIS receive buffer address");
237             return -1;
238         }
239     } else if (pr->cmd & PORT_CMD_FIS_ON) {
240         if (allow_stop) {
241             ahci_unmap_fis_address(ad);
242             pr->cmd = pr->cmd & ~(PORT_CMD_FIS_ON);
243         } else {
244             error_report("AHCI: FIS receive engine should be off, "
245                          "but appears to still be running");
246             return -1;
247         }
248     }
249 
250     return 0;
251 }
252 
253 static void  ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
254 {
255     AHCIPortRegs *pr = &s->dev[port].port_regs;
256 
257     DPRINTF(port, "offset: 0x%x val: 0x%x\n", offset, val);
258     switch (offset) {
259         case PORT_LST_ADDR:
260             pr->lst_addr = val;
261             break;
262         case PORT_LST_ADDR_HI:
263             pr->lst_addr_hi = val;
264             break;
265         case PORT_FIS_ADDR:
266             pr->fis_addr = val;
267             break;
268         case PORT_FIS_ADDR_HI:
269             pr->fis_addr_hi = val;
270             break;
271         case PORT_IRQ_STAT:
272             pr->irq_stat &= ~val;
273             ahci_check_irq(s);
274             break;
275         case PORT_IRQ_MASK:
276             pr->irq_mask = val & 0xfdc000ff;
277             ahci_check_irq(s);
278             break;
279         case PORT_CMD:
280             /* Block any Read-only fields from being set;
281              * including LIST_ON and FIS_ON.
282              * The spec requires to set ICC bits to zero after the ICC change
283              * is done. We don't support ICC state changes, therefore always
284              * force the ICC bits to zero.
285              */
286             pr->cmd = (pr->cmd & PORT_CMD_RO_MASK) |
287                       (val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK));
288 
289             /* Check FIS RX and CLB engines, allow transition to false: */
290             ahci_cond_start_engines(&s->dev[port], true);
291 
292             /* XXX usually the FIS would be pending on the bus here and
293                    issuing deferred until the OS enables FIS receival.
294                    Instead, we only submit it once - which works in most
295                    cases, but is a hack. */
296             if ((pr->cmd & PORT_CMD_FIS_ON) &&
297                 !s->dev[port].init_d2h_sent) {
298                 ahci_init_d2h(&s->dev[port]);
299                 s->dev[port].init_d2h_sent = true;
300             }
301 
302             check_cmd(s, port);
303             break;
304         case PORT_TFDATA:
305             /* Read Only. */
306             break;
307         case PORT_SIG:
308             /* Read Only */
309             break;
310         case PORT_SCR_STAT:
311             /* Read Only */
312             break;
313         case PORT_SCR_CTL:
314             if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) &&
315                 ((val & AHCI_SCR_SCTL_DET) == 0)) {
316                 ahci_reset_port(s, port);
317             }
318             pr->scr_ctl = val;
319             break;
320         case PORT_SCR_ERR:
321             pr->scr_err &= ~val;
322             break;
323         case PORT_SCR_ACT:
324             /* RW1 */
325             pr->scr_act |= val;
326             break;
327         case PORT_CMD_ISSUE:
328             pr->cmd_issue |= val;
329             check_cmd(s, port);
330             break;
331         default:
332             break;
333     }
334 }
335 
336 static uint64_t ahci_mem_read_32(void *opaque, hwaddr addr)
337 {
338     AHCIState *s = opaque;
339     uint32_t val = 0;
340 
341     if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
342         switch (addr) {
343         case HOST_CAP:
344             val = s->control_regs.cap;
345             break;
346         case HOST_CTL:
347             val = s->control_regs.ghc;
348             break;
349         case HOST_IRQ_STAT:
350             val = s->control_regs.irqstatus;
351             break;
352         case HOST_PORTS_IMPL:
353             val = s->control_regs.impl;
354             break;
355         case HOST_VERSION:
356             val = s->control_regs.version;
357             break;
358         }
359 
360         DPRINTF(-1, "(addr 0x%08X), val 0x%08X\n", (unsigned) addr, val);
361     } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
362                (addr < (AHCI_PORT_REGS_START_ADDR +
363                 (s->ports * AHCI_PORT_ADDR_OFFSET_LEN)))) {
364         val = ahci_port_read(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
365                              addr & AHCI_PORT_ADDR_OFFSET_MASK);
366     }
367 
368     return val;
369 }
370 
371 
372 /**
373  * AHCI 1.3 section 3 ("HBA Memory Registers")
374  * Support unaligned 8/16/32 bit reads, and 64 bit aligned reads.
375  * Caller is responsible for masking unwanted higher order bytes.
376  */
377 static uint64_t ahci_mem_read(void *opaque, hwaddr addr, unsigned size)
378 {
379     hwaddr aligned = addr & ~0x3;
380     int ofst = addr - aligned;
381     uint64_t lo = ahci_mem_read_32(opaque, aligned);
382     uint64_t hi;
383 
384     /* if < 8 byte read does not cross 4 byte boundary */
385     if (ofst + size <= 4) {
386         return lo >> (ofst * 8);
387     }
388     g_assert_cmpint(size, >, 1);
389 
390     /* If the 64bit read is unaligned, we will produce undefined
391      * results. AHCI does not support unaligned 64bit reads. */
392     hi = ahci_mem_read_32(opaque, aligned + 4);
393     return (hi << 32 | lo) >> (ofst * 8);
394 }
395 
396 
397 static void ahci_mem_write(void *opaque, hwaddr addr,
398                            uint64_t val, unsigned size)
399 {
400     AHCIState *s = opaque;
401 
402     /* Only aligned reads are allowed on AHCI */
403     if (addr & 3) {
404         fprintf(stderr, "ahci: Mis-aligned write to addr 0x"
405                 TARGET_FMT_plx "\n", addr);
406         return;
407     }
408 
409     if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
410         DPRINTF(-1, "(addr 0x%08X), val 0x%08"PRIX64"\n", (unsigned) addr, val);
411 
412         switch (addr) {
413             case HOST_CAP: /* R/WO, RO */
414                 /* FIXME handle R/WO */
415                 break;
416             case HOST_CTL: /* R/W */
417                 if (val & HOST_CTL_RESET) {
418                     DPRINTF(-1, "HBA Reset\n");
419                     ahci_reset(s);
420                 } else {
421                     s->control_regs.ghc = (val & 0x3) | HOST_CTL_AHCI_EN;
422                     ahci_check_irq(s);
423                 }
424                 break;
425             case HOST_IRQ_STAT: /* R/WC, RO */
426                 s->control_regs.irqstatus &= ~val;
427                 ahci_check_irq(s);
428                 break;
429             case HOST_PORTS_IMPL: /* R/WO, RO */
430                 /* FIXME handle R/WO */
431                 break;
432             case HOST_VERSION: /* RO */
433                 /* FIXME report write? */
434                 break;
435             default:
436                 DPRINTF(-1, "write to unknown register 0x%x\n", (unsigned)addr);
437         }
438     } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
439                (addr < (AHCI_PORT_REGS_START_ADDR +
440                 (s->ports * AHCI_PORT_ADDR_OFFSET_LEN)))) {
441         ahci_port_write(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
442                         addr & AHCI_PORT_ADDR_OFFSET_MASK, val);
443     }
444 
445 }
446 
/*
 * Memory region callbacks backed by ahci_mem_read() and ahci_mem_write();
 * the device is declared little-endian.
 */
static const MemoryRegionOps ahci_mem_ops = {
    .read = ahci_mem_read,
    .write = ahci_mem_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
452 
453 static uint64_t ahci_idp_read(void *opaque, hwaddr addr,
454                               unsigned size)
455 {
456     AHCIState *s = opaque;
457 
458     if (addr == s->idp_offset) {
459         /* index register */
460         return s->idp_index;
461     } else if (addr == s->idp_offset + 4) {
462         /* data register - do memory read at location selected by index */
463         return ahci_mem_read(opaque, s->idp_index, size);
464     } else {
465         return 0;
466     }
467 }
468 
469 static void ahci_idp_write(void *opaque, hwaddr addr,
470                            uint64_t val, unsigned size)
471 {
472     AHCIState *s = opaque;
473 
474     if (addr == s->idp_offset) {
475         /* index register - mask off reserved bits */
476         s->idp_index = (uint32_t)val & ((AHCI_MEM_BAR_SIZE - 1) & ~3);
477     } else if (addr == s->idp_offset + 4) {
478         /* data register - do memory write at location selected by index */
479         ahci_mem_write(opaque, s->idp_index, val, size);
480     }
481 }
482 
/*
 * Memory region callbacks for the index/data pair, backed by
 * ahci_idp_read() and ahci_idp_write(); the device is declared
 * little-endian.
 */
static const MemoryRegionOps ahci_idp_ops = {
    .read = ahci_idp_read,
    .write = ahci_idp_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
488 
489 
490 static void ahci_reg_init(AHCIState *s)
491 {
492     int i;
493 
494     s->control_regs.cap = (s->ports - 1) |
495                           (AHCI_NUM_COMMAND_SLOTS << 8) |
496                           (AHCI_SUPPORTED_SPEED_GEN1 << AHCI_SUPPORTED_SPEED) |
497                           HOST_CAP_NCQ | HOST_CAP_AHCI;
498 
499     s->control_regs.impl = (1 << s->ports) - 1;
500 
501     s->control_regs.version = AHCI_VERSION_1_0;
502 
503     for (i = 0; i < s->ports; i++) {
504         s->dev[i].port_state = STATE_RUN;
505     }
506 }
507 
508 static void check_cmd(AHCIState *s, int port)
509 {
510     AHCIPortRegs *pr = &s->dev[port].port_regs;
511     uint8_t slot;
512 
513     if ((pr->cmd & PORT_CMD_START) && pr->cmd_issue) {
514         for (slot = 0; (slot < 32) && pr->cmd_issue; slot++) {
515             if ((pr->cmd_issue & (1U << slot)) &&
516                 !handle_cmd(s, port, slot)) {
517                 pr->cmd_issue &= ~(1U << slot);
518             }
519         }
520     }
521 }
522 
523 static void ahci_check_cmd_bh(void *opaque)
524 {
525     AHCIDevice *ad = opaque;
526 
527     qemu_bh_delete(ad->check_bh);
528     ad->check_bh = NULL;
529 
530     if ((ad->busy_slot != -1) &&
531         !(ad->port.ifs[0].status & (BUSY_STAT|DRQ_STAT))) {
532         /* no longer busy */
533         ad->port_regs.cmd_issue &= ~(1 << ad->busy_slot);
534         ad->busy_slot = -1;
535     }
536 
537     check_cmd(ad->hba, ad->port_no);
538 }
539 
540 static void ahci_init_d2h(AHCIDevice *ad)
541 {
542     uint8_t init_fis[20];
543     IDEState *ide_state = &ad->port.ifs[0];
544 
545     memset(init_fis, 0, sizeof(init_fis));
546 
547     init_fis[4] = 1;
548     init_fis[12] = 1;
549 
550     if (ide_state->drive_kind == IDE_CD) {
551         init_fis[5] = ide_state->lcyl;
552         init_fis[6] = ide_state->hcyl;
553     }
554 
555     ahci_write_fis_d2h(ad, init_fis);
556 }
557 
558 static void ahci_reset_port(AHCIState *s, int port)
559 {
560     AHCIDevice *d = &s->dev[port];
561     AHCIPortRegs *pr = &d->port_regs;
562     IDEState *ide_state = &d->port.ifs[0];
563     int i;
564 
565     DPRINTF(port, "reset port\n");
566 
567     ide_bus_reset(&d->port);
568     ide_state->ncq_queues = AHCI_MAX_CMDS;
569 
570     pr->scr_stat = 0;
571     pr->scr_err = 0;
572     pr->scr_act = 0;
573     pr->tfdata = 0x7F;
574     pr->sig = 0xFFFFFFFF;
575     d->busy_slot = -1;
576     d->init_d2h_sent = false;
577 
578     ide_state = &s->dev[port].port.ifs[0];
579     if (!ide_state->blk) {
580         return;
581     }
582 
583     /* reset ncq queue */
584     for (i = 0; i < AHCI_MAX_CMDS; i++) {
585         NCQTransferState *ncq_tfs = &s->dev[port].ncq_tfs[i];
586         ncq_tfs->halt = false;
587         if (!ncq_tfs->used) {
588             continue;
589         }
590 
591         if (ncq_tfs->aiocb) {
592             blk_aio_cancel(ncq_tfs->aiocb);
593             ncq_tfs->aiocb = NULL;
594         }
595 
596         /* Maybe we just finished the request thanks to blk_aio_cancel() */
597         if (!ncq_tfs->used) {
598             continue;
599         }
600 
601         qemu_sglist_destroy(&ncq_tfs->sglist);
602         ncq_tfs->used = 0;
603     }
604 
605     s->dev[port].port_state = STATE_RUN;
606     if (!ide_state->blk) {
607         pr->sig = 0;
608         ide_state->status = SEEK_STAT | WRERR_STAT;
609     } else if (ide_state->drive_kind == IDE_CD) {
610         pr->sig = SATA_SIGNATURE_CDROM;
611         ide_state->lcyl = 0x14;
612         ide_state->hcyl = 0xeb;
613         DPRINTF(port, "set lcyl = %d\n", ide_state->lcyl);
614         ide_state->status = SEEK_STAT | WRERR_STAT | READY_STAT;
615     } else {
616         pr->sig = SATA_SIGNATURE_DISK;
617         ide_state->status = SEEK_STAT | WRERR_STAT;
618     }
619 
620     ide_state->error = 1;
621     ahci_init_d2h(d);
622 }
623 
/**
 * Hex-dump a FIS to stderr, 16 bytes per row, each row prefixed with its
 * starting offset. Compiles to an empty function unless DEBUG_AHCI is
 * non-zero.
 *
 * @fis: bytes to dump
 * @cmd_len: number of bytes to dump
 */
static void debug_print_fis(uint8_t *fis, int cmd_len)
{
#if DEBUG_AHCI
    int pos = 0;

    fprintf(stderr, "fis:");
    while (pos < cmd_len) {
        /* Start a new row every 16 bytes. */
        if (pos % 16 == 0) {
            fprintf(stderr, "\n%02x:",pos);
        }
        fprintf(stderr, "%02x ",fis[pos]);
        pos++;
    }
    fprintf(stderr, "\n");
#endif
}
639 
640 static bool ahci_map_fis_address(AHCIDevice *ad)
641 {
642     AHCIPortRegs *pr = &ad->port_regs;
643     map_page(ad->hba->as, &ad->res_fis,
644              ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
645     return ad->res_fis != NULL;
646 }
647 
648 static void ahci_unmap_fis_address(AHCIDevice *ad)
649 {
650     dma_memory_unmap(ad->hba->as, ad->res_fis, 256,
651                      DMA_DIRECTION_FROM_DEVICE, 256);
652     ad->res_fis = NULL;
653 }
654 
655 static bool ahci_map_clb_address(AHCIDevice *ad)
656 {
657     AHCIPortRegs *pr = &ad->port_regs;
658     ad->cur_cmd = NULL;
659     map_page(ad->hba->as, &ad->lst,
660              ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
661     return ad->lst != NULL;
662 }
663 
664 static void ahci_unmap_clb_address(AHCIDevice *ad)
665 {
666     dma_memory_unmap(ad->hba->as, ad->lst, 1024,
667                      DMA_DIRECTION_FROM_DEVICE, 1024);
668     ad->lst = NULL;
669 }
670 
671 static void ahci_write_fis_sdb(AHCIState *s, NCQTransferState *ncq_tfs)
672 {
673     AHCIDevice *ad = ncq_tfs->drive;
674     AHCIPortRegs *pr = &ad->port_regs;
675     IDEState *ide_state;
676     SDBFIS *sdb_fis;
677 
678     if (!ad->res_fis ||
679         !(pr->cmd & PORT_CMD_FIS_RX)) {
680         return;
681     }
682 
683     sdb_fis = (SDBFIS *)&ad->res_fis[RES_FIS_SDBFIS];
684     ide_state = &ad->port.ifs[0];
685 
686     sdb_fis->type = SATA_FIS_TYPE_SDB;
687     /* Interrupt pending & Notification bit */
688     sdb_fis->flags = 0x40; /* Interrupt bit, always 1 for NCQ */
689     sdb_fis->status = ide_state->status & 0x77;
690     sdb_fis->error = ide_state->error;
691     /* update SAct field in SDB_FIS */
692     sdb_fis->payload = cpu_to_le32(ad->finished);
693 
694     /* Update shadow registers (except BSY 0x80 and DRQ 0x08) */
695     pr->tfdata = (ad->port.ifs[0].error << 8) |
696         (ad->port.ifs[0].status & 0x77) |
697         (pr->tfdata & 0x88);
698     pr->scr_act &= ~ad->finished;
699     ad->finished = 0;
700 
701     /* Trigger IRQ if interrupt bit is set (which currently, it always is) */
702     if (sdb_fis->flags & 0x40) {
703         ahci_trigger_irq(s, ad, PORT_IRQ_SDB_FIS);
704     }
705 }
706 
707 static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len)
708 {
709     AHCIPortRegs *pr = &ad->port_regs;
710     uint8_t *pio_fis;
711     IDEState *s = &ad->port.ifs[0];
712 
713     if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
714         return;
715     }
716 
717     pio_fis = &ad->res_fis[RES_FIS_PSFIS];
718 
719     pio_fis[0] = SATA_FIS_TYPE_PIO_SETUP;
720     pio_fis[1] = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0);
721     pio_fis[2] = s->status;
722     pio_fis[3] = s->error;
723 
724     pio_fis[4] = s->sector;
725     pio_fis[5] = s->lcyl;
726     pio_fis[6] = s->hcyl;
727     pio_fis[7] = s->select;
728     pio_fis[8] = s->hob_sector;
729     pio_fis[9] = s->hob_lcyl;
730     pio_fis[10] = s->hob_hcyl;
731     pio_fis[11] = 0;
732     pio_fis[12] = s->nsector & 0xFF;
733     pio_fis[13] = (s->nsector >> 8) & 0xFF;
734     pio_fis[14] = 0;
735     pio_fis[15] = s->status;
736     pio_fis[16] = len & 255;
737     pio_fis[17] = len >> 8;
738     pio_fis[18] = 0;
739     pio_fis[19] = 0;
740 
741     /* Update shadow registers: */
742     pr->tfdata = (ad->port.ifs[0].error << 8) |
743         ad->port.ifs[0].status;
744 
745     if (pio_fis[2] & ERR_STAT) {
746         ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR);
747     }
748 
749     ahci_trigger_irq(ad->hba, ad, PORT_IRQ_PIOS_FIS);
750 }
751 
752 static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
753 {
754     AHCIPortRegs *pr = &ad->port_regs;
755     uint8_t *d2h_fis;
756     int i;
757     IDEState *s = &ad->port.ifs[0];
758 
759     if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
760         return;
761     }
762 
763     d2h_fis = &ad->res_fis[RES_FIS_RFIS];
764 
765     d2h_fis[0] = SATA_FIS_TYPE_REGISTER_D2H;
766     d2h_fis[1] = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0);
767     d2h_fis[2] = s->status;
768     d2h_fis[3] = s->error;
769 
770     d2h_fis[4] = s->sector;
771     d2h_fis[5] = s->lcyl;
772     d2h_fis[6] = s->hcyl;
773     d2h_fis[7] = s->select;
774     d2h_fis[8] = s->hob_sector;
775     d2h_fis[9] = s->hob_lcyl;
776     d2h_fis[10] = s->hob_hcyl;
777     d2h_fis[11] = 0;
778     d2h_fis[12] = s->nsector & 0xFF;
779     d2h_fis[13] = (s->nsector >> 8) & 0xFF;
780     for (i = 14; i < 20; i++) {
781         d2h_fis[i] = 0;
782     }
783 
784     /* Update shadow registers: */
785     pr->tfdata = (ad->port.ifs[0].error << 8) |
786         ad->port.ifs[0].status;
787 
788     if (d2h_fis[2] & ERR_STAT) {
789         ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR);
790     }
791 
792     ahci_trigger_irq(ad->hba, ad, PORT_IRQ_D2H_REG_FIS);
793 }
794 
795 static int prdt_tbl_entry_size(const AHCI_SG *tbl)
796 {
797     /* flags_size is zero-based */
798     return (le32_to_cpu(tbl->flags_size) & AHCI_PRDT_SIZE_MASK) + 1;
799 }
800 
801 static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist,
802                                 AHCICmdHdr *cmd, int64_t limit, int32_t offset)
803 {
804     uint16_t opts = le16_to_cpu(cmd->opts);
805     uint16_t prdtl = le16_to_cpu(cmd->prdtl);
806     uint64_t cfis_addr = le64_to_cpu(cmd->tbl_addr);
807     uint64_t prdt_addr = cfis_addr + 0x80;
808     dma_addr_t prdt_len = (prdtl * sizeof(AHCI_SG));
809     dma_addr_t real_prdt_len = prdt_len;
810     uint8_t *prdt;
811     int i;
812     int r = 0;
813     uint64_t sum = 0;
814     int off_idx = -1;
815     int64_t off_pos = -1;
816     int tbl_entry_size;
817     IDEBus *bus = &ad->port;
818     BusState *qbus = BUS(bus);
819 
820     /*
821      * Note: AHCI PRDT can describe up to 256GiB. SATA/ATA only support
822      * transactions of up to 32MiB as of ATA8-ACS3 rev 1b, assuming a
823      * 512 byte sector size. We limit the PRDT in this implementation to
824      * a reasonably large 2GiB, which can accommodate the maximum transfer
825      * request for sector sizes up to 32K.
826      */
827 
828     if (!prdtl) {
829         DPRINTF(ad->port_no, "no sg list given by guest: 0x%08x\n", opts);
830         return -1;
831     }
832 
833     /* map PRDT */
834     if (!(prdt = dma_memory_map(ad->hba->as, prdt_addr, &prdt_len,
835                                 DMA_DIRECTION_TO_DEVICE))){
836         DPRINTF(ad->port_no, "map failed\n");
837         return -1;
838     }
839 
840     if (prdt_len < real_prdt_len) {
841         DPRINTF(ad->port_no, "mapped less than expected\n");
842         r = -1;
843         goto out;
844     }
845 
846     /* Get entries in the PRDT, init a qemu sglist accordingly */
847     if (prdtl > 0) {
848         AHCI_SG *tbl = (AHCI_SG *)prdt;
849         sum = 0;
850         for (i = 0; i < prdtl; i++) {
851             tbl_entry_size = prdt_tbl_entry_size(&tbl[i]);
852             if (offset < (sum + tbl_entry_size)) {
853                 off_idx = i;
854                 off_pos = offset - sum;
855                 break;
856             }
857             sum += tbl_entry_size;
858         }
859         if ((off_idx == -1) || (off_pos < 0) || (off_pos > tbl_entry_size)) {
860             DPRINTF(ad->port_no, "%s: Incorrect offset! "
861                             "off_idx: %d, off_pos: %"PRId64"\n",
862                             __func__, off_idx, off_pos);
863             r = -1;
864             goto out;
865         }
866 
867         qemu_sglist_init(sglist, qbus->parent, (prdtl - off_idx),
868                          ad->hba->as);
869         qemu_sglist_add(sglist, le64_to_cpu(tbl[off_idx].addr) + off_pos,
870                         MIN(prdt_tbl_entry_size(&tbl[off_idx]) - off_pos,
871                             limit));
872 
873         for (i = off_idx + 1; i < prdtl && sglist->size < limit; i++) {
874             qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
875                             MIN(prdt_tbl_entry_size(&tbl[i]),
876                                 limit - sglist->size));
877             if (sglist->size > INT32_MAX) {
878                 error_report("AHCI Physical Region Descriptor Table describes "
879                              "more than 2 GiB.\n");
880                 qemu_sglist_destroy(sglist);
881                 r = -1;
882                 goto out;
883             }
884         }
885     }
886 
887 out:
888     dma_memory_unmap(ad->hba->as, prdt, prdt_len,
889                      DMA_DIRECTION_TO_DEVICE, prdt_len);
890     return r;
891 }
892 
893 static void ncq_err(NCQTransferState *ncq_tfs)
894 {
895     IDEState *ide_state = &ncq_tfs->drive->port.ifs[0];
896 
897     ide_state->error = ABRT_ERR;
898     ide_state->status = READY_STAT | ERR_STAT;
899     ncq_tfs->drive->port_regs.scr_err |= (1 << ncq_tfs->tag);
900 }
901 
902 static void ncq_finish(NCQTransferState *ncq_tfs)
903 {
904     /* If we didn't error out, set our finished bit. Errored commands
905      * do not get a bit set for the SDB FIS ACT register, nor do they
906      * clear the outstanding bit in scr_act (PxSACT). */
907     if (!(ncq_tfs->drive->port_regs.scr_err & (1 << ncq_tfs->tag))) {
908         ncq_tfs->drive->finished |= (1 << ncq_tfs->tag);
909     }
910 
911     ahci_write_fis_sdb(ncq_tfs->drive->hba, ncq_tfs);
912 
913     DPRINTF(ncq_tfs->drive->port_no, "NCQ transfer tag %d finished\n",
914             ncq_tfs->tag);
915 
916     block_acct_done(blk_get_stats(ncq_tfs->drive->port.ifs[0].blk),
917                     &ncq_tfs->acct);
918     qemu_sglist_destroy(&ncq_tfs->sglist);
919     ncq_tfs->used = 0;
920 }
921 
922 static void ncq_cb(void *opaque, int ret)
923 {
924     NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
925     IDEState *ide_state = &ncq_tfs->drive->port.ifs[0];
926 
927     if (ret == -ECANCELED) {
928         return;
929     }
930 
931     if (ret < 0) {
932         bool is_read = ncq_tfs->cmd == READ_FPDMA_QUEUED;
933         BlockErrorAction action = blk_get_error_action(ide_state->blk,
934                                                        is_read, -ret);
935         if (action == BLOCK_ERROR_ACTION_STOP) {
936             ncq_tfs->halt = true;
937             ide_state->bus->error_status = IDE_RETRY_HBA;
938         } else if (action == BLOCK_ERROR_ACTION_REPORT) {
939             ncq_err(ncq_tfs);
940         }
941         blk_error_action(ide_state->blk, action, is_read, -ret);
942     } else {
943         ide_state->status = READY_STAT | SEEK_STAT;
944     }
945 
946     if (!ncq_tfs->halt) {
947         ncq_finish(ncq_tfs);
948     }
949 }
950 
951 static int is_ncq(uint8_t ata_cmd)
952 {
953     /* Based on SATA 3.2 section 13.6.3.2 */
954     switch (ata_cmd) {
955     case READ_FPDMA_QUEUED:
956     case WRITE_FPDMA_QUEUED:
957     case NCQ_NON_DATA:
958     case RECEIVE_FPDMA_QUEUED:
959     case SEND_FPDMA_QUEUED:
960         return 1;
961     default:
962         return 0;
963     }
964 }
965 
966 static void execute_ncq_command(NCQTransferState *ncq_tfs)
967 {
968     AHCIDevice *ad = ncq_tfs->drive;
969     IDEState *ide_state = &ad->port.ifs[0];
970     int port = ad->port_no;
971 
972     g_assert(is_ncq(ncq_tfs->cmd));
973     ncq_tfs->halt = false;
974 
975     switch (ncq_tfs->cmd) {
976     case READ_FPDMA_QUEUED:
977         DPRINTF(port, "NCQ reading %d sectors from LBA %"PRId64", tag %d\n",
978                 ncq_tfs->sector_count, ncq_tfs->lba, ncq_tfs->tag);
979 
980         DPRINTF(port, "tag %d aio read %"PRId64"\n",
981                 ncq_tfs->tag, ncq_tfs->lba);
982 
983         dma_acct_start(ide_state->blk, &ncq_tfs->acct,
984                        &ncq_tfs->sglist, BLOCK_ACCT_READ);
985         ncq_tfs->aiocb = dma_blk_read(ide_state->blk, &ncq_tfs->sglist,
986                                       ncq_tfs->lba, ncq_cb, ncq_tfs);
987         break;
988     case WRITE_FPDMA_QUEUED:
989         DPRINTF(port, "NCQ writing %d sectors to LBA %"PRId64", tag %d\n",
990                 ncq_tfs->sector_count, ncq_tfs->lba, ncq_tfs->tag);
991 
992         DPRINTF(port, "tag %d aio write %"PRId64"\n",
993                 ncq_tfs->tag, ncq_tfs->lba);
994 
995         dma_acct_start(ide_state->blk, &ncq_tfs->acct,
996                        &ncq_tfs->sglist, BLOCK_ACCT_WRITE);
997         ncq_tfs->aiocb = dma_blk_write(ide_state->blk, &ncq_tfs->sglist,
998                                        ncq_tfs->lba, ncq_cb, ncq_tfs);
999         break;
1000     default:
1001         DPRINTF(port, "error: unsupported NCQ command (0x%02x) received\n",
1002                 ncq_tfs->cmd);
1003         qemu_sglist_destroy(&ncq_tfs->sglist);
1004         ncq_err(ncq_tfs);
1005     }
1006 }
1007 
1008 
1009 static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
1010                                 uint8_t slot)
1011 {
1012     AHCIDevice *ad = &s->dev[port];
1013     IDEState *ide_state = &ad->port.ifs[0];
1014     NCQFrame *ncq_fis = (NCQFrame*)cmd_fis;
1015     uint8_t tag = ncq_fis->tag >> 3;
1016     NCQTransferState *ncq_tfs = &ad->ncq_tfs[tag];
1017     size_t size;
1018 
1019     g_assert(is_ncq(ncq_fis->command));
1020     if (ncq_tfs->used) {
1021         /* error - already in use */
1022         fprintf(stderr, "%s: tag %d already used\n", __FUNCTION__, tag);
1023         return;
1024     }
1025 
1026     ncq_tfs->used = 1;
1027     ncq_tfs->drive = ad;
1028     ncq_tfs->slot = slot;
1029     ncq_tfs->cmdh = &((AHCICmdHdr *)ad->lst)[slot];
1030     ncq_tfs->cmd = ncq_fis->command;
1031     ncq_tfs->lba = ((uint64_t)ncq_fis->lba5 << 40) |
1032                    ((uint64_t)ncq_fis->lba4 << 32) |
1033                    ((uint64_t)ncq_fis->lba3 << 24) |
1034                    ((uint64_t)ncq_fis->lba2 << 16) |
1035                    ((uint64_t)ncq_fis->lba1 << 8) |
1036                    (uint64_t)ncq_fis->lba0;
1037     ncq_tfs->tag = tag;
1038 
1039     /* Sanity-check the NCQ packet */
1040     if (tag != slot) {
1041         DPRINTF(port, "Warn: NCQ slot (%d) did not match the given tag (%d)\n",
1042                 slot, tag);
1043     }
1044 
1045     if (ncq_fis->aux0 || ncq_fis->aux1 || ncq_fis->aux2 || ncq_fis->aux3) {
1046         DPRINTF(port, "Warn: Attempt to use NCQ auxiliary fields.\n");
1047     }
1048     if (ncq_fis->prio || ncq_fis->icc) {
1049         DPRINTF(port, "Warn: Unsupported attempt to use PRIO/ICC fields\n");
1050     }
1051     if (ncq_fis->fua & NCQ_FIS_FUA_MASK) {
1052         DPRINTF(port, "Warn: Unsupported attempt to use Force Unit Access\n");
1053     }
1054     if (ncq_fis->tag & NCQ_FIS_RARC_MASK) {
1055         DPRINTF(port, "Warn: Unsupported attempt to use Rebuild Assist\n");
1056     }
1057 
1058     ncq_tfs->sector_count = ((ncq_fis->sector_count_high << 8) |
1059                              ncq_fis->sector_count_low);
1060     if (!ncq_tfs->sector_count) {
1061         ncq_tfs->sector_count = 0x10000;
1062     }
1063     size = ncq_tfs->sector_count * 512;
1064     ahci_populate_sglist(ad, &ncq_tfs->sglist, ncq_tfs->cmdh, size, 0);
1065 
1066     if (ncq_tfs->sglist.size < size) {
1067         error_report("ahci: PRDT length for NCQ command (0x%zx) "
1068                      "is smaller than the requested size (0x%zx)",
1069                      ncq_tfs->sglist.size, size);
1070         qemu_sglist_destroy(&ncq_tfs->sglist);
1071         ncq_err(ncq_tfs);
1072         ahci_trigger_irq(ad->hba, ad, PORT_IRQ_OVERFLOW);
1073         return;
1074     } else if (ncq_tfs->sglist.size != size) {
1075         DPRINTF(port, "Warn: PRDTL (0x%zx)"
1076                 " does not match requested size (0x%zx)",
1077                 ncq_tfs->sglist.size, size);
1078     }
1079 
1080     DPRINTF(port, "NCQ transfer LBA from %"PRId64" to %"PRId64", "
1081             "drive max %"PRId64"\n",
1082             ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 1,
1083             ide_state->nb_sectors - 1);
1084 
1085     execute_ncq_command(ncq_tfs);
1086 }
1087 
1088 static AHCICmdHdr *get_cmd_header(AHCIState *s, uint8_t port, uint8_t slot)
1089 {
1090     if (port >= s->ports || slot >= AHCI_MAX_CMDS) {
1091         return NULL;
1092     }
1093 
1094     return s->dev[port].lst ? &((AHCICmdHdr *)s->dev[port].lst)[slot] : NULL;
1095 }
1096 
1097 static void handle_reg_h2d_fis(AHCIState *s, int port,
1098                                uint8_t slot, uint8_t *cmd_fis)
1099 {
1100     IDEState *ide_state = &s->dev[port].port.ifs[0];
1101     AHCICmdHdr *cmd = get_cmd_header(s, port, slot);
1102     uint16_t opts = le16_to_cpu(cmd->opts);
1103 
1104     if (cmd_fis[1] & 0x0F) {
1105         DPRINTF(port, "Port Multiplier not supported."
1106                 " cmd_fis[0]=%02x cmd_fis[1]=%02x cmd_fis[2]=%02x\n",
1107                 cmd_fis[0], cmd_fis[1], cmd_fis[2]);
1108         return;
1109     }
1110 
1111     if (cmd_fis[1] & 0x70) {
1112         DPRINTF(port, "Reserved flags set in H2D Register FIS."
1113                 " cmd_fis[0]=%02x cmd_fis[1]=%02x cmd_fis[2]=%02x\n",
1114                 cmd_fis[0], cmd_fis[1], cmd_fis[2]);
1115         return;
1116     }
1117 
1118     if (!(cmd_fis[1] & SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER)) {
1119         switch (s->dev[port].port_state) {
1120         case STATE_RUN:
1121             if (cmd_fis[15] & ATA_SRST) {
1122                 s->dev[port].port_state = STATE_RESET;
1123             }
1124             break;
1125         case STATE_RESET:
1126             if (!(cmd_fis[15] & ATA_SRST)) {
1127                 ahci_reset_port(s, port);
1128             }
1129             break;
1130         }
1131         return;
1132     }
1133 
1134     /* Check for NCQ command */
1135     if (is_ncq(cmd_fis[2])) {
1136         process_ncq_command(s, port, cmd_fis, slot);
1137         return;
1138     }
1139 
1140     /* Decompose the FIS:
1141      * AHCI does not interpret FIS packets, it only forwards them.
1142      * SATA 1.0 describes how to decode LBA28 and CHS FIS packets.
1143      * Later specifications, e.g, SATA 3.2, describe LBA48 FIS packets.
1144      *
1145      * ATA4 describes sector number for LBA28/CHS commands.
1146      * ATA6 describes sector number for LBA48 commands.
1147      * ATA8 deprecates CHS fully, describing only LBA28/48.
1148      *
1149      * We dutifully convert the FIS into IDE registers, and allow the
1150      * core layer to interpret them as needed. */
1151     ide_state->feature = cmd_fis[3];
1152     ide_state->sector = cmd_fis[4];      /* LBA 7:0 */
1153     ide_state->lcyl = cmd_fis[5];        /* LBA 15:8  */
1154     ide_state->hcyl = cmd_fis[6];        /* LBA 23:16 */
1155     ide_state->select = cmd_fis[7];      /* LBA 27:24 (LBA28) */
1156     ide_state->hob_sector = cmd_fis[8];  /* LBA 31:24 */
1157     ide_state->hob_lcyl = cmd_fis[9];    /* LBA 39:32 */
1158     ide_state->hob_hcyl = cmd_fis[10];   /* LBA 47:40 */
1159     ide_state->hob_feature = cmd_fis[11];
1160     ide_state->nsector = (int64_t)((cmd_fis[13] << 8) | cmd_fis[12]);
1161     /* 14, 16, 17, 18, 19: Reserved (SATA 1.0) */
1162     /* 15: Only valid when UPDATE_COMMAND not set. */
1163 
1164     /* Copy the ACMD field (ATAPI packet, if any) from the AHCI command
1165      * table to ide_state->io_buffer */
1166     if (opts & AHCI_CMD_ATAPI) {
1167         memcpy(ide_state->io_buffer, &cmd_fis[AHCI_COMMAND_TABLE_ACMD], 0x10);
1168         debug_print_fis(ide_state->io_buffer, 0x10);
1169         s->dev[port].done_atapi_packet = false;
1170         /* XXX send PIO setup FIS */
1171     }
1172 
1173     ide_state->error = 0;
1174 
1175     /* Reset transferred byte counter */
1176     cmd->status = 0;
1177 
1178     /* We're ready to process the command in FIS byte 2. */
1179     ide_exec_cmd(&s->dev[port].port, cmd_fis[2]);
1180 }
1181 
1182 static int handle_cmd(AHCIState *s, int port, uint8_t slot)
1183 {
1184     IDEState *ide_state;
1185     uint64_t tbl_addr;
1186     AHCICmdHdr *cmd;
1187     uint8_t *cmd_fis;
1188     dma_addr_t cmd_len;
1189 
1190     if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
1191         /* Engine currently busy, try again later */
1192         DPRINTF(port, "engine busy\n");
1193         return -1;
1194     }
1195 
1196     if (!s->dev[port].lst) {
1197         DPRINTF(port, "error: lst not given but cmd handled");
1198         return -1;
1199     }
1200     cmd = get_cmd_header(s, port, slot);
1201     /* remember current slot handle for later */
1202     s->dev[port].cur_cmd = cmd;
1203 
1204     /* The device we are working for */
1205     ide_state = &s->dev[port].port.ifs[0];
1206     if (!ide_state->blk) {
1207         DPRINTF(port, "error: guest accessed unused port");
1208         return -1;
1209     }
1210 
1211     tbl_addr = le64_to_cpu(cmd->tbl_addr);
1212     cmd_len = 0x80;
1213     cmd_fis = dma_memory_map(s->as, tbl_addr, &cmd_len,
1214                              DMA_DIRECTION_FROM_DEVICE);
1215     if (!cmd_fis) {
1216         DPRINTF(port, "error: guest passed us an invalid cmd fis\n");
1217         return -1;
1218     } else if (cmd_len != 0x80) {
1219         ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_HBUS_ERR);
1220         DPRINTF(port, "error: dma_memory_map failed: "
1221                 "(len(%02"PRIx64") != 0x80)\n",
1222                 cmd_len);
1223         goto out;
1224     }
1225     debug_print_fis(cmd_fis, 0x80);
1226 
1227     switch (cmd_fis[0]) {
1228         case SATA_FIS_TYPE_REGISTER_H2D:
1229             handle_reg_h2d_fis(s, port, slot, cmd_fis);
1230             break;
1231         default:
1232             DPRINTF(port, "unknown command cmd_fis[0]=%02x cmd_fis[1]=%02x "
1233                           "cmd_fis[2]=%02x\n", cmd_fis[0], cmd_fis[1],
1234                           cmd_fis[2]);
1235             break;
1236     }
1237 
1238 out:
1239     dma_memory_unmap(s->as, cmd_fis, cmd_len, DMA_DIRECTION_FROM_DEVICE,
1240                      cmd_len);
1241 
1242     if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
1243         /* async command, complete later */
1244         s->dev[port].busy_slot = slot;
1245         return -1;
1246     }
1247 
1248     /* done handling the command */
1249     return 0;
1250 }
1251 
1252 /* DMA dev <-> ram */
1253 static void ahci_start_transfer(IDEDMA *dma)
1254 {
1255     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1256     IDEState *s = &ad->port.ifs[0];
1257     uint32_t size = (uint32_t)(s->data_end - s->data_ptr);
1258     /* write == ram -> device */
1259     uint16_t opts = le16_to_cpu(ad->cur_cmd->opts);
1260     int is_write = opts & AHCI_CMD_WRITE;
1261     int is_atapi = opts & AHCI_CMD_ATAPI;
1262     int has_sglist = 0;
1263 
1264     if (is_atapi && !ad->done_atapi_packet) {
1265         /* already prepopulated iobuffer */
1266         ad->done_atapi_packet = true;
1267         size = 0;
1268         goto out;
1269     }
1270 
1271     if (ahci_dma_prepare_buf(dma, size)) {
1272         has_sglist = 1;
1273     }
1274 
1275     DPRINTF(ad->port_no, "%sing %d bytes on %s w/%s sglist\n",
1276             is_write ? "writ" : "read", size, is_atapi ? "atapi" : "ata",
1277             has_sglist ? "" : "o");
1278 
1279     if (has_sglist && size) {
1280         if (is_write) {
1281             dma_buf_write(s->data_ptr, size, &s->sg);
1282         } else {
1283             dma_buf_read(s->data_ptr, size, &s->sg);
1284         }
1285     }
1286 
1287 out:
1288     /* declare that we processed everything */
1289     s->data_ptr = s->data_end;
1290 
1291     /* Update number of transferred bytes, destroy sglist */
1292     ahci_commit_buf(dma, size);
1293 
1294     s->end_transfer_func(s);
1295 
1296     if (!(s->status & DRQ_STAT)) {
1297         /* done with PIO send/receive */
1298         ahci_write_fis_pio(ad, le32_to_cpu(ad->cur_cmd->status));
1299     }
1300 }
1301 
1302 static void ahci_start_dma(IDEDMA *dma, IDEState *s,
1303                            BlockCompletionFunc *dma_cb)
1304 {
1305     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1306     DPRINTF(ad->port_no, "\n");
1307     s->io_buffer_offset = 0;
1308     dma_cb(s, 0);
1309 }
1310 
1311 static void ahci_restart_dma(IDEDMA *dma)
1312 {
1313     /* Nothing to do, ahci_start_dma already resets s->io_buffer_offset.  */
1314 }
1315 
1316 /**
1317  * IDE/PIO restarts are handled by the core layer, but NCQ commands
1318  * need an extra kick from the AHCI HBA.
1319  */
1320 static void ahci_restart(IDEDMA *dma)
1321 {
1322     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1323     int i;
1324 
1325     for (i = 0; i < AHCI_MAX_CMDS; i++) {
1326         NCQTransferState *ncq_tfs = &ad->ncq_tfs[i];
1327         if (ncq_tfs->halt) {
1328             execute_ncq_command(ncq_tfs);
1329         }
1330     }
1331 }
1332 
1333 /**
1334  * Called in DMA R/W chains to read the PRDT, utilizing ahci_populate_sglist.
1335  * Not currently invoked by PIO R/W chains,
1336  * which invoke ahci_populate_sglist via ahci_start_transfer.
1337  */
1338 static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit)
1339 {
1340     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1341     IDEState *s = &ad->port.ifs[0];
1342 
1343     if (ahci_populate_sglist(ad, &s->sg, ad->cur_cmd,
1344                              limit, s->io_buffer_offset) == -1) {
1345         DPRINTF(ad->port_no, "ahci_dma_prepare_buf failed.\n");
1346         return -1;
1347     }
1348     s->io_buffer_size = s->sg.size;
1349 
1350     DPRINTF(ad->port_no, "len=%#x\n", s->io_buffer_size);
1351     return s->io_buffer_size;
1352 }
1353 
1354 /**
1355  * Destroys the scatter-gather list,
1356  * and updates the command header with a bytes-read value.
1357  * called explicitly via ahci_dma_rw_buf (ATAPI DMA),
1358  * and ahci_start_transfer (PIO R/W),
1359  * and called via callback from ide_dma_cb for DMA R/W paths.
1360  */
1361 static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes)
1362 {
1363     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1364     IDEState *s = &ad->port.ifs[0];
1365 
1366     tx_bytes += le32_to_cpu(ad->cur_cmd->status);
1367     ad->cur_cmd->status = cpu_to_le32(tx_bytes);
1368 
1369     qemu_sglist_destroy(&s->sg);
1370 }
1371 
1372 static int ahci_dma_rw_buf(IDEDMA *dma, int is_write)
1373 {
1374     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1375     IDEState *s = &ad->port.ifs[0];
1376     uint8_t *p = s->io_buffer + s->io_buffer_index;
1377     int l = s->io_buffer_size - s->io_buffer_index;
1378 
1379     if (ahci_populate_sglist(ad, &s->sg, ad->cur_cmd, l, s->io_buffer_offset)) {
1380         return 0;
1381     }
1382 
1383     if (is_write) {
1384         dma_buf_read(p, l, &s->sg);
1385     } else {
1386         dma_buf_write(p, l, &s->sg);
1387     }
1388 
1389     /* free sglist, update byte count */
1390     ahci_commit_buf(dma, l);
1391 
1392     s->io_buffer_index += l;
1393     s->io_buffer_offset += l;
1394 
1395     DPRINTF(ad->port_no, "len=%#x\n", l);
1396 
1397     return 1;
1398 }
1399 
1400 static void ahci_cmd_done(IDEDMA *dma)
1401 {
1402     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
1403 
1404     DPRINTF(ad->port_no, "cmd done\n");
1405 
1406     /* update d2h status */
1407     ahci_write_fis_d2h(ad, NULL);
1408 
1409     if (!ad->check_bh) {
1410         /* maybe we still have something to process, check later */
1411         ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
1412         qemu_bh_schedule(ad->check_bh);
1413     }
1414 }
1415 
/*
 * Handler installed on the per-port IRQ lines allocated in ahci_init().
 * It deliberately ignores every level change.
 */
static void ahci_irq_set(void *opaque, int n, int level)
{
    (void)opaque;
    (void)n;
    (void)level;
}
1419 
1420 static const IDEDMAOps ahci_dma_ops = {
1421     .start_dma = ahci_start_dma,
1422     .restart = ahci_restart,
1423     .restart_dma = ahci_restart_dma,
1424     .start_transfer = ahci_start_transfer,
1425     .prepare_buf = ahci_dma_prepare_buf,
1426     .commit_buf = ahci_commit_buf,
1427     .rw_buf = ahci_dma_rw_buf,
1428     .cmd_done = ahci_cmd_done,
1429 };
1430 
1431 void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports)
1432 {
1433     qemu_irq *irqs;
1434     int i;
1435 
1436     s->as = as;
1437     s->ports = ports;
1438     s->dev = g_new0(AHCIDevice, ports);
1439     s->container = qdev;
1440     ahci_reg_init(s);
1441     /* XXX BAR size should be 1k, but that breaks, so bump it to 4k for now */
1442     memory_region_init_io(&s->mem, OBJECT(qdev), &ahci_mem_ops, s,
1443                           "ahci", AHCI_MEM_BAR_SIZE);
1444     memory_region_init_io(&s->idp, OBJECT(qdev), &ahci_idp_ops, s,
1445                           "ahci-idp", 32);
1446 
1447     irqs = qemu_allocate_irqs(ahci_irq_set, s, s->ports);
1448 
1449     for (i = 0; i < s->ports; i++) {
1450         AHCIDevice *ad = &s->dev[i];
1451 
1452         ide_bus_new(&ad->port, sizeof(ad->port), qdev, i, 1);
1453         ide_init2(&ad->port, irqs[i]);
1454 
1455         ad->hba = s;
1456         ad->port_no = i;
1457         ad->port.dma = &ad->dma;
1458         ad->port.dma->ops = &ahci_dma_ops;
1459         ide_register_restart_cb(&ad->port);
1460     }
1461 }
1462 
1463 void ahci_uninit(AHCIState *s)
1464 {
1465     g_free(s->dev);
1466 }
1467 
1468 void ahci_reset(AHCIState *s)
1469 {
1470     AHCIPortRegs *pr;
1471     int i;
1472 
1473     s->control_regs.irqstatus = 0;
1474     /* AHCI Enable (AE)
1475      * The implementation of this bit is dependent upon the value of the
1476      * CAP.SAM bit. If CAP.SAM is '0', then GHC.AE shall be read-write and
1477      * shall have a reset value of '0'. If CAP.SAM is '1', then AE shall be
1478      * read-only and shall have a reset value of '1'.
1479      *
1480      * We set HOST_CAP_AHCI so we must enable AHCI at reset.
1481      */
1482     s->control_regs.ghc = HOST_CTL_AHCI_EN;
1483 
1484     for (i = 0; i < s->ports; i++) {
1485         pr = &s->dev[i].port_regs;
1486         pr->irq_stat = 0;
1487         pr->irq_mask = 0;
1488         pr->scr_ctl = 0;
1489         pr->cmd = PORT_CMD_SPIN_UP | PORT_CMD_POWER_ON;
1490         ahci_reset_port(s, i);
1491     }
1492 }
1493 
1494 static const VMStateDescription vmstate_ncq_tfs = {
1495     .name = "ncq state",
1496     .version_id = 1,
1497     .fields = (VMStateField[]) {
1498         VMSTATE_UINT32(sector_count, NCQTransferState),
1499         VMSTATE_UINT64(lba, NCQTransferState),
1500         VMSTATE_UINT8(tag, NCQTransferState),
1501         VMSTATE_UINT8(cmd, NCQTransferState),
1502         VMSTATE_UINT8(slot, NCQTransferState),
1503         VMSTATE_BOOL(used, NCQTransferState),
1504         VMSTATE_BOOL(halt, NCQTransferState),
1505         VMSTATE_END_OF_LIST()
1506     },
1507 };
1508 
1509 static const VMStateDescription vmstate_ahci_device = {
1510     .name = "ahci port",
1511     .version_id = 1,
1512     .fields = (VMStateField[]) {
1513         VMSTATE_IDE_BUS(port, AHCIDevice),
1514         VMSTATE_IDE_DRIVE(port.ifs[0], AHCIDevice),
1515         VMSTATE_UINT32(port_state, AHCIDevice),
1516         VMSTATE_UINT32(finished, AHCIDevice),
1517         VMSTATE_UINT32(port_regs.lst_addr, AHCIDevice),
1518         VMSTATE_UINT32(port_regs.lst_addr_hi, AHCIDevice),
1519         VMSTATE_UINT32(port_regs.fis_addr, AHCIDevice),
1520         VMSTATE_UINT32(port_regs.fis_addr_hi, AHCIDevice),
1521         VMSTATE_UINT32(port_regs.irq_stat, AHCIDevice),
1522         VMSTATE_UINT32(port_regs.irq_mask, AHCIDevice),
1523         VMSTATE_UINT32(port_regs.cmd, AHCIDevice),
1524         VMSTATE_UINT32(port_regs.tfdata, AHCIDevice),
1525         VMSTATE_UINT32(port_regs.sig, AHCIDevice),
1526         VMSTATE_UINT32(port_regs.scr_stat, AHCIDevice),
1527         VMSTATE_UINT32(port_regs.scr_ctl, AHCIDevice),
1528         VMSTATE_UINT32(port_regs.scr_err, AHCIDevice),
1529         VMSTATE_UINT32(port_regs.scr_act, AHCIDevice),
1530         VMSTATE_UINT32(port_regs.cmd_issue, AHCIDevice),
1531         VMSTATE_BOOL(done_atapi_packet, AHCIDevice),
1532         VMSTATE_INT32(busy_slot, AHCIDevice),
1533         VMSTATE_BOOL(init_d2h_sent, AHCIDevice),
1534         VMSTATE_STRUCT_ARRAY(ncq_tfs, AHCIDevice, AHCI_MAX_CMDS,
1535                              1, vmstate_ncq_tfs, NCQTransferState),
1536         VMSTATE_END_OF_LIST()
1537     },
1538 };
1539 
1540 static int ahci_state_post_load(void *opaque, int version_id)
1541 {
1542     int i, j;
1543     struct AHCIDevice *ad;
1544     NCQTransferState *ncq_tfs;
1545     AHCIState *s = opaque;
1546 
1547     for (i = 0; i < s->ports; i++) {
1548         ad = &s->dev[i];
1549 
1550         /* Only remap the CLB address if appropriate, disallowing a state
1551          * transition from 'on' to 'off' it should be consistent here. */
1552         if (ahci_cond_start_engines(ad, false) != 0) {
1553             return -1;
1554         }
1555 
1556         for (j = 0; j < AHCI_MAX_CMDS; j++) {
1557             ncq_tfs = &ad->ncq_tfs[j];
1558             ncq_tfs->drive = ad;
1559 
1560             if (ncq_tfs->used != ncq_tfs->halt) {
1561                 return -1;
1562             }
1563             if (!ncq_tfs->halt) {
1564                 continue;
1565             }
1566             if (!is_ncq(ncq_tfs->cmd)) {
1567                 return -1;
1568             }
1569             if (ncq_tfs->slot != ncq_tfs->tag) {
1570                 return -1;
1571             }
1572             /* If ncq_tfs->halt is justly set, the engine should be engaged,
1573              * and the command list buffer should be mapped. */
1574             ncq_tfs->cmdh = get_cmd_header(s, i, ncq_tfs->slot);
1575             if (!ncq_tfs->cmdh) {
1576                 return -1;
1577             }
1578             ahci_populate_sglist(ncq_tfs->drive, &ncq_tfs->sglist,
1579                                  ncq_tfs->cmdh, ncq_tfs->sector_count * 512,
1580                                  0);
1581             if (ncq_tfs->sector_count != ncq_tfs->sglist.size >> 9) {
1582                 return -1;
1583             }
1584         }
1585 
1586 
1587         /*
1588          * If an error is present, ad->busy_slot will be valid and not -1.
1589          * In this case, an operation is waiting to resume and will re-check
1590          * for additional AHCI commands to execute upon completion.
1591          *
1592          * In the case where no error was present, busy_slot will be -1,
1593          * and we should check to see if there are additional commands waiting.
1594          */
1595         if (ad->busy_slot == -1) {
1596             check_cmd(s, i);
1597         } else {
1598             /* We are in the middle of a command, and may need to access
1599              * the command header in guest memory again. */
1600             if (ad->busy_slot < 0 || ad->busy_slot >= AHCI_MAX_CMDS) {
1601                 return -1;
1602             }
1603             ad->cur_cmd = get_cmd_header(s, i, ad->busy_slot);
1604         }
1605     }
1606 
1607     return 0;
1608 }
1609 
1610 const VMStateDescription vmstate_ahci = {
1611     .name = "ahci",
1612     .version_id = 1,
1613     .post_load = ahci_state_post_load,
1614     .fields = (VMStateField[]) {
1615         VMSTATE_STRUCT_VARRAY_POINTER_INT32(dev, AHCIState, ports,
1616                                      vmstate_ahci_device, AHCIDevice),
1617         VMSTATE_UINT32(control_regs.cap, AHCIState),
1618         VMSTATE_UINT32(control_regs.ghc, AHCIState),
1619         VMSTATE_UINT32(control_regs.irqstatus, AHCIState),
1620         VMSTATE_UINT32(control_regs.impl, AHCIState),
1621         VMSTATE_UINT32(control_regs.version, AHCIState),
1622         VMSTATE_UINT32(idp_index, AHCIState),
1623         VMSTATE_INT32_EQUAL(ports, AHCIState),
1624         VMSTATE_END_OF_LIST()
1625     },
1626 };
1627 
1628 static const VMStateDescription vmstate_sysbus_ahci = {
1629     .name = "sysbus-ahci",
1630     .fields = (VMStateField[]) {
1631         VMSTATE_AHCI(ahci, SysbusAHCIState),
1632         VMSTATE_END_OF_LIST()
1633     },
1634 };
1635 
1636 static void sysbus_ahci_reset(DeviceState *dev)
1637 {
1638     SysbusAHCIState *s = SYSBUS_AHCI(dev);
1639 
1640     ahci_reset(&s->ahci);
1641 }
1642 
1643 static void sysbus_ahci_realize(DeviceState *dev, Error **errp)
1644 {
1645     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
1646     SysbusAHCIState *s = SYSBUS_AHCI(dev);
1647 
1648     ahci_init(&s->ahci, dev, &address_space_memory, s->num_ports);
1649 
1650     sysbus_init_mmio(sbd, &s->ahci.mem);
1651     sysbus_init_irq(sbd, &s->ahci.irq);
1652 }
1653 
1654 static Property sysbus_ahci_properties[] = {
1655     DEFINE_PROP_UINT32("num-ports", SysbusAHCIState, num_ports, 1),
1656     DEFINE_PROP_END_OF_LIST(),
1657 };
1658 
1659 static void sysbus_ahci_class_init(ObjectClass *klass, void *data)
1660 {
1661     DeviceClass *dc = DEVICE_CLASS(klass);
1662 
1663     dc->realize = sysbus_ahci_realize;
1664     dc->vmsd = &vmstate_sysbus_ahci;
1665     dc->props = sysbus_ahci_properties;
1666     dc->reset = sysbus_ahci_reset;
1667     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
1668 }
1669 
1670 static const TypeInfo sysbus_ahci_info = {
1671     .name          = TYPE_SYSBUS_AHCI,
1672     .parent        = TYPE_SYS_BUS_DEVICE,
1673     .instance_size = sizeof(SysbusAHCIState),
1674     .class_init    = sysbus_ahci_class_init,
1675 };
1676 
1677 static void sysbus_ahci_register_types(void)
1678 {
1679     type_register_static(&sysbus_ahci_info);
1680 }
1681 
1682 type_init(sysbus_ahci_register_types)
1683 
1684 void ahci_ide_create_devs(PCIDevice *dev, DriveInfo **hd)
1685 {
1686     AHCIPCIState *d = ICH_AHCI(dev);
1687     AHCIState *ahci = &d->ahci;
1688     int i;
1689 
1690     for (i = 0; i < ahci->ports; i++) {
1691         if (hd[i] == NULL) {
1692             continue;
1693         }
1694         ide_create_drive(&ahci->dev[i].port, 0, hd[i]);
1695     }
1696 
1697 }
1698