/*
 *  xen paravirt network card backend
 *
 *  (c) Gerd Hoffmann <kraxel@redhat.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 *  Contributions after 2012-01-13 are licensed under the terms of the
 *  GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qemu/log.h"
#include "qemu/qemu-print.h"
#include "qapi/qmp/qdict.h"
#include "qapi/error.h"

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/wait.h>

#include "net/net.h"
#include "net/checksum.h"
#include "net/util.h"

#include "hw/xen/xen-backend.h"
#include "hw/xen/xen-bus-helper.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"

#include "hw/xen/interface/io/netif.h"
#include "hw/xen/interface/io/xs_wire.h"

#include "trace.h"

/* ------------------------------------------------------------- */

/*
 * Per-device state of the Xen paravirtual network backend.
 *
 * The ring-related fields are filled in by xen_netdev_connect() and
 * cleared again by xen_netdev_disconnect().
 */
struct XenNetDev {
    struct XenDevice      xendev;  /* must be first */
    XenEventChannel       *event_channel; /* bound on connect, NULL otherwise */
    int                   dev;     /* device index ("idx" property, -1 = unset) */
    int                   tx_work; /* bumped when more tx requests showed up */
    unsigned int          tx_ring_ref; /* frontend's "tx-ring-ref" value */
    unsigned int          rx_ring_ref; /* frontend's "rx-ring-ref" value */
    struct netif_tx_sring *txs;    /* mapping of tx_ring_ref, NULL if unmapped */
    struct netif_rx_sring *rxs;    /* mapping of rx_ring_ref, NULL if unmapped */
    netif_tx_back_ring_t  tx_ring; /* back ring initialised over txs */
    netif_rx_back_ring_t  rx_ring; /* back ring initialised over rxs */
    NICConf               conf;    /* NIC properties (MAC address, peer, ...) */
    NICState              *nic;    /* created in xen_netdev_realize() */
};

typedef struct XenNetDev XenNetDev;

/* QOM type name and the XEN_NET_DEVICE() cast helper for XenNetDev. */
#define TYPE_XEN_NET_DEVICE "xen-net-device"
OBJECT_DECLARE_SIMPLE_TYPE(XenNetDev, XEN_NET_DEVICE)

/* ------------------------------------------------------------- */

/*
 * Place a response with status @st for transmit request @txp on the tx
 * ring, push it to the frontend and kick the event channel if the ring
 * macros report that a notification is required.
 *
 * When the response producer has caught up with the request consumer a
 * final check for new requests is made; if any are pending, tx_work is
 * incremented so that net_tx_packets() goes round its loop once more.
 */
static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, int8_t st)
{
    RING_IDX rsp_idx = netdev->tx_ring.rsp_prod_pvt;
    netif_tx_response_t *rsp = RING_GET_RESPONSE(&netdev->tx_ring, rsp_idx);
    int send_notify;
    int pending;

    rsp->id = txp->id;
    rsp->status = st;

    /*
     * Note: a formerly disabled code path here filled an additional
     * NETIF_RSP_NULL response slot for requests flagged with
     * NETTXF_extra_info.  Such requests are rejected by the caller, so
     * no extra slot is produced.
     */

    rsp_idx++;
    netdev->tx_ring.rsp_prod_pvt = rsp_idx;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, send_notify);
    if (send_notify) {
        xen_device_notify_event_channel(XEN_DEVICE(netdev),
                                        netdev->event_channel, NULL);
    }

    if (rsp_idx != netdev->tx_ring.req_cons) {
        return;
    }

    RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, pending);
    if (pending) {
        netdev->tx_work++;
    }
}

/*
 * Fail a single transmit request by answering it with NETIF_RSP_ERROR.
 *
 * @end is accepted for the benefit of callers but is not used: only
 * @txp itself is failed.  A variant that failed every request in the
 * ring up to @end (as Linux netback does) was never enabled here; it
 * is unclear whether that is desirable while SG and TSO are unsupported.
 */
static void net_tx_error(struct XenNetDev *netdev, netif_tx_request_t *txp, RING_IDX end)
{
    net_tx_response(netdev, txp, NETIF_RSP_ERROR);
}

/*
 * Drain the transmit ring: take every request the frontend has queued,
 * validate it, map the granted page read-only and hand the packet data
 * to the network layer, then answer the request.
 *
 * Must be called with the iothread lock held (asserted below).
 *
 * Returns true if at least one request was consumed from the ring.
 */
static bool net_tx_packets(struct XenNetDev *netdev)
{
    bool done_something = false;
    netif_tx_request_t txreq;
    RING_IDX rc, rp;
    void *page;
    void *tmpbuf = NULL;

    assert(qemu_mutex_iothread_locked());

    /*
     * Outer loop: repeat as long as net_tx_response() flagged (through
     * tx_work) that further requests arrived while we were responding.
     */
    for (;;) {
        rc = netdev->tx_ring.req_cons;
        rp = netdev->tx_ring.sring->req_prod;
        xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

        while ((rc != rp)) {
            if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc)) {
                break;
            }
            /* Work on a private copy; the ring slot is shared memory. */
            memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc), sizeof(txreq));
            netdev->tx_ring.req_cons = ++rc;
            done_something = true;

#if 1
            /* should not happen in theory, we don't announce the *
             * feature-{sg,gso,whatelse} flags in xenstore (yet?) */
            if (txreq.flags & NETTXF_extra_info) {
                qemu_log_mask(LOG_UNIMP, "vif%u: FIXME: extra info flag\n",
                              netdev->dev);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
            if (txreq.flags & NETTXF_more_data) {
                qemu_log_mask(LOG_UNIMP, "vif%u: FIXME: more data flag\n",
                              netdev->dev);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
#endif

            /* 14 is presumably the Ethernet header length - TODO confirm */
            if (txreq.size < 14) {
                qemu_log_mask(LOG_GUEST_ERROR, "vif%u: bad packet size: %d\n",
                              netdev->dev, txreq.size);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }

            /* Only one page is mapped, so the data must fit inside it. */
            if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) {
                qemu_log_mask(LOG_GUEST_ERROR, "vif%u: error: page crossing\n",
                              netdev->dev);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }

            trace_xen_netdev_tx(netdev->dev, txreq.gref, txreq.offset,
                                txreq.size, txreq.flags,
                                (txreq.flags & NETTXF_csum_blank)     ? " csum_blank"     : "",
                                (txreq.flags & NETTXF_data_validated) ? " data_validated" : "",
                                (txreq.flags & NETTXF_more_data)      ? " more_data"      : "",
                                (txreq.flags & NETTXF_extra_info)     ? " extra_info"     : "");

            page = xen_device_map_grant_refs(&netdev->xendev, &txreq.gref, 1,
                                             PROT_READ, NULL);
            if (page == NULL) {
                qemu_log_mask(LOG_GUEST_ERROR,
                              "vif%u: tx gref dereference failed (%d)\n",
                              netdev->dev, txreq.gref);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
            if (txreq.flags & NETTXF_csum_blank) {
                /* have read-only mapping -> can't fill checksum in-place */
                if (!tmpbuf) {
                    /* Allocated lazily once, reused for later packets. */
                    tmpbuf = g_malloc(XEN_PAGE_SIZE);
                }
                memcpy(tmpbuf, page + txreq.offset, txreq.size);
                net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL);
                qemu_send_packet(qemu_get_queue(netdev->nic), tmpbuf,
                                 txreq.size);
            } else {
                qemu_send_packet(qemu_get_queue(netdev->nic),
                                 page + txreq.offset, txreq.size);
            }
            xen_device_unmap_grant_refs(&netdev->xendev, page, &txreq.gref, 1,
                                        NULL);
            net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
        }
        if (!netdev->tx_work) {
            break;
        }
        netdev->tx_work = 0;
    }
    g_free(tmpbuf);
    return done_something;
}

/* ------------------------------------------------------------- */

/*
 * Place a response for receive request @req on the rx ring and push it
 * to the frontend, notifying it over the event channel when required.
 *
 * The response's status field carries @size, unless @st is negative, in
 * which case it carries @st instead.  @offset and @flags are copied
 * into the response unchanged.
 */
static void net_rx_response(struct XenNetDev *netdev,
                            netif_rx_request_t *req, int8_t st,
                            uint16_t offset, uint16_t size,
                            uint16_t flags)
{
    RING_IDX rsp_idx = netdev->rx_ring.rsp_prod_pvt;
    netif_rx_response_t *rsp = RING_GET_RESPONSE(&netdev->rx_ring, rsp_idx);
    int send_notify;

    rsp->id = req->id;
    rsp->offset = offset;
    rsp->flags = flags;
    rsp->status = (st < 0) ? (int16_t)st : (int16_t)size;

    trace_xen_netdev_rx(netdev->dev, rsp_idx, rsp->status, rsp->flags);

    netdev->rx_ring.rsp_prod_pvt = rsp_idx + 1;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, send_notify);
    if (!send_notify) {
        return;
    }
    xen_device_notify_event_channel(XEN_DEVICE(netdev),
                                    netdev->event_channel, NULL);
}

/*
 * Byte offset inside the granted page at which received packet data is
 * stored (presumably so that the IP header following the Ethernet
 * header ends up naturally aligned - TODO confirm).
 */
#define NET_IP_ALIGN 2

/*
 * NetClientInfo 'receive' hook: deliver one packet to the guest.
 *
 * Takes the next request from the rx ring, maps the page granted by the
 * frontend, copies @size bytes from @buf to offset NET_IP_ALIGN inside
 * it and answers the request.
 *
 * Must be called with the iothread lock held (asserted below).
 *
 * Returns @size when the packet was delivered, 0 when the ring has no
 * usable request at the moment, and -1 when the backend is not
 * connected, the packet does not fit into one page, or the granted page
 * could not be mapped.
 */
static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size)
{
    struct XenNetDev *netdev = qemu_get_nic_opaque(nc);
    netif_rx_request_t rxreq;
    RING_IDX rc, rp;
    void *page;

    assert(qemu_mutex_iothread_locked());

    if (xen_device_backend_get_state(&netdev->xendev) != XenbusStateConnected) {
        return -1;
    }

    rc = netdev->rx_ring.req_cons;
    rp = netdev->rx_ring.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
        return 0;
    }
    if (size > XEN_PAGE_SIZE - NET_IP_ALIGN) {
        /*
         * Cast both values so the format matches whatever integer type
         * the page-size expression has, and terminate the line like all
         * other log messages in this file.
         */
        qemu_log_mask(LOG_GUEST_ERROR, "vif%u: packet too big (%lu > %lu)\n",
                      netdev->dev, (unsigned long)size,
                      (unsigned long)(XEN_PAGE_SIZE - NET_IP_ALIGN));
        return -1;
    }

    /* Work on a private copy; the ring slot is shared memory. */
    memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq));
    netdev->rx_ring.req_cons = ++rc;

    page = xen_device_map_grant_refs(&netdev->xendev, &rxreq.gref, 1,
                                     PROT_WRITE, NULL);
    if (page == NULL) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "vif%u: rx gref dereference failed (%d)\n",
                      netdev->dev, rxreq.gref);
        /* The request was already consumed, so it must be answered. */
        net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0);
        return -1;
    }
    memcpy(page + NET_IP_ALIGN, buf, size);
    xen_device_unmap_grant_refs(&netdev->xendev, page, &rxreq.gref, 1, NULL);
    net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);

    return size;
}

/* ------------------------------------------------------------- */

/*
 * Net client description handed to qemu_new_nic(): a NIC-type client
 * whose incoming packets are delivered through net_rx_packet().
 */
static NetClientInfo net_xen_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .receive = net_rx_packet,
};

/*
 * XenDeviceClass 'realize' hook.
 *
 * Makes sure the device has a MAC address, publishes it in the
 * frontend's "mac" node, creates the NIC and advertises the backend
 * features ("feature-rx-copy" = 1, "feature-rx-flip" = 0).
 */
static void xen_netdev_realize(XenDevice *xendev, Error **errp)
{
    ERRP_GUARD();
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
    MACAddr *mac = &netdev->conf.macaddr;
    NetClientState *nc;
    const char *peer_name;

    qemu_macaddr_default_if_unset(mac);

    xen_device_frontend_printf(xendev, "mac", "%02x:%02x:%02x:%02x:%02x:%02x",
                               mac->a[0], mac->a[1], mac->a[2],
                               mac->a[3], mac->a[4], mac->a[5]);

    netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf,
                               object_get_typename(OBJECT(xendev)),
                               DEVICE(xendev)->id, netdev);

    nc = qemu_get_queue(netdev->nic);
    qemu_format_nic_info_str(nc, mac->a);

    /* Tell the frontend which receive modes this backend offers. */
    xen_device_backend_printf(xendev, "feature-rx-copy", "%u", 1);
    xen_device_backend_printf(xendev, "feature-rx-flip", "%u", 0);

    if (nc->peer) {
        peer_name = nc->peer->name;
    } else {
        peer_name = "(none)";
    }
    trace_xen_netdev_realize(netdev->dev, nc->info_str, peer_name);
}

/*
 * Event channel handler: process pending transmit requests, then flush
 * any packets that were queued for this NIC.
 *
 * Returns whatever net_tx_packets() reported, i.e. whether any transmit
 * request was consumed.
 */
static bool net_event(void *_xendev)
{
    XenNetDev *netdev = XEN_NET_DEVICE(_xendev);
    bool tx_progress = net_tx_packets(netdev);

    qemu_flush_queued_packets(qemu_get_queue(netdev->nic));

    return tx_progress;
}

/*
 * Connect the backend to the frontend.
 *
 * Reads "tx-ring-ref", "rx-ring-ref", "event-channel" and
 * "request-rx-copy" from the frontend area, maps both shared rings,
 * initialises the back rings, binds the event channel and finally
 * processes any transmit requests that are already queued.
 *
 * Must be called with the iothread lock held (asserted below).
 *
 * Returns true on success.  On failure false is returned with @errp
 * set; resources acquired up to that point are NOT released here, the
 * caller is expected to invoke xen_netdev_disconnect().
 */
static bool xen_netdev_connect(XenDevice *xendev, Error **errp)
{
    /* Required because error_prepend() is applied to @errp below. */
    ERRP_GUARD();
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
    unsigned int port, rx_copy;

    assert(qemu_mutex_iothread_locked());

    if (xen_device_frontend_scanf(xendev, "tx-ring-ref", "%u",
                                  &netdev->tx_ring_ref) != 1) {
        error_setg(errp, "failed to read tx-ring-ref");
        return false;
    }

    if (xen_device_frontend_scanf(xendev, "rx-ring-ref", "%u",
                                  &netdev->rx_ring_ref) != 1) {
        error_setg(errp, "failed to read rx-ring-ref");
        return false;
    }

    if (xen_device_frontend_scanf(xendev, "event-channel", "%u",
                                  &port) != 1) {
        error_setg(errp, "failed to read event-channel");
        return false;
    }

    /* A missing "request-rx-copy" node is treated like a value of 0. */
    if (xen_device_frontend_scanf(xendev, "request-rx-copy", "%u",
                                  &rx_copy) != 1) {
        rx_copy = 0;
    }
    if (rx_copy == 0) {
        error_setg(errp, "frontend doesn't support rx-copy");
        return false;
    }

    netdev->txs = xen_device_map_grant_refs(xendev,
                                            &netdev->tx_ring_ref, 1,
                                            PROT_READ | PROT_WRITE,
                                            errp);
    if (!netdev->txs) {
        error_prepend(errp, "failed to map tx grant ref: ");
        return false;
    }

    netdev->rxs = xen_device_map_grant_refs(xendev,
                                            &netdev->rx_ring_ref, 1,
                                            PROT_READ | PROT_WRITE,
                                            errp);
    if (!netdev->rxs) {
        error_prepend(errp, "failed to map rx grant ref: ");
        return false;
    }

    BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XEN_PAGE_SIZE);
    BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XEN_PAGE_SIZE);

    netdev->event_channel = xen_device_bind_event_channel(xendev, port,
                                                          net_event,
                                                          netdev,
                                                          errp);
    if (!netdev->event_channel) {
        return false;
    }

    trace_xen_netdev_connect(netdev->dev, netdev->tx_ring_ref,
                             netdev->rx_ring_ref, port);

    /* Pick up requests the frontend queued before we were bound. */
    net_tx_packets(netdev);
    return true;
}

/*
 * Disconnect from the frontend: detach the back rings, unbind the event
 * channel and unmap both shared rings.  Safe to call when some or all
 * of these resources were never set up.
 *
 * Must be called with the iothread lock held (asserted below).
 *
 * Every teardown step is attempted even if an earlier one failed.  Each
 * step reports into a local Error which is then propagated to @errp, so
 * that @errp is never handed to a callee while it is already set; if
 * several steps fail, the first error is the one reported.  @errp may
 * be NULL to ignore errors.
 */
static void xen_netdev_disconnect(XenDevice *xendev, Error **errp)
{
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
    Error *local_err = NULL;

    trace_xen_netdev_disconnect(netdev->dev);

    assert(qemu_mutex_iothread_locked());

    netdev->tx_ring.sring = NULL;
    netdev->rx_ring.sring = NULL;

    if (netdev->event_channel) {
        xen_device_unbind_event_channel(xendev, netdev->event_channel,
                                        &local_err);
        error_propagate(errp, local_err);
        local_err = NULL;
        netdev->event_channel = NULL;
    }
    if (netdev->txs) {
        xen_device_unmap_grant_refs(xendev, netdev->txs,
                                    &netdev->tx_ring_ref, 1, &local_err);
        error_propagate(errp, local_err);
        local_err = NULL;
        netdev->txs = NULL;
    }
    if (netdev->rxs) {
        xen_device_unmap_grant_refs(xendev, netdev->rxs,
                                    &netdev->rx_ring_ref, 1, &local_err);
        error_propagate(errp, local_err);
        local_err = NULL;
        netdev->rxs = NULL;
    }
}

/* -------------------------------------------------------------------- */


/*
 * XenDeviceClass 'frontend_changed' hook: react to a change of the
 * frontend's xenbus state.
 *
 *  - Connected: unless the backend is already connected, tear down any
 *    previous connection and connect again.  On success the backend
 *    moves to Connected; if connecting fails, it is disconnected and
 *    moved to Closing.
 *  - Closing: the backend follows to Closing.
 *  - Closed / Unknown: disconnect and, if that worked, move to Closed.
 *  - anything else: ignored.
 */
static void xen_netdev_frontend_changed(XenDevice *xendev,
                                       enum xenbus_state frontend_state,
                                       Error **errp)
{
    ERRP_GUARD();
    enum xenbus_state backend_state = xen_device_backend_get_state(xendev);

    trace_xen_netdev_frontend_changed(xendev->name, frontend_state);

    switch (frontend_state) {
    case XenbusStateConnected:
        if (backend_state != XenbusStateConnected) {
            xen_netdev_disconnect(xendev, errp);
            if (!*errp) {
                if (xen_netdev_connect(xendev, errp)) {
                    xen_device_backend_set_state(xendev,
                                                 XenbusStateConnected);
                } else {
                    /* Release whatever the failed connect acquired. */
                    xen_netdev_disconnect(xendev, NULL);
                    xen_device_backend_set_state(xendev, XenbusStateClosing);
                }
            }
        }
        break;

    case XenbusStateClosing:
        xen_device_backend_set_state(xendev, XenbusStateClosing);
        break;

    case XenbusStateClosed:
    case XenbusStateUnknown:
        xen_netdev_disconnect(xendev, errp);
        if (!*errp) {
            xen_device_backend_set_state(xendev, XenbusStateClosed);
        }
        break;

    case XenbusStateInitialised:
        /*
         * Linux netback does nothing on the frontend going (back) to
         * XenbusStateInitialised, so do the same here.
         */
    default:
        break;
    }
}

/*
 * XenDeviceClass 'get_name' hook: return the device index as a newly
 * allocated string.
 *
 * If no index was configured (dev == -1), the frontend's xenstore area
 * is probed for the first "device/vif/<idx>" node that does not exist
 * yet, and that index is stored in the device.  Probing starts at 0 in
 * XEN_EMULATE mode and at 1 otherwise, and gives up at 100.
 *
 * Returns NULL with @errp set if xenstore cannot be read or no free
 * index was found.
 */
static char *xen_netdev_get_name(XenDevice *xendev, Error **errp)
{
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
    char fe_path[XENSTORE_ABS_PATH_MAX + 1];
    XenBus *xenbus;
    int idx;

    if (netdev->dev != -1) {
        return g_strdup_printf("%u", netdev->dev);
    }

    xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));

    /* Theoretically we could go up to INT_MAX here but that's overkill */
    for (idx = (xen_mode == XEN_EMULATE) ? 0 : 1; idx < 100; idx++) {
        char *value;

        snprintf(fe_path, sizeof(fe_path),
                 "/local/domain/%u/device/vif/%u",
                 xendev->frontend_id, idx);
        value = qemu_xen_xs_read(xenbus->xsh, XBT_NULL, fe_path, NULL);
        if (value) {
            /* Node exists, so this index is taken; try the next one. */
            free(value);
            continue;
        }
        if (errno != ENOENT) {
            error_setg(errp, "cannot read %s: %s", fe_path,
                       strerror(errno));
            return NULL;
        }

        netdev->dev = idx;
        return g_strdup_printf("%u", netdev->dev);
    }

    error_setg(errp, "cannot find device index for netdev device");
    return NULL;
}

/*
 * XenDeviceClass 'unrealize' hook: drop the connection to the frontend
 * (errors are ignored) and delete the NIC if one was created.
 */
static void xen_netdev_unrealize(XenDevice *xendev)
{
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);

    trace_xen_netdev_unrealize(netdev->dev);

    /* The frontend may still be connected at this point. */
    xen_netdev_disconnect(xendev, NULL);

    if (!netdev->nic) {
        return;
    }
    qemu_del_nic(netdev->nic);
}

/* ------------------------------------------------------------- */

/*
 * qdev properties: the generic NIC properties stored in 'conf', plus
 * "idx", the device index, which defaults to -1 (not set).
 */
static Property xen_netdev_properties[] = {
    DEFINE_NIC_PROPERTIES(XenNetDev, conf),
    DEFINE_PROP_INT32("idx", XenNetDev, dev, -1),
    DEFINE_PROP_END_OF_LIST(),
};

/*
 * QOM class initialiser: install the XenDevice hooks of this backend
 * ("qnic" backend for "vif" devices) and register it as a
 * user-creatable network device with its properties.
 */
static void xen_netdev_class_init(ObjectClass *class, void *data)
{
    XenDeviceClass *xendev_class = XEN_DEVICE_CLASS(class);
    DeviceClass *dev_class = DEVICE_CLASS(class);

    /* Generic device class setup. */
    dev_class->user_creatable = true;
    set_bit(DEVICE_CATEGORY_NETWORK, dev_class->categories);
    device_class_set_props(dev_class, xen_netdev_properties);

    /* Xen device identification. */
    xendev_class->backend = "qnic";
    xendev_class->device = "vif";

    /* Xen device life-cycle hooks. */
    xendev_class->get_name = xen_netdev_get_name;
    xendev_class->realize = xen_netdev_realize;
    xendev_class->unrealize = xen_netdev_unrealize;
    xendev_class->frontend_changed = xen_netdev_frontend_changed;
}

/* QOM type description: XenNetDev is a TYPE_XEN_DEVICE subtype. */
static const TypeInfo xen_net_type_info = {
    .name = TYPE_XEN_NET_DEVICE,
    .parent = TYPE_XEN_DEVICE,
    .instance_size = sizeof(XenNetDev),
    .class_init = xen_netdev_class_init,
};

/* Register the xen-net-device QOM type; hooked up through type_init(). */
static void xen_net_register_types(void)
{
    type_register_static(&xen_net_type_info);
}

type_init(xen_net_register_types)

/*
 * Backend 'create' hook: called to instantiate a XenNetDev when the
 * backend is detected.
 *
 * The backend's name is parsed as the decimal device index and the MAC
 * address is taken from the "mac" entry of @opts.  The new device is
 * realized on the backend's bus and, on success, attached to @backend.
 * On any failure @errp is set and no device remains.
 */
static void xen_net_device_create(XenBackendInstance *backend,
                                  QDict *opts, Error **errp)
{
    ERRP_GUARD();
    XenBus *xenbus = xen_backend_get_bus(backend);
    const char *name = xen_backend_get_name(backend);
    unsigned long number;
    const char *macstr;
    XenDevice *xendev;
    XenNetDev *net;
    MACAddr mac;

    if (qemu_strtoul(name, NULL, 10, &number) || number >= INT_MAX) {
        error_setg(errp, "failed to parse name '%s'", name);
        return;
    }

    trace_xen_netdev_create(number);

    macstr = qdict_get_try_str(opts, "mac");
    if (!macstr) {
        error_setg(errp, "no MAC address found");
        return;
    }

    if (net_parse_macaddr(mac.a, macstr) < 0) {
        error_setg(errp, "failed to parse MAC address");
        return;
    }

    /* All inputs are valid: build the device and configure it. */
    xendev = XEN_DEVICE(qdev_new(TYPE_XEN_NET_DEVICE));
    net = XEN_NET_DEVICE(xendev);

    net->dev = number;
    memcpy(&net->conf.macaddr, &mac, sizeof(mac));

    if (!qdev_realize_and_unref(DEVICE(xendev), BUS(xenbus), errp)) {
        error_prepend(errp, "realization of net device %lu failed: ",
                      number);
        object_unparent(OBJECT(xendev));
        return;
    }

    xen_backend_set_device(backend, xendev);
}

/*
 * Backend 'destroy' hook: unparent the device that was attached to
 * @backend by the create hook.
 */
static void xen_net_device_destroy(XenBackendInstance *backend,
                                   Error **errp)
{
    ERRP_GUARD();
    XenDevice *xendev = xen_backend_get_device(backend);

    trace_xen_netdev_destroy(XEN_NET_DEVICE(xendev)->dev);

    object_unparent(OBJECT(xendev));
}

/* Create/destroy hooks for backends of type "qnic". */
static const XenBackendInfo xen_net_backend_info  = {
    .type = "qnic",
    .create = xen_net_device_create,
    .destroy = xen_net_device_destroy,
};

/* Register the "qnic" backend; hooked up through xen_backend_init(). */
static void xen_net_register_backend(void)
{
    xen_backend_register(&xen_net_backend_info);
}

xen_backend_init(xen_net_register_backend);