Virtualization

Emulation basics

Virtualization basics

../_images/ditaa-91f08f7db4b54069e16694eab8d75c06400fc47b.png

Classic virtualization

Software virtualization

MMU virtualization

Shadow page tables

 

../_images/ditaa-8632e22c6d89bd18f97c9cef127444486b5077df.png

Lazy shadow sync

I/O emulation

 

../_images/ditaa-bb69666d75b9670e542682753fb8cc9b77ff8894.png

Example: qemu SiFive UART emulation

/*
 * QEMU model of the UART on the SiFive E300 and U500 series SOCs.
 *
 * Copyright (c) 2016 Stefan O'Rear
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/log.h"
#include "chardev/char.h"
#include "chardev/char-fe.h"
#include "hw/irq.h"
#include "hw/char/sifive_uart.h"

/*
 * Not yet implemented:
 *
 * Transmit FIFO using "qemu/fifo8.h"
 */

/* Returns the state of the IP (interrupt pending) register */
static uint64_t uart_ip(SiFiveUARTState *s)
{
    uint64_t ret = 0;

    uint64_t txcnt = SIFIVE_UART_GET_TXCNT(s->txctrl);
    uint64_t rxcnt = SIFIVE_UART_GET_RXCNT(s->rxctrl);

    if (txcnt != 0) {
        ret |= SIFIVE_UART_IP_TXWM;
    }
    if (s->rx_fifo_len > rxcnt) {
        ret |= SIFIVE_UART_IP_RXWM;
    }

    return ret;
}

static void update_irq(SiFiveUARTState *s)
{
    int cond = 0;
    if ((s->ie & SIFIVE_UART_IE_TXWM) ||
        ((s->ie & SIFIVE_UART_IE_RXWM) && s->rx_fifo_len)) {
        cond = 1;
    }
    if (cond) {
        qemu_irq_raise(s->irq);
    } else {
        qemu_irq_lower(s->irq);
    }
}

static uint64_t
uart_read(void *opaque, hwaddr addr, unsigned int size)
{
    SiFiveUARTState *s = opaque;
    unsigned char r;
    switch (addr) {
    case SIFIVE_UART_RXFIFO:
        if (s->rx_fifo_len) {
            r = s->rx_fifo[0];
            memmove(s->rx_fifo, s->rx_fifo + 1, s->rx_fifo_len - 1);
            s->rx_fifo_len--;
            qemu_chr_fe_accept_input(&s->chr);
            update_irq(s);
            return r;
        }
        return 0x80000000;

    case SIFIVE_UART_TXFIFO:
        return 0; /* Should check tx fifo */
    case SIFIVE_UART_IE:
        return s->ie;
    case SIFIVE_UART_IP:
        return uart_ip(s);
    case SIFIVE_UART_TXCTRL:
        return s->txctrl;
    case SIFIVE_UART_RXCTRL:
        return s->rxctrl;
    case SIFIVE_UART_DIV:
        return s->div;
    }

    qemu_log_mask(LOG_GUEST_ERROR, "%s: bad read: addr=0x%x\n",
                  __func__, (int)addr);
    return 0;
}

static void
uart_write(void *opaque, hwaddr addr,
           uint64_t val64, unsigned int size)
{
    SiFiveUARTState *s = opaque;
    uint32_t value = val64;
    unsigned char ch = value;

    switch (addr) {
    case SIFIVE_UART_TXFIFO:
        qemu_chr_fe_write(&s->chr, &ch, 1);
        update_irq(s);
        return;
    case SIFIVE_UART_IE:
        s->ie = val64;
        update_irq(s);
        return;
    case SIFIVE_UART_TXCTRL:
        s->txctrl = val64;
        return;
    case SIFIVE_UART_RXCTRL:
        s->rxctrl = val64;
        return;
    case SIFIVE_UART_DIV:
        s->div = val64;
        return;
    }
    qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write: addr=0x%x v=0x%x\n",
                  __func__, (int)addr, (int)value);
}

static const MemoryRegionOps uart_ops = {
    .read = uart_read,
    .write = uart_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4
    }
};

static void uart_rx(void *opaque, const uint8_t *buf, int size)
{
    SiFiveUARTState *s = opaque;

    /* Got a byte.  */
    if (s->rx_fifo_len >= sizeof(s->rx_fifo)) {
        printf("WARNING: UART dropped char.\n");
        return;
    }
    s->rx_fifo[s->rx_fifo_len++] = *buf;

    update_irq(s);
}

static int uart_can_rx(void *opaque)
{
    SiFiveUARTState *s = opaque;

    return s->rx_fifo_len < sizeof(s->rx_fifo);
}

static void uart_event(void *opaque, QEMUChrEvent event)
{
}

static int uart_be_change(void *opaque)
{
    SiFiveUARTState *s = opaque;

    qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, uart_event,
        uart_be_change, s, NULL, true);

    return 0;
}

/*
 * Create UART device.
 */
SiFiveUARTState *sifive_uart_create(MemoryRegion *address_space, hwaddr base,
    Chardev *chr, qemu_irq irq)
{
    SiFiveUARTState *s = g_malloc0(sizeof(SiFiveUARTState));
    s->irq = irq;
    qemu_chr_fe_init(&s->chr, chr, &error_abort);
    qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, uart_event,
        uart_be_change, s, NULL, true);
    memory_region_init_io(&s->mmio, NULL, &uart_ops, s,
                          TYPE_SIFIVE_UART, SIFIVE_UART_MAX);
    memory_region_add_subregion(address_space, base, &s->mmio);
    return s;
}

Paravirtualization

Intel VT-x

Virtual Machine Control Structure

VM entry & exit

VM execution control fields

Extend Page Tables

../_images/ditaa-cc9a2e995be74ee99646ea4bf0e551d766fa92ef.png

VPID

I/O virtualization

../_images/ditaa-3901edd823cdc7a6f429ebc37cbc541e650abc96.png

I/O MMU

VT-d protects and translates VM physical addresses using an I/O MMU (DMA remaping)

../_images/ditaa-d880751969de8642b2613caaca345d71acea4500.png

Interrupt posting

I/O virtualization

../_images/ditaa-2cb0eb0056bb775d1446843d62241fd660662c96.png

SR-IOV

qemu

KVM

../_images/ditaa-f8fcc760ef5dad50d1038ed3426d0fcce12fd3e6.png

KVM

Xen

Xen

../_images/xen-overview.png