git: 9180daa1e345 - main - bhyve: add basic E820 implementation
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 26 Apr 2023 07:59:33 UTC
The branch main has been updated by corvink: URL: https://cgit.FreeBSD.org/src/commit/?id=9180daa1e34577aaccf3ff64cc63a5179c4f09d8 commit 9180daa1e34577aaccf3ff64cc63a5179c4f09d8 Author: Corvin Köhne <corvink@FreeBSD.org> AuthorDate: 2021-09-09 09:37:03 +0000 Commit: Corvin Köhne <corvink@FreeBSD.org> CommitDate: 2023-04-26 07:58:27 +0000 bhyve: add basic E820 implementation There are some use cases where bhyve has to prepare some special memory regions. E.g. GPU passthrough for Intel integrated graphic devices needs to reserve some memory for the graphic device. So, bhyve has to inform the guest about those memory regions. This information can be passed by the qemu fwcfg interface. As qemu creates an E820 table, we can reuse the existing fwcfg item "etc/e820". This commit is the first one of a series. It only adds a basic implementation for the creation of the E820 table. Some subsequent commits will add more items to the E820 table and register it as fwcfg item. Reviewed by: markj MFC after: 1 week Sponsored by: Beckhoff Automation GmbH & Co. KG Differential Revision: https://reviews.freebsd.org/D39545 --- usr.sbin/bhyve/e820.c | 233 ++++++++++++++++++++++++++++++++++++++++++++++++++ usr.sbin/bhyve/e820.h | 28 ++++++ 2 files changed, 261 insertions(+) diff --git a/usr.sbin/bhyve/e820.c b/usr.sbin/bhyve/e820.c new file mode 100644 index 000000000000..746d34d6521c --- /dev/null +++ b/usr.sbin/bhyve/e820.c @@ -0,0 +1,233 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG + * Author: Corvin Köhne <c.koehne@beckhoff.com> + */ + +#include <sys/types.h> +#include <sys/queue.h> + +#include <machine/vmm.h> + +#include <assert.h> +#include <err.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "e820.h" +#include "qemu_fwcfg.h" + +#define E820_FWCFG_FILE_NAME "etc/e820" + +#define KB (1024UL) +#define MB (1024 * KB) +#define GB (1024 * MB) + +struct e820_element { + TAILQ_ENTRY(e820_element) chain; + uint64_t base; + uint64_t end; + enum e820_memory_type type; +}; +static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER( + e820_table); + +static struct e820_element * +e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type) +{ + struct e820_element *element; + + element = calloc(1, sizeof(*element)); + if (element == NULL) { + return (NULL); + } + + element->base = base; + element->end = end; + element->type = type; + + return (element); +} + +struct qemu_fwcfg_item * +e820_get_fwcfg_item(void) +{ + struct qemu_fwcfg_item *fwcfg_item; + struct e820_element *element; + struct e820_entry *entries; + int count, i; + + count = 0; + TAILQ_FOREACH(element, &e820_table, chain) { + ++count; + } + if (count == 0) { + warnx("%s: E820 table empty", __func__); + return (NULL); + } + + fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item)); + if (fwcfg_item == NULL) { + return (NULL); + } + + fwcfg_item->size = count * sizeof(struct e820_entry); + fwcfg_item->data = calloc(count, sizeof(struct e820_entry)); + if (fwcfg_item->data == NULL) { + free(fwcfg_item); + return (NULL); + } + + i = 0; + entries = (struct e820_entry *)fwcfg_item->data; + TAILQ_FOREACH(element, &e820_table, chain) { + struct e820_entry *entry = &entries[i]; + + entry->base = element->base; + entry->length = element->end - element->base; + entry->type = element->type; + + ++i; + } + + return (fwcfg_item); +} + +static int +e820_add_entry(const uint64_t base, const uint64_t end, + const enum e820_memory_type type) +{ + struct e820_element *new_element; + struct e820_element *element; + struct e820_element *ram_element; + + assert(end >= base); + + new_element = e820_element_alloc(base, end, type); + if (new_element == NULL) { + return (ENOMEM); + } + + /* + * E820 table should always be sorted in ascending order. Therefore, + * search for a range whose end is larger than the base parameter. + */ + TAILQ_FOREACH(element, &e820_table, chain) { + if (element->end > base) { + break; + } + } + + /* + * System memory requires special handling. + */ + if (type == E820_TYPE_MEMORY) { + /* + * base is larger than of any existing element. Add new system + * memory at the end of the table. + */ + if (element == NULL) { + TAILQ_INSERT_TAIL(&e820_table, new_element, chain); + return (0); + } + + /* + * System memory shouldn't overlap with any existing element. + */ + assert(end >= element->base); + + TAILQ_INSERT_BEFORE(element, new_element, chain); + + return (0); + } + + assert(element != NULL); + /* Non system memory should be allocated inside system memory. */ + assert(element->type == E820_TYPE_MEMORY); + /* New element should fit into existing system memory element. */ + assert(base >= element->base && end <= element->end); + + if (base == element->base) { + /* + * New element at system memory base boundary. Add new + * element before current and adjust the base of the old + * element. + * + * Old table: + * [ 0x1000, 0x4000] RAM <-- element + * New table: + * [ 0x1000, 0x2000] Reserved + * [ 0x2000, 0x4000] RAM <-- element + */ + TAILQ_INSERT_BEFORE(element, new_element, chain); + element->base = end; + } else if (end == element->end) { + /* + * New element at system memory end boundary. Add new + * element after current and adjust the end of the + * current element. + * + * Old table: + * [ 0x1000, 0x4000] RAM <-- element + * New table: + * [ 0x1000, 0x3000] RAM <-- element + * [ 0x3000, 0x4000] Reserved + */ + TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain); + element->end = base; + } else { + /* + * New element inside system memory entry. Split it by + * adding a system memory element and the new element + * before current. + * + * Old table: + * [ 0x1000, 0x4000] RAM <-- element + * New table: + * [ 0x1000, 0x2000] RAM + * [ 0x2000, 0x3000] Reserved + * [ 0x3000, 0x4000] RAM <-- element + */ + ram_element = e820_element_alloc(element->base, base, + E820_TYPE_MEMORY); + if (ram_element == NULL) { + return (ENOMEM); + } + TAILQ_INSERT_BEFORE(element, ram_element, chain); + TAILQ_INSERT_BEFORE(element, new_element, chain); + element->base = end; + } + + return (0); +} + +int +e820_init(struct vmctx *const ctx) +{ + uint64_t lowmem_size, highmem_size; + int error; + + TAILQ_INIT(&e820_table); + + lowmem_size = vm_get_lowmem_size(ctx); + error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY); + if (error) { + warnx("%s: Could not add lowmem", __func__); + return (error); + } + + highmem_size = vm_get_highmem_size(ctx); + if (highmem_size != 0) { + error = e820_add_entry(4 * GB, 4 * GB + highmem_size, + E820_TYPE_MEMORY); + if (error) { + warnx("%s: Could not add highmem", __func__); + return (error); + } + } + + return (0); +} diff --git a/usr.sbin/bhyve/e820.h b/usr.sbin/bhyve/e820.h new file mode 100644 index 000000000000..6843ad5dc736 --- /dev/null +++ b/usr.sbin/bhyve/e820.h @@ -0,0 +1,28 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG + * Author: Corvin Köhne <c.koehne@beckhoff.com> + */ + +#pragma once + +#include <vmmapi.h> + +#include "qemu_fwcfg.h" + +enum e820_memory_type { + E820_TYPE_MEMORY = 1, + E820_TYPE_RESERVED = 2, + E820_TYPE_ACPI = 3, + E820_TYPE_NVS = 4 +}; + +struct e820_entry { + uint64_t base; + uint64_t length; + uint32_t type; +} __packed; + +struct qemu_fwcfg_item *e820_get_fwcfg_item(void); +int e820_init(struct vmctx *const ctx);