vfio-user: implement VFIO_USER_DMA_MAP/UNMAP
When the vfio-user container gets mapping updates, share them with the
vfio-user server by sending a message; this can include the region fd,
allowing the server to directly mmap() the region as needed.

For performance, we only wait for the message responses when we're done
with a series of updates, via the listener_commit() callback.

Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250625193012.2316242-14-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
committed by Cédric Le Goater

parent 0192323581
commit 18e899e63d
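
The key point of the message above is that the server can map guest RAM
directly once it receives the region fd. A minimal server-side sketch of
handling VFIO_USER_DMA_MAP (illustrative only, not part of this patch; the
payload layout matches the VFIOUserDMAMap struct added below, but the handler
name and fallback behavior are assumptions):

    /*
     * Illustrative server-side sketch: on receiving VFIO_USER_DMA_MAP with
     * an attached fd, mmap() the region so later DMA can be serviced by
     * direct loads/stores instead of message round trips.
     */
    #include <stdint.h>
    #include <sys/mman.h>
    #include <linux/vfio.h>     /* VFIO_DMA_MAP_FLAG_READ/WRITE */

    typedef struct {            /* payload of VFIOUserDMAMap, minus the header */
        uint32_t argsz;
        uint32_t flags;
        uint64_t offset;        /* file offset into the region fd */
        uint64_t iova;
        uint64_t size;
    } DMAMapPayload;

    static void *handle_dma_map(const DMAMapPayload *req, int region_fd)
    {
        int prot = (req->flags & VFIO_DMA_MAP_FLAG_WRITE) ?
                   PROT_READ | PROT_WRITE : PROT_READ;

        if (region_fd < 0) {
            /* No fd: the server must fall back to DMA read/write messages. */
            return MAP_FAILED;
        }
        return mmap(NULL, req->size, prot, MAP_SHARED, region_fd, req->offset);
    }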
hw/vfio-user/container.c
@@ -12,23 +12,152 @@
#include "hw/vfio-user/container.h"
#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"
#include "hw/vfio/vfio-cpr.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "qapi/error.h"

/*
 * When DMA space is the physical address space, the region add/del listeners
 * will fire during memory update transactions. These depend on BQL being held,
 * so do any resulting map/demap ops async while keeping BQL.
 */
static void vfio_user_listener_begin(VFIOContainerBase *bcontainer)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);

    container->proxy->async_ops = true;
}

static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);

    /* wait here for any async requests sent during the transaction */
    container->proxy->async_ops = false;
    vfio_user_wait_reqs(container->proxy);
}

static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
                               hwaddr iova, ram_addr_t size,
                               IOMMUTLBEntry *iotlb, bool unmap_all)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);
    Error *local_err = NULL;
    int ret = 0;

    VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp));

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0);
    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
    msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0;
    msgp->iova = iova;
    msgp->size = size;
    trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags,
                              container->proxy->async_ops);

    if (container->proxy->async_ops) {
        if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL,
                                   0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        } else {
            ret = 0;
        }
    } else {
        if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL,
                                 0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            ret = -msgp->hdr.error_reply;
        }

        g_free(msgp);
    }

    return ret;
}

static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                             ram_addr_t size, void *vaddr, bool readonly,
                             MemoryRegion *mrp)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);
    int fd = memory_region_get_fd(mrp);
    Error *local_err = NULL;
    int ret;

    VFIOUserFDs *fds = NULL;
    VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp));

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map);
    msgp->flags = VFIO_DMA_MAP_FLAG_READ;
    msgp->offset = 0;
    msgp->iova = iova;
    msgp->size = size;

    /*
     * vaddr enters as a QEMU process address; make it either a file offset
     * for mapped areas or leave it as 0.
     */
    if (fd != -1) {
        msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr);
    }

    if (!readonly) {
        msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE;
    }

    trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags,
                            container->proxy->async_ops);

    /*
     * The async_ops case sends without blocking; the requests are waited
     * for later, in vfio_user_wait_reqs().
     */
    if (container->proxy->async_ops) {
        /* can't use an auto variable since we don't block */
        if (fd != -1) {
            fds = vfio_user_getfds(1);
            fds->send_fds = 1;
            fds->fds[0] = fd;
        }

        if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds,
                                   0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        } else {
            ret = 0;
        }
    } else {
        VFIOUserFDs local_fds = { 1, 0, &fd };

        fds = fd != -1 ? &local_fds : NULL;

        if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds,
                                 0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            ret = -msgp->hdr.error_reply;
        }

        g_free(msgp);
    }

    return ret;
}

static int
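
To see how these pieces connect: the listener callbacks bracket a memory
update transaction, with map/unmap requests queued in between. An
illustrative sequence, inferred from the comment at the top of this hunk
(not code from the patch):

    /*
     * Illustrative sequence for a memory update transaction (BQL held):
     *
     *   listener transaction begins
     *     -> vfio_user_listener_begin()      proxy->async_ops = true
     *     -> vfio_user_dma_map()/_unmap()    sent via vfio_user_send_nowait()
     *     -> ... possibly more map/unmap ... all queued, none block
     *   listener transaction commits
     *     -> vfio_user_listener_commit()     proxy->async_ops = false
     *        -> vfio_user_wait_reqs()        block for outstanding replies
     */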
@@ -218,6 +347,8 @@ static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);

    vioc->setup = vfio_user_setup;
    vioc->listener_begin = vfio_user_listener_begin;
    vioc->listener_commit = vfio_user_listener_commit;
    vioc->dma_map = vfio_user_dma_map;
    vioc->dma_unmap = vfio_user_dma_unmap;
    vioc->attach_device = vfio_user_device_attach;
hw/vfio-user/protocol.h
@@ -112,6 +112,31 @@ typedef struct {
 */
#define VFIO_USER_DEF_MAX_BITMAP (256 * 1024 * 1024)

/*
 * VFIO_USER_DMA_MAP
 * imported from struct vfio_iommu_type1_dma_map
 */
typedef struct {
    VFIOUserHdr hdr;
    uint32_t argsz;
    uint32_t flags;
    uint64_t offset; /* FD offset */
    uint64_t iova;
    uint64_t size;
} VFIOUserDMAMap;

/*
 * VFIO_USER_DMA_UNMAP
 * imported from struct vfio_iommu_type1_dma_unmap
 */
typedef struct {
    VFIOUserHdr hdr;
    uint32_t argsz;
    uint32_t flags;
    uint64_t iova;
    uint64_t size;
} VFIOUserDMAUnmap;

/*
 * VFIO_USER_DEVICE_GET_INFO
 * imported from struct vfio_device_info

@@ -175,4 +200,11 @@ typedef struct {
    char data[];
} VFIOUserRegionRW;

/* imported from struct vfio_bitmap */
typedef struct {
    uint64_t pgsize;
    uint64_t size;
    char data[];
} VFIOUserBitmap;

#endif /* VFIO_USER_PROTOCOL_H */
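
Since these structs claim to be imported from the type1 ioctl payloads, a
compile-time guard could pin that equivalence. A hypothetical sketch (not in
the patch), assuming QEMU_BUILD_BUG_ON from qemu/compiler.h and the
linux/vfio.h definitions:

    #include <linux/vfio.h>
    #include "qemu/compiler.h"   /* QEMU_BUILD_BUG_ON */

    /* Hypothetical layout guards: everything after the vfio-user header
     * must match the kernel struct each message is imported from. */
    QEMU_BUILD_BUG_ON(sizeof(VFIOUserDMAMap) - sizeof(VFIOUserHdr) !=
                      sizeof(struct vfio_iommu_type1_dma_map));
    QEMU_BUILD_BUG_ON(sizeof(VFIOUserDMAUnmap) - sizeof(VFIOUserHdr) !=
                      sizeof(struct vfio_iommu_type1_dma_unmap));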
hw/vfio-user/proxy.c
@@ -27,7 +27,6 @@ static IOThread *vfio_user_iothread;

static void vfio_user_shutdown(VFIOUserProxy *proxy);
static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                                     VFIOUserFDs *fds);
static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg);

static void vfio_user_recv(void *opaque);
@@ -132,7 +131,7 @@ static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg)
    QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
}

VFIOUserFDs *vfio_user_getfds(int numfds)
{
    VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int)));
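
The allocation above reserves numfds ints past the struct itself; combined
with the "VFIOUserFDs local_fds = { 1, 0, &fd };" initializer in container.c,
the implied shape is roughly the following (assumed; the real definition
lives in hw/vfio-user/proxy.h):

    /* Assumed shape of VFIOUserFDs, inferred from its uses in this commit: */
    typedef struct VFIOUserFDs {
        int send_fds;   /* how many fds to send with the message */
        int recv_fds;   /* how many fds we expect back */
        int *fds;       /* vfio_user_getfds() points this at the storage
                           allocated just past the struct */
    } VFIOUserFDs;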
@@ -618,6 +617,43 @@ static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg,
    return true;
}

/*
 * nowait send - vfio_user_wait_reqs() can wait for it later
 *
 * Returns false if we did not successfully queue the message, in which
 * case @errp will be populated.
 *
 * In either case, ownership of @hdr and @fds is taken, and the caller must
 * *not* free them itself.
 */
bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                           VFIOUserFDs *fds, int rsize, Error **errp)
{
    VFIOUserMsg *msg;

    QEMU_LOCK_GUARD(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->id = hdr->id;
    msg->rsize = rsize ? rsize : hdr->size;
    msg->type = VFIO_MSG_NOWAIT;

    if (hdr->flags & VFIO_USER_NO_REPLY) {
        error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
        vfio_user_recycle(proxy, msg);
        return false;
    }

    if (!vfio_user_send_queued(proxy, msg, errp)) {
        vfio_user_recycle(proxy, msg);
        return false;
    }

    proxy->last_nowait = msg;

    return true;
}

/*
 * Returns false if we did not successfully receive a reply message, in which
 * case @errp will be populated.

@@ -666,6 +702,50 @@ bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
    return ok;
}

void vfio_user_wait_reqs(VFIOUserProxy *proxy)
{
    VFIOUserMsg *msg;

    /*
     * Any DMA map/unmap requests sent in the middle of a memory region
     * transaction were sent nowait. Wait for them here.
     */
    qemu_mutex_lock(&proxy->lock);
    if (proxy->last_nowait != NULL) {
        /* change type to WAIT to wait for the reply */
        msg = proxy->last_nowait;
        msg->type = VFIO_MSG_WAIT;
        proxy->last_nowait = NULL;
        while (!msg->complete) {
            if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) {
                VFIOUserMsgQ *list;

                list = msg->pending ? &proxy->pending : &proxy->outgoing;
                QTAILQ_REMOVE(list, msg, next);
                error_printf("vfio_user_wait_reqs - timed out\n");
                break;
            }
        }

        if (msg->hdr->flags & VFIO_USER_ERROR) {
            error_printf("vfio_user_wait_reqs - error reply on async "
                         "request: command %x error %s\n", msg->hdr->command,
                         strerror(msg->hdr->error_reply));
        }

        /* change type back to NOWAIT so recycling can free it */
        msg->type = VFIO_MSG_NOWAIT;
        vfio_user_recycle(proxy, msg);
    }

    qemu_mutex_unlock(&proxy->lock);
}

static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
    QLIST_HEAD_INITIALIZER(vfio_user_sockets);
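
Note the design choice in vfio_user_wait_reqs(): only the most recent nowait
message is tracked (proxy->last_nowait), so waiting on it alone suffices only
if replies complete in order. An illustrative caller pattern (mirroring
container.c; the message variables are hypothetical):

    /*
     * Illustrative nowait usage:
     *
     *   vfio_user_send_nowait(proxy, &m1->hdr, NULL, 0, &err);   queued
     *   vfio_user_send_nowait(proxy, &m2->hdr, fds,  0, &err);   queued;
     *                                        proxy->last_nowait = m2
     *   ...
     *   vfio_user_wait_reqs(proxy);   waits only on m2; by the time its
     *                                 reply is processed, m1's has been
     *                                 too, assuming in-order reply handling.
     */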
hw/vfio-user/proxy.h
@@ -70,6 +70,7 @@ typedef struct VFIOUserProxy {
    QemuCond close_cv;
    AioContext *ctx;
    QEMUBH *req_bh;
    bool async_ops;

    /*
     * above only changed when BQL is held

@@ -99,9 +100,14 @@ void vfio_user_set_handler(VFIODevice *vbasedev,
                           void *reqarg);
bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp);

VFIOUserFDs *vfio_user_getfds(int numfds);

void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
                           uint32_t size, uint32_t flags);
void vfio_user_wait_reqs(VFIOUserProxy *proxy);
bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                         VFIOUserFDs *fds, int rsize, Error **errp);
bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                           VFIOUserFDs *fds, int rsize, Error **errp);

#endif /* VFIO_USER_PROXY_H */
hw/vfio-user/trace-events
@@ -13,3 +13,7 @@ vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index
vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d offset 0x%"PRIx64" count %d"
vfio_user_get_irq_info(uint32_t index, uint32_t flags, uint32_t count) " index %d flags 0x%x count %d"
vfio_user_set_irqs(uint32_t index, uint32_t start, uint32_t count, uint32_t flags) " index %d start %d count %d flags 0x%x"

# container.c
vfio_user_dma_map(uint64_t iova, uint64_t size, uint64_t off, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" off 0x%"PRIx64" flags 0x%x async_ops %d"
vfio_user_dma_unmap(uint64_t iova, uint64_t size, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" flags 0x%x async_ops %d"