qemu/hw/virtio/vhost-user-fs.c
Haoqian He bc85aae420 vhost-user: return failure if backend crash when live migration
Live migration should be terminated if the vhost-user backend crashes
before the migration completes.

Specifically, the vhost device is stopped when the VM is stopped shortly
before live migration completes. In the current implementation, if the
backend crashes at that point, the vhost-user device's set_status() does
not return failure, so live migration never notices that the connection
between QEMU and the backend has been lost.

When the VM is migrated to the destination, inflight IO is resubmitted;
if some of that IO had already completed out of order, resubmitting it
causes IO errors.

To fix this issue:
1. Add the return value to set_status() for VirtioDeviceClass.
  a. For the vhost-user device, return failure when the backend crashes.
  b. For other virtio devices, always return 0.
2. For the vhost-user device, return failure if vhost_dev_stop() fails.

With this fix, if QEMU loses its connection to the vhost-user backend,
virtio set_status() returns failure to the upper layer; migration_completion()
can then handle the error, terminate the live migration, and restore the VM,
so that inflight IO completes normally.

Signed-off-by: Haoqian He <haoqian.he@smartx.com>
Message-Id: <20250416024729.3289157-4-haoqian.he@smartx.com>
Tested-by: Lei Yang <leiyang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2025-05-14 05:39:15 -04:00
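
For illustration, a minimal sketch of the new contract follows. The
int-returning set_status() hook matches vuf_set_status() in the file below;
the caller function is hypothetical and greatly simplified (the real path
goes through virtio core, vm_stop() and migration_completion()):

/*
 * Sketch only: VirtioDeviceClass::set_status() now returns an error code
 * instead of void, and callers on the VM-stop/migration path propagate it.
 */
static int example_virtio_stop(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int ret = 0;

    if (k->set_status) {
        /* vhost-user devices return < 0 if the back-end has crashed */
        ret = k->set_status(vdev, 0 /* device stopped/reset */);
    }
    if (ret < 0) {
        /*
         * The caller (e.g. migration_completion()) aborts the migration
         * and resumes the VM so inflight IO can complete normally.
         */
        return ret;
    }
    return 0;
}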

/*
 * Vhost-user filesystem virtio device
 *
 * Copyright 2018-2019 Red Hat, Inc.
 *
 * Authors:
 *  Stefan Hajnoczi <stefanha@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version. See the COPYING file in the
 * top-level directory.
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include "standard-headers/linux/virtio_fs.h"
#include "qapi/error.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "qemu/error-report.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user-fs.h"
#include "monitor/monitor.h"
#include "system/system.h"

static const int user_feature_bits[] = {
    VIRTIO_F_VERSION_1,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_RESET,
    VIRTIO_F_IN_ORDER,
    VIRTIO_F_NOTIFICATION_DATA,

    VHOST_INVALID_FEATURE_BIT
};

static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    struct virtio_fs_config fscfg = {};

    memcpy((char *)fscfg.tag, fs->conf.tag,
           MIN(strlen(fs->conf.tag) + 1, sizeof(fscfg.tag)));

    virtio_stl_p(vdev, &fscfg.num_request_queues, fs->conf.num_request_queues);

    memcpy(config, &fscfg, sizeof(fscfg));
}

static void vuf_start(VirtIODevice *vdev)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int ret;
    int i;

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return;
    }

    ret = vhost_dev_enable_notifiers(&fs->vhost_dev, vdev);
    if (ret < 0) {
        error_report("Error enabling host notifiers: %d", -ret);
        return;
    }

    ret = k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, true);
    if (ret < 0) {
        error_report("Error binding guest notifier: %d", -ret);
        goto err_host_notifiers;
    }

    fs->vhost_dev.acked_features = vdev->guest_features;
    ret = vhost_dev_start(&fs->vhost_dev, vdev, true);
    if (ret < 0) {
        error_report("Error starting vhost: %d", -ret);
        goto err_guest_notifiers;
    }

    /*
     * guest_notifier_mask/pending not used yet, so just unmask
     * everything here. virtio-pci will do the right thing by
     * enabling/disabling irqfd.
     */
    for (i = 0; i < fs->vhost_dev.nvqs; i++) {
        vhost_virtqueue_mask(&fs->vhost_dev, vdev, i, false);
    }

    return;

err_guest_notifiers:
    k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false);
err_host_notifiers:
    vhost_dev_disable_notifiers(&fs->vhost_dev, vdev);
}
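
/*
 * The value of vhost_dev_stop() is returned to the caller: if the
 * vhost-user back-end has crashed, stopping fails, and propagating the
 * error lets an in-progress live migration be terminated and the VM
 * restored so that inflight IO can complete (see the commit message).
 */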
static int vuf_stop(VirtIODevice *vdev)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int ret;

    if (!k->set_guest_notifiers) {
        return 0;
    }

    ret = vhost_dev_stop(&fs->vhost_dev, vdev, true);

    if (k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false) < 0) {
        error_report("vhost guest notifier cleanup failed: %d", ret);
        return -1;
    }

    vhost_dev_disable_notifiers(&fs->vhost_dev, vdev);
    return ret;
}
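
/*
 * VirtioDeviceClass::set_status returns an error code (part of this fix):
 * a failure to stop the vhost-user back-end is reported to the upper
 * layer, e.g. so migration_completion() can handle it.
 */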
static int vuf_set_status(VirtIODevice *vdev, uint8_t status)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    bool should_start = virtio_device_should_start(vdev, status);

    if (vhost_dev_is_started(&fs->vhost_dev) == should_start) {
        return 0;
    }

    if (should_start) {
        vuf_start(vdev);
    } else {
        int ret;

        ret = vuf_stop(vdev);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

static uint64_t vuf_get_features(VirtIODevice *vdev,
                                 uint64_t features,
                                 Error **errp)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);

    return vhost_get_features(&fs->vhost_dev, user_feature_bits, features);
}

static void vuf_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    /*
     * Not normally called; it's the daemon that handles the queue;
     * however virtio's cleanup path can call this.
     */
}

static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                    bool mask)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);

    /*
     * VIRTIO_CONFIG_IRQ_IDX (-1) denotes the config interrupt; this
     * device does not support masking it, so return early.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        return;
    }

    vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask);
}

static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);

    /*
     * VIRTIO_CONFIG_IRQ_IDX (-1) denotes the config interrupt; this
     * device does not support it, so report it as not pending.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        return false;
    }

    return vhost_virtqueue_pending(&fs->vhost_dev, idx);
}

static void vuf_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VHostUserFS *fs = VHOST_USER_FS(dev);
    unsigned int i;
    size_t len;
    int ret;

    if (!fs->conf.chardev.chr) {
        error_setg(errp, "missing chardev");
        return;
    }

    if (!fs->conf.tag) {
        error_setg(errp, "missing tag property");
        return;
    }
    len = strlen(fs->conf.tag);
    if (len == 0) {
        error_setg(errp, "tag property cannot be empty");
        return;
    }
    if (len > sizeof_field(struct virtio_fs_config, tag)) {
        error_setg(errp, "tag property must be %zu bytes or less",
                   sizeof_field(struct virtio_fs_config, tag));
        return;
    }

    if (fs->conf.num_request_queues == 0) {
        error_setg(errp, "num-request-queues property must be larger than 0");
        return;
    }

    if (!is_power_of_2(fs->conf.queue_size)) {
        error_setg(errp, "queue-size property must be a power of 2");
        return;
    }

    if (fs->conf.queue_size > VIRTQUEUE_MAX_SIZE) {
        error_setg(errp, "queue-size property must be %u or smaller",
                   VIRTQUEUE_MAX_SIZE);
        return;
    }

    if (!vhost_user_init(&fs->vhost_user, &fs->conf.chardev, errp)) {
        return;
    }

    virtio_init(vdev, VIRTIO_ID_FS, sizeof(struct virtio_fs_config));

    /* Hiprio queue */
    fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);

    /* Request queues */
    fs->req_vqs = g_new(VirtQueue *, fs->conf.num_request_queues);
    for (i = 0; i < fs->conf.num_request_queues; i++) {
        fs->req_vqs[i] = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
    }

    /* 1 high prio queue, plus the number configured */
    fs->vhost_dev.nvqs = 1 + fs->conf.num_request_queues;
    fs->vhost_dev.vqs = g_new0(struct vhost_virtqueue, fs->vhost_dev.nvqs);

    ret = vhost_dev_init(&fs->vhost_dev, &fs->vhost_user,
                         VHOST_BACKEND_TYPE_USER, 0, errp);
    if (ret < 0) {
        goto err_virtio;
    }

    return;

err_virtio:
    vhost_user_cleanup(&fs->vhost_user);
    virtio_delete_queue(fs->hiprio_vq);
    for (i = 0; i < fs->conf.num_request_queues; i++) {
        virtio_delete_queue(fs->req_vqs[i]);
    }
    g_free(fs->req_vqs);
    virtio_cleanup(vdev);
    g_free(fs->vhost_dev.vqs);
}

static void vuf_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VHostUserFS *fs = VHOST_USER_FS(dev);
    struct vhost_virtqueue *vhost_vqs = fs->vhost_dev.vqs;
    int i;

    /* This will stop vhost backend if appropriate. */
    vuf_set_status(vdev, 0);

    vhost_dev_cleanup(&fs->vhost_dev);
    vhost_user_cleanup(&fs->vhost_user);

    virtio_delete_queue(fs->hiprio_vq);
    for (i = 0; i < fs->conf.num_request_queues; i++) {
        virtio_delete_queue(fs->req_vqs[i]);
    }
    g_free(fs->req_vqs);
    virtio_cleanup(vdev);
    g_free(vhost_vqs);
}

static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    return &fs->vhost_dev;
}

/**
 * Fetch the internal state from virtiofsd and save it to `f`.
 */
static int vuf_save_state(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field, JSONWriter *vmdesc)
{
    VirtIODevice *vdev = pv;
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    Error *local_error = NULL;
    int ret;

    ret = vhost_save_backend_state(&fs->vhost_dev, f, &local_error);
    if (ret < 0) {
        error_reportf_err(local_error,
                          "Error saving back-end state of %s device %s "
                          "(tag: \"%s\"): ",
                          vdev->name, vdev->parent_obj.canonical_path,
                          fs->conf.tag ?: "<none>");
        return ret;
    }

    return 0;
}

/**
 * Load virtiofsd's internal state from `f` and send it over to virtiofsd.
 */
static int vuf_load_state(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field)
{
    VirtIODevice *vdev = pv;
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    Error *local_error = NULL;
    int ret;

    ret = vhost_load_backend_state(&fs->vhost_dev, f, &local_error);
    if (ret < 0) {
        error_reportf_err(local_error,
                          "Error loading back-end state of %s device %s "
                          "(tag: \"%s\"): ",
                          vdev->name, vdev->parent_obj.canonical_path,
                          fs->conf.tag ?: "<none>");
        return ret;
    }

    return 0;
}

static bool vuf_is_internal_migration(void *opaque)
{
    /* TODO: Return false when an external migration is requested */
    return true;
}

static int vuf_check_migration_support(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VHostUserFS *fs = VHOST_USER_FS(vdev);

    if (!vhost_supports_device_state(&fs->vhost_dev)) {
        error_report("Back-end of %s device %s (tag: \"%s\") does not support "
                     "migration through qemu",
                     vdev->name, vdev->parent_obj.canonical_path,
                     fs->conf.tag ?: "<none>");
        return -ENOTSUP;
    }

    return 0;
}

static const VMStateDescription vuf_backend_vmstate;

static const VMStateDescription vuf_vmstate = {
    .name = "vhost-user-fs",
    .version_id = 0,
    .fields = (const VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vuf_backend_vmstate,
        NULL,
    }
};

static const VMStateDescription vuf_backend_vmstate = {
    .name = "vhost-user-fs-backend",
    .version_id = 0,
    .needed = vuf_is_internal_migration,
    .pre_load = vuf_check_migration_support,
    .pre_save = vuf_check_migration_support,
    .fields = (const VMStateField[]) {
        {
            .name = "back-end",
            .info = &(const VMStateInfo) {
                .name = "virtio-fs back-end state",
                .get = vuf_load_state,
                .put = vuf_save_state,
            },
        },
        VMSTATE_END_OF_LIST()
    },
};

static const Property vuf_properties[] = {
    DEFINE_PROP_CHR("chardev", VHostUserFS, conf.chardev),
    DEFINE_PROP_STRING("tag", VHostUserFS, conf.tag),
    DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
                       conf.num_request_queues, 1),
    DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
};

static void vuf_instance_init(Object *obj)
{
    VHostUserFS *fs = VHOST_USER_FS(obj);

    device_add_bootindex_property(obj, &fs->bootindex, "bootindex",
                                  "/filesystem@0", DEVICE(obj));
}

static void vuf_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, vuf_properties);
    dc->vmsd = &vuf_vmstate;
    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    vdc->realize = vuf_device_realize;
    vdc->unrealize = vuf_device_unrealize;
    vdc->get_features = vuf_get_features;
    vdc->get_config = vuf_get_config;
    vdc->set_status = vuf_set_status;
    vdc->guest_notifier_mask = vuf_guest_notifier_mask;
    vdc->guest_notifier_pending = vuf_guest_notifier_pending;
    vdc->get_vhost = vuf_get_vhost;
}

static const TypeInfo vuf_info = {
    .name = TYPE_VHOST_USER_FS,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VHostUserFS),
    .instance_init = vuf_instance_init,
    .class_init = vuf_class_init,
};

static void vuf_register_types(void)
{
    type_register_static(&vuf_info);
}

type_init(vuf_register_types)