/*
 * Copyright (C) 2009      Citrix Ltd.
 * Author Vincent Hanquez
 * Author Stefano Stabellini
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; version 2.1 only. with the special
 * exception on linking described in file LICENSE.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 */

#include "libxl_osdeps.h" /* must come before any other headers */

#include "libxl_internal.h"

#define PCI_BDF                "%04x:%02x:%02x.%01x"
#define PCI_BDF_SHORT          "%02x:%02x.%01x"
#define PCI_BDF_VDEVFN         "%04x:%02x:%02x.%01x@%02x"
#define PCI_OPTIONS            "msitranslate=%d,power_mgmt=%d"
#define PCI_BDF_XSPATH         "%04x-%02x-%02x-%01x"
#define PCI_PT_QDEV_ID         "pci-pt-%02x_%02x.%01x"

/*
 * Pack a device address into one integer: domain from bit 16 upwards,
 * bus in bits 15:8, device in bits 7:3 and function in bits 2:0.
 */
static unsigned int pcidev_encode_bdf(libxl_device_pci *pcidev)
{
    unsigned int value;

    value = pcidev->domain << 16;
    value |= (pcidev->bus & 0xff) << 8;
    value |= (pcidev->dev & 0x1f) << 3;
    value |= (pcidev->func & 0x7);

    return value;
}

/* Store the given address components and virtual devfn into *pcidev. */
static void pcidev_struct_fill(libxl_device_pci *pcidev, unsigned int domain,
                               unsigned int bus, unsigned int dev,
                               unsigned int func, unsigned int vdevfn)
{
    pcidev->domain = domain;
    pcidev->bus = bus;
    pcidev->dev = dev;
    pcidev->func = func;
    pcidev->vdevfn = vdevfn;
}

/*
 * Append to 'back' the per-device backend keys for slot 'num':
 * key-N, dev-N, (vdevfn-N when a virtual devfn is set), opts-N and state-N.
 */
static void libxl_create_pci_backend_device(libxl__gc *gc,
                                            flexarray_t *back,
                                            int num,
                                            const libxl_device_pci *pcidev)
{
    flexarray_append(back, GCSPRINTF("key-%d", num));
    flexarray_append(back, GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus,
                                     pcidev->dev, pcidev->func));
    flexarray_append(back, GCSPRINTF("dev-%d", num));
    flexarray_append(back, GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus,
                                     pcidev->dev, pcidev->func));
    if (pcidev->vdevfn)
        flexarray_append_pair(back, GCSPRINTF("vdevfn-%d", num),
                              GCSPRINTF("%x", pcidev->vdevfn));
    flexarray_append(back, GCSPRINTF("opts-%d", num));
    flexarray_append(back,
                     GCSPRINTF("msitranslate=%d,power_mgmt=%d,permissive=%d",
                               pcidev->msitranslate, pcidev->power_mgmt,
                               pcidev->permissive));
    flexarray_append_pair(back, GCSPRINTF("state-%d", num),
                          GCSPRINTF("%d", XenbusStateInitialising));
}

/*
 * Fill in the libxl__device describing the PCI bus of 'domid'.  The devid
 * and the backend domid/devid are all fixed at 0; 'pcidev' is not consulted.
 */
static void libxl__device_from_pcidev(libxl__gc *gc, uint32_t domid,
                                      const libxl_device_pci *pcidev,
                                      libxl__device *device)
{
    device->backend_devid = 0;
    device->backend_domid = 0;
    device->backend_kind = LIBXL__DEVICE_KIND_PCI;
    device->devid = 0;
    device->domid = domid;
    device->kind = LIBXL__DEVICE_KIND_PCI;
}

/*
 * Create the PCI backend and frontend xenstore entries for 'domid', listing
 * the first 'num' elements of the 'pcidev' array.  Returns the result of
 * libxl__device_generic_add().
 */
static int libxl__create_pci_backend(libxl__gc *gc, uint32_t domid,
                                     const libxl_device_pci *pcidev,
                                     int num)
{
    flexarray_t *front = NULL;
    flexarray_t *back = NULL;
    libxl__device device;
    int i;

    front = flexarray_make(gc, 16, 1);
    back = flexarray_make(gc, 16, 1);

    LOGD(DEBUG, domid, "Creating pci backend");

    /* add pci device */
    libxl__device_from_pcidev(gc, domid, pcidev, &device);

    flexarray_append_pair(back, "frontend-id", GCSPRINTF("%d", domid));
    flexarray_append_pair(back, "online", "1");
    flexarray_append_pair(back, "state",
                          GCSPRINTF("%d", XenbusStateInitialising));
    flexarray_append_pair(back, "domain", libxl__domid_to_name(gc, domid));

    for (i = 0; i < num; i++, pcidev++)
        libxl_create_pci_backend_device(gc, back, i, pcidev);

    flexarray_append_pair(back, "num_devs", GCSPRINTF("%d", num));
    flexarray_append_pair(front, "backend-id", GCSPRINTF("%d", 0));
    flexarray_append_pair(front, "state",
                          GCSPRINTF("%d", XenbusStateInitialising));

    return libxl__device_generic_add(gc, XBT_NULL, &device,
                                     libxl__xs_kvs_of_flexarray(gc, back),
                                     libxl__xs_kvs_of_flexarray(gc, front),
                                     NULL);
}

/*
 * Record 'pcidev' in the xenstore backend directory of 'domid'.
 *
 * If no backend exists yet (no num_devs node) a new backend holding this one
 * device is created.  Otherwise the device is appended at index num_devs and
 * num_devs is incremented; for a non-stubdomain the stored domain
 * configuration is updated in the same xenstore transaction.
 *
 * 'starting' is true while the domain is being created; when false the
 * backend is asked to reconfigure, and for PV guests we first wait for it
 * to be connected.
 *
 * Returns 0 on success or a libxl error code.  Every exit goes through the
 * common cleanup path so that d_config is always disposed once initialised.
 */
static int libxl__device_pci_add_xenstore(libxl__gc *gc, uint32_t domid,
                                          const libxl_device_pci *pcidev,
                                          bool starting)
{
    flexarray_t *back;
    char *num_devs, *be_path;
    int num = 0;
    xs_transaction_t t = XBT_NULL;
    int rc;
    libxl_domain_config d_config;
    libxl__flock *lock = NULL;
    libxl_domain_type domtype;
    bool is_stubdomain = libxl_is_stubdom(CTX, domid, NULL);

    /* Stubdomain doesn't have own config. */
    if (!is_stubdomain)
        libxl_domain_config_init(&d_config);

    be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
                                                LIBXL__DEVICE_KIND_PCI);
    num_devs = libxl__xs_read(gc, XBT_NULL,
                              GCSPRINTF("%s/num_devs", be_path));
    if (!num_devs) {
        rc = libxl__create_pci_backend(gc, domid, pcidev, 1);
        goto out;
    }

    domtype = libxl__domain_type(gc, domid);
    if (domtype == LIBXL_DOMAIN_TYPE_INVALID) {
        rc = ERROR_FAIL;
        goto out;
    }

    if (!starting && domtype == LIBXL_DOMAIN_TYPE_PV) {
        if (libxl__wait_for_backend(gc, be_path,
                                    GCSPRINTF("%d", XenbusStateConnected)) < 0) {
            rc = ERROR_FAIL;
            goto out;
        }
    }

    back = flexarray_make(gc, 16, 1);

    LOGD(DEBUG, domid, "Adding new pci device to xenstore");
    num = atoi(num_devs);
    libxl_create_pci_backend_device(gc, back, num, pcidev);
    flexarray_append_pair(back, "num_devs", GCSPRINTF("%d", num + 1));
    if (!starting)
        flexarray_append_pair(back, "state",
                              GCSPRINTF("%d", XenbusStateReconfiguring));

    /*
     * Stubdomain config is derived from its target domain, it doesn't have
     * its own file.
     */
    if (!is_stubdomain) {
        lock = libxl__lock_domain_userdata(gc, domid);
        if (!lock) {
            rc = ERROR_LOCK_FAIL;
            goto out;
        }

        rc = libxl__get_domain_configuration(gc, domid, &d_config);
        if (rc) goto out;

        device_add_domain_config(gc, &d_config, &libxl__pcidev_devtype,
                                 pcidev);

        rc = libxl__dm_check_start(gc, &d_config, domid);
        if (rc) goto out;
    }

    for (;;) {
        rc = libxl__xs_transaction_start(gc, &t);
        if (rc) goto out;

        if (lock) {
            rc = libxl__set_domain_configuration(gc, domid, &d_config);
            if (rc) goto out;
        }

        libxl__xs_writev(gc, t, be_path, libxl__xs_kvs_of_flexarray(gc, back));

        /* 0: committed; >0: conflict, run the loop again; <0: hard error */
        rc = libxl__xs_transaction_commit(gc, &t);
        if (!rc) break;
        if (rc < 0) goto out;
    }

out:
    libxl__xs_transaction_abort(gc, &t);
    if (lock)
        libxl__unlock_file(lock);
    if (!is_stubdomain)
        libxl_domain_config_dispose(&d_config);
    return rc;
}

/*
 * Within transaction 't', move backend node "<be_path>/<name>-<from>" to
 * "<be_path>/<name>-<to>".  A node that does not exist is silently skipped.
 */
static void pci_xs_move_node(libxl__gc *gc, xs_transaction_t t,
                             const char *be_path, const char *name,
                             int from, int to)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *src = GCSPRINTF("%s/%s-%d", be_path, name, from);
    char *val = libxl__xs_read(gc, t, src);

    if (!val)
        return;

    xs_write(ctx->xsh, t, GCSPRINTF("%s/%s-%d", be_path, name, to),
             val, strlen(val));
    xs_rm(ctx->xsh, t, src);
}

/*
 * Remove 'pcidev' from the xenstore backend directory of 'domid'.
 *
 * The device's index is looked up by comparing its address with each dev-N
 * node.  Its state is set to Closing and the backend to Reconfiguring; then
 * its nodes are deleted, num_devs is decremented and all higher-numbered
 * entries are shifted down by one.  When it was the only device the whole
 * backend device is destroyed.
 *
 * Returns 0 on success, ERROR_INVAL if there is no backend or the device is
 * not listed, ERROR_FAIL on other errors.
 */
static int libxl__device_pci_remove_xenstore(libxl__gc *gc, uint32_t domid,
                                             libxl_device_pci *pcidev)
{
    static const char *const per_dev_nodes[] = {
        "state", "dev", "key", "vdev", "opts", "vdevfn",
    };
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *be_path, *num_devs_path, *num_devs, *xsdev;
    int num, i, j;
    size_t k;
    xs_transaction_t t;

    be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
                                                LIBXL__DEVICE_KIND_PCI);
    num_devs_path = GCSPRINTF("%s/num_devs", be_path);
    num_devs = libxl__xs_read(gc, XBT_NULL, num_devs_path);
    if (!num_devs)
        return ERROR_INVAL;
    num = atoi(num_devs);

    libxl_domain_type domtype = libxl__domain_type(gc, domid);
    if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
        return ERROR_FAIL;

    if (domtype == LIBXL_DOMAIN_TYPE_PV) {
        if (libxl__wait_for_backend(gc, be_path,
                                    GCSPRINTF("%d", XenbusStateConnected)) < 0) {
            LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
            return ERROR_FAIL;
        }
    }

    for (i = 0; i < num; i++) {
        unsigned int domain = 0, bus = 0, dev = 0, func = 0;

        xsdev = libxl__xs_read(gc, XBT_NULL,
                               GCSPRINTF("%s/dev-%d", be_path, i));
        /*
         * A missing or malformed node cannot be the device we look for;
         * skip it instead of handing NULL (or a partial parse) onwards.
         */
        if (!xsdev ||
            sscanf(xsdev, PCI_BDF, &domain, &bus, &dev, &func) != 4)
            continue;
        if (domain == pcidev->domain && bus == pcidev->bus &&
            pcidev->dev == dev && pcidev->func == func) {
            break;
        }
    }
    if (i == num) {
        LOGD(ERROR, domid, "Couldn't find the device on xenstore");
        return ERROR_INVAL;
    }

retry_transaction:
    t = xs_transaction_start(ctx->xsh);
    xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i),
             GCSPRINTF("%d", XenbusStateClosing), 1);
    xs_write(ctx->xsh, t, GCSPRINTF("%s/state", be_path),
             GCSPRINTF("%d", XenbusStateReconfiguring), 1);
    /* Only a conflict (EAGAIN) is retried; other failures are not acted on. */
    if (!xs_transaction_end(ctx->xsh, t, 0))
        if (errno == EAGAIN)
            goto retry_transaction;

    if (domtype == LIBXL_DOMAIN_TYPE_PV) {
        if (libxl__wait_for_backend(gc, be_path,
                                    GCSPRINTF("%d", XenbusStateConnected)) < 0) {
            LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
            return ERROR_FAIL;
        }
    }

retry_transaction2:
    t = xs_transaction_start(ctx->xsh);
    xs_rm(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i));
    xs_rm(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, i));
    xs_rm(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, i));
    xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, i));
    xs_rm(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, i));
    xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, i));
    libxl__xs_printf(gc, t, num_devs_path, "%d", num - 1);

    /* Close the gap: entry j becomes entry j - 1. */
    for (j = i + 1; j < num; j++) {
        for (k = 0; k < sizeof(per_dev_nodes) / sizeof(per_dev_nodes[0]); k++)
            pci_xs_move_node(gc, t, be_path, per_dev_nodes[k], j, j - 1);
    }
    if (!xs_transaction_end(ctx->xsh, t, 0))
        if (errno == EAGAIN)
            goto retry_transaction2;

    if (num == 1) {
        /* That was the last device: tear down the backend altogether. */
        libxl__device dev;

        if (libxl__parse_backend_path(gc, be_path, &dev) != 0)
            return ERROR_FAIL;

        dev.domid = domid;
        dev.kind = LIBXL__DEVICE_KIND_PCI;
        dev.devid = 0;

        libxl__device_destroy(gc, &dev);
        return 0;
    }

    return 0;
}

/*
 * Collect the address of every PCI device listed under a dom0 PCI backend
 * (dev-N nodes) for any domain found in /local/domain.
 *
 * On success returns 0; *list (possibly NULL) is handed to the gc via
 * libxl__ptr_add() and *num holds the element count.  On allocation failure
 * returns ERROR_NOMEM with *list == NULL and *num == 0.
 */
static int get_all_assigned_devices(libxl__gc *gc, libxl_device_pci **list,
                                    int *num)
{
    char **domlist;
    unsigned int nd = 0, i;

    *list = NULL;
    *num = 0;

    domlist = libxl__xs_directory(gc, XBT_NULL, "/local/domain", &nd);
    for (i = 0; i < nd; i++) {
        char *path, *num_devs;
        int ndev, j;

        path = GCSPRINTF("/local/domain/0/backend/%s/%s/0/num_devs",
                         libxl__device_kind_to_string(LIBXL__DEVICE_KIND_PCI),
                         domlist[i]);
        num_devs = libxl__xs_read(gc, XBT_NULL, path);
        if (!num_devs)
            continue;

        ndev = atoi(num_devs);
        for (j = 0; j < ndev; j++) {
            unsigned dom, bus, dev, func;
            libxl_device_pci *grown;
            char *devpath, *bdf;

            devpath = GCSPRINTF("/local/domain/0/backend/%s/%s/0/dev-%u",
                        libxl__device_kind_to_string(LIBXL__DEVICE_KIND_PCI),
                        domlist[i], (unsigned int)j);
            bdf = libxl__xs_read(gc, XBT_NULL, devpath);
            if (!bdf)
                continue;
            if (sscanf(bdf, PCI_BDF, &dom, &bus, &dev, &func) != 4)
                continue;

            /* Grow via a temporary so the old array is not lost on failure. */
            grown = realloc(*list, sizeof(**list) * ((*num) + 1));
            if (grown == NULL) {
                free(*list);
                *list = NULL;
                *num = 0;
                return ERROR_NOMEM;
            }
            *list = grown;
            memset(grown + *num, 0, sizeof(*grown));
            pcidev_struct_fill(grown + *num, dom, bus, dev, func, 0);
            (*num)++;
        }
    }
    libxl__ptr_add(gc, *list);

    return 0;
}

/*
 * Return 1 if the address dom:bus:dev.func appears among the first
 * 'num_assigned' entries of 'assigned', 0 otherwise.
 */
static int is_pcidev_in_array(libxl_device_pci *assigned, int num_assigned,
                              int dom, int bus, int dev, int func)
{
    int idx;

    for (idx = 0; idx < num_assigned; idx++) {
        const libxl_device_pci *cand = &assigned[idx];

        if (cand->domain == dom && cand->bus == bus &&
            cand->dev == dev && cand->func == func)
            return 1;
    }

    return 0;
}

/*
 * Write the standard BDF into the sysfs path given by sysfs_path.
 * Returns 0 on success, ERROR_FAIL if the file cannot be opened or written.
 */
static int sysfs_write_bdf(libxl__gc *gc, const char * sysfs_path,
                           libxl_device_pci *pcidev)
{
    int rc, fd;
    char *buf;

    fd = open(sysfs_path, O_WRONLY);
    if (fd < 0) {
        LOGE(ERROR, "Couldn't open %s", sysfs_path);
        return ERROR_FAIL;
    }

    buf = GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus,
                    pcidev->dev, pcidev->func);
    rc = write(fd, buf, strlen(buf));
    /* Annoying to have two if's, but we need the errno */
    if (rc < 0)
        LOGE(ERROR, "write to %s returned %d", sysfs_path, rc);
    close(fd);

    if (rc < 0)
        return ERROR_FAIL;

    return 0;
}

/*
 * List the devices bound to pciback that are not currently listed in any
 * domain's PCI backend.
 *
 * Returns an array grown with realloc() (NULL when empty or on error) and
 * stores the number of entries in *num.  The array is not registered with
 * the gc, so presumably the caller is expected to free it.
 * A directory entry for which realloc() fails is skipped.
 */
libxl_device_pci *libxl_device_pci_assignable_list(libxl_ctx *ctx, int *num)
{
    GC_INIT(ctx);
    libxl_device_pci *pcidevs = NULL, *new, *assigned;
    struct dirent *de;
    DIR *dir;
    int r, num_assigned;

    *num = 0;

    r = get_all_assigned_devices(gc, &assigned, &num_assigned);
    if (r) goto out;

    dir = opendir(SYSFS_PCIBACK_DRIVER);
    if (NULL == dir) {
        if (errno == ENOENT) {
            LOG(ERROR, "Looks like pciback driver not loaded");
        } else {
            LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER);
        }
        goto out;
    }

    while ((de = readdir(dir))) {
        unsigned dom, bus, dev, func;

        /* Only entries named like a BDF are devices. */
        if (sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4)
            continue;

        if (is_pcidev_in_array(assigned, num_assigned, dom, bus, dev, func))
            continue;

        new = realloc(pcidevs, ((*num) + 1) * sizeof(*new));
        if (NULL == new)
            continue;

        pcidevs = new;
        new = pcidevs + *num;

        memset(new, 0, sizeof(*new));
        pcidev_struct_fill(new, dom, bus, dev, func, 0);
        (*num)++;
    }

    closedir(dir);
out:
    GC_FREE;
    return pcidevs;
}

/* Unbind device from its current driver, if any.  If driver_path is non-NULL,
 * store the path to the original driver in it.
*/ static int sysfs_dev_unbind(libxl__gc *gc, libxl_device_pci *pcidev, char **driver_path) { char * spath, *dp = NULL; struct stat st; spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/driver", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); if ( !lstat(spath, &st) ) { /* Find the canonical path to the driver. */ dp = libxl__zalloc(gc, PATH_MAX); dp = realpath(spath, dp); if ( !dp ) { LOGE(ERROR, "realpath() failed"); return -1; } LOG(DEBUG, "Driver re-plug path: %s", dp); /* Unbind from the old driver */ spath = GCSPRINTF("%s/unbind", dp); if ( sysfs_write_bdf(gc, spath, pcidev) < 0 ) { LOGE(ERROR, "Couldn't unbind device"); return -1; } } if ( driver_path ) *driver_path = dp; return 0; } static uint16_t sysfs_dev_get_vendor(libxl__gc *gc, libxl_device_pci *pcidev) { char *pci_device_vendor_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/vendor", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); uint16_t read_items; uint16_t pci_device_vendor; FILE *f = fopen(pci_device_vendor_path, "r"); if (!f) { LOGE(ERROR, "pci device "PCI_BDF" does not have vendor attribute", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); return 0xffff; } read_items = fscanf(f, "0x%hx\n", &pci_device_vendor); fclose(f); if (read_items != 1) { LOGE(ERROR, "cannot read vendor of pci device "PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); return 0xffff; } return pci_device_vendor; } static uint16_t sysfs_dev_get_device(libxl__gc *gc, libxl_device_pci *pcidev) { char *pci_device_device_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/device", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); uint16_t read_items; uint16_t pci_device_device; FILE *f = fopen(pci_device_device_path, "r"); if (!f) { LOGE(ERROR, "pci device "PCI_BDF" does not have device attribute", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); return 0xffff; } read_items = fscanf(f, "0x%hx\n", &pci_device_device); fclose(f); if (read_items != 1) { LOGE(ERROR, "cannot read device of pci device 
"PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); return 0xffff; } return pci_device_device; } static int sysfs_dev_get_class(libxl__gc *gc, libxl_device_pci *pcidev, unsigned long *class) { char *pci_device_class_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/class", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); int read_items, ret = 0; FILE *f = fopen(pci_device_class_path, "r"); if (!f) { LOGE(ERROR, "pci device "PCI_BDF" does not have class attribute", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); ret = ERROR_FAIL; goto out; } read_items = fscanf(f, "0x%lx\n", class); fclose(f); if (read_items != 1) { LOGE(ERROR, "cannot read class of pci device "PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); ret = ERROR_FAIL; } out: return ret; } /* * Some devices may need some ways to work well. Here like IGD, * we have to pass a specific option to qemu. */ bool libxl__is_igd_vga_passthru(libxl__gc *gc, const libxl_domain_config *d_config) { unsigned int i; uint16_t pt_vendor, pt_device; unsigned long class; for (i = 0 ; i < d_config->num_pcidevs ; i++) { libxl_device_pci *pcidev = &d_config->pcidevs[i]; pt_vendor = sysfs_dev_get_vendor(gc, pcidev); pt_device = sysfs_dev_get_device(gc, pcidev); if (pt_vendor == 0xffff || pt_device == 0xffff || pt_vendor != 0x8086) continue; if (sysfs_dev_get_class(gc, pcidev, &class)) continue; if (class == 0x030000) return true; } return false; } /* * A brief comment about slots. I don't know what slots are for; however, * I have by experimentation determined: * - Before a device can be bound to pciback, its BDF must first be listed * in pciback/slots * - The way to get the BDF listed there is to write BDF to * pciback/new_slot * - Writing the same BDF to pciback/new_slot is not idempotent; it results * in two entries of the BDF in pciback/slots * It's not clear whether having two entries in pciback/slots is a problem * or not. 
Just to be safe, this code does the conservative thing, and * first checks to see if there is a slot, adding one only if one does not * already exist. */ /* Scan through /sys/.../pciback/slots looking for pcidev's BDF */ static int pciback_dev_has_slot(libxl__gc *gc, libxl_device_pci *pcidev) { FILE *f; int rc = 0; unsigned dom, bus, dev, func; f = fopen(SYSFS_PCIBACK_DRIVER"/slots", "r"); if (f == NULL) { LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER"/slots"); return ERROR_FAIL; } while(fscanf(f, "%x:%x:%x.%d\n", &dom, &bus, &dev, &func)==4) { if(dom == pcidev->domain && bus == pcidev->bus && dev == pcidev->dev && func == pcidev->func) { rc = 1; goto out; } } out: fclose(f); return rc; } static int pciback_dev_is_assigned(libxl__gc *gc, libxl_device_pci *pcidev) { char * spath; int rc; struct stat st; if ( access(SYSFS_PCIBACK_DRIVER, F_OK) < 0 ) { if ( errno == ENOENT ) { LOG(ERROR, "Looks like pciback driver is not loaded"); } else { LOGE(ERROR, "Can't access "SYSFS_PCIBACK_DRIVER); } return -1; } spath = GCSPRINTF(SYSFS_PCIBACK_DRIVER"/"PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); rc = lstat(spath, &st); if( rc == 0 ) return 1; if ( rc < 0 && errno == ENOENT ) return 0; LOGE(ERROR, "Accessing %s", spath); return -1; } static int pciback_dev_assign(libxl__gc *gc, libxl_device_pci *pcidev) { int rc; if ( (rc=pciback_dev_has_slot(gc, pcidev)) < 0 ) { LOGE(ERROR, "Error checking for pciback slot"); return ERROR_FAIL; } else if (rc == 0) { if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/new_slot", pcidev) < 0 ) { LOGE(ERROR, "Couldn't bind device to pciback!"); return ERROR_FAIL; } } if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/bind", pcidev) < 0 ) { LOGE(ERROR, "Couldn't bind device to pciback!"); return ERROR_FAIL; } return 0; } static int pciback_dev_unassign(libxl__gc *gc, libxl_device_pci *pcidev) { /* Remove from pciback */ if ( sysfs_dev_unbind(gc, pcidev, NULL) < 0 ) { LOG(ERROR, "Couldn't unbind device!"); return ERROR_FAIL; } 
/* Remove slot if necessary */ if ( pciback_dev_has_slot(gc, pcidev) > 0 ) { if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/remove_slot", pcidev) < 0 ) { LOGE(ERROR, "Couldn't remove pciback slot"); return ERROR_FAIL; } } return 0; } #define PCIBACK_INFO_PATH "/libxl/pciback" static void pci_assignable_driver_path_write(libxl__gc *gc, libxl_device_pci *pcidev, char *driver_path) { char *path; path = GCSPRINTF(PCIBACK_INFO_PATH"/"PCI_BDF_XSPATH"/driver_path", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); if ( libxl__xs_printf(gc, XBT_NULL, path, "%s", driver_path) < 0 ) { LOGE(WARN, "Write of %s to node %s failed.", driver_path, path); } } static char * pci_assignable_driver_path_read(libxl__gc *gc, libxl_device_pci *pcidev) { return libxl__xs_read(gc, XBT_NULL, GCSPRINTF( PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH "/driver_path", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func)); } static void pci_assignable_driver_path_remove(libxl__gc *gc, libxl_device_pci *pcidev) { libxl_ctx *ctx = libxl__gc_owner(gc); /* Remove the xenstore entry */ xs_rm(ctx->xsh, XBT_NULL, GCSPRINTF(PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func) ); } static int libxl__device_pci_assignable_add(libxl__gc *gc, libxl_device_pci *pcidev, int rebind) { libxl_ctx *ctx = libxl__gc_owner(gc); unsigned dom, bus, dev, func; char *spath, *driver_path = NULL; int rc; struct stat st; /* Local copy for convenience */ dom = pcidev->domain; bus = pcidev->bus; dev = pcidev->dev; func = pcidev->func; /* See if the device exists */ spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF, dom, bus, dev, func); if ( lstat(spath, &st) ) { LOGE(ERROR, "Couldn't lstat %s", spath); return ERROR_FAIL; } /* Check to see if it's already assigned to pciback */ rc = pciback_dev_is_assigned(gc, pcidev); if ( rc < 0 ) { return ERROR_FAIL; } if ( rc ) { LOG(WARN, PCI_BDF" already assigned to pciback", dom, bus, dev, func); goto quarantine; } /* Check to see if there's already a 
driver that we need to unbind from */ if ( sysfs_dev_unbind(gc, pcidev, &driver_path ) ) { LOG(ERROR, "Couldn't unbind "PCI_BDF" from driver", dom, bus, dev, func); return ERROR_FAIL; } /* Store driver_path for rebinding to dom0 */ if ( rebind ) { if ( driver_path ) { pci_assignable_driver_path_write(gc, pcidev, driver_path); } else if ( (driver_path = pci_assignable_driver_path_read(gc, pcidev)) != NULL ) { LOG(INFO, PCI_BDF" not bound to a driver, will be rebound to %s", dom, bus, dev, func, driver_path); } else { LOG(WARN, PCI_BDF" not bound to a driver, will not be rebound.", dom, bus, dev, func); } } else { pci_assignable_driver_path_remove(gc, pcidev); } if ( pciback_dev_assign(gc, pcidev) ) { LOG(ERROR, "Couldn't bind device to pciback!"); return ERROR_FAIL; } quarantine: /* * DOMID_IO is just a sentinel domain, without any actual mappings, * so always pass XEN_DOMCTL_DEV_RDM_RELAXED to avoid assignment being * unnecessarily denied. */ rc = xc_assign_device(ctx->xch, DOMID_IO, pcidev_encode_bdf(pcidev), XEN_DOMCTL_DEV_RDM_RELAXED); if ( rc < 0 ) { LOG(ERROR, "failed to quarantine "PCI_BDF, dom, bus, dev, func); return ERROR_FAIL; } return 0; } static int libxl__device_pci_assignable_remove(libxl__gc *gc, libxl_device_pci *pcidev, int rebind) { libxl_ctx *ctx = libxl__gc_owner(gc); int rc; char *driver_path; /* De-quarantine */ rc = xc_deassign_device(ctx->xch, DOMID_IO, pcidev_encode_bdf(pcidev)); if ( rc < 0 ) { LOG(ERROR, "failed to de-quarantine "PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); return ERROR_FAIL; } /* Unbind from pciback */ if ( (rc=pciback_dev_is_assigned(gc, pcidev)) < 0 ) { return ERROR_FAIL; } else if ( rc ) { pciback_dev_unassign(gc, pcidev); } else { LOG(WARN, "Not bound to pciback"); } /* Rebind if necessary */ driver_path = pci_assignable_driver_path_read(gc, pcidev); if ( driver_path ) { if ( rebind ) { LOG(INFO, "Rebinding to driver at %s", driver_path); if ( sysfs_write_bdf(gc, GCSPRINTF("%s/bind", 
driver_path), pcidev) < 0 ) { LOGE(ERROR, "Couldn't bind device to %s", driver_path); return -1; } pci_assignable_driver_path_remove(gc, pcidev); } } else { if ( rebind ) { LOG(WARN, "Couldn't find path for original driver; not rebinding"); } } return 0; } int libxl_device_pci_assignable_add(libxl_ctx *ctx, libxl_device_pci *pcidev, int rebind) { GC_INIT(ctx); int rc; rc = libxl__device_pci_assignable_add(gc, pcidev, rebind); GC_FREE; return rc; } int libxl_device_pci_assignable_remove(libxl_ctx *ctx, libxl_device_pci *pcidev, int rebind) { GC_INIT(ctx); int rc; rc = libxl__device_pci_assignable_remove(gc, pcidev, rebind); GC_FREE; return rc; } /* * This function checks that all functions of a device are bound to pciback * driver. It also initialises a bit-mask of which function numbers are present * on that device. */ static int pci_multifunction_check(libxl__gc *gc, libxl_device_pci *pcidev, unsigned int *func_mask) { struct dirent *de; DIR *dir; *func_mask = 0; dir = opendir(SYSFS_PCI_DEV); if ( NULL == dir ) { LOGE(ERROR, "Couldn't open %s", SYSFS_PCI_DEV); return -1; } while( (de = readdir(dir)) ) { unsigned dom, bus, dev, func; struct stat st; char *path; if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) continue; if ( pcidev->domain != dom ) continue; if ( pcidev->bus != bus ) continue; if ( pcidev->dev != dev ) continue; path = GCSPRINTF("%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func); if ( lstat(path, &st) ) { if ( errno == ENOENT ) LOG(ERROR, PCI_BDF " is not assigned to pciback driver", dom, bus, dev, func); else LOGE(ERROR, "Couldn't lstat %s", path); closedir(dir); return -1; } (*func_mask) |= (1 << func); } closedir(dir); return 0; } static int pci_ins_check(libxl__gc *gc, uint32_t domid, const char *state, void *priv) { char *orig_state = priv; if ( !strcmp(state, "pci-insert-failed") ) return -1; if ( !strcmp(state, "pci-inserted") ) return 0; if ( !strcmp(state, orig_state) ) return 1; return 1; } static int 
qemu_pci_add_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev) { libxl_ctx *ctx = libxl__gc_owner(gc); int rc = 0; char *path; char *state, *vdevfn; uint32_t dm_domid; dm_domid = libxl_get_stubdom_id(CTX, domid); path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state"); state = libxl__xs_read(gc, XBT_NULL, path); path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter"); if (pcidev->vdevfn) { libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF_VDEVFN","PCI_OPTIONS, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func, pcidev->vdevfn, pcidev->msitranslate, pcidev->power_mgmt); } else { libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF","PCI_OPTIONS, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func, pcidev->msitranslate, pcidev->power_mgmt); } libxl__qemu_traditional_cmd(gc, domid, "pci-ins"); rc = libxl__wait_for_device_model_deprecated(gc, domid, NULL, NULL, pci_ins_check, state); path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter"); vdevfn = libxl__xs_read(gc, XBT_NULL, path); path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state"); if ( rc < 0 ) LOGD(ERROR, domid, "qemu refused to add device: %s", vdevfn); else if ( sscanf(vdevfn, "0x%x", &pcidev->vdevfn) != 1 ) { LOGD(ERROR, domid, "wrong format for the vdevfn: '%s'", vdevfn); rc = -1; } xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state)); return rc; } static int check_qemu_running(libxl__gc *gc, libxl_domid domid, libxl__xswait_state *xswa, int rc, const char *state) { if (rc) { if (rc == ERROR_TIMEDOUT) { LOGD(ERROR, domid, "%s not ready", xswa->what); } goto out; } if (!state || strcmp(state, "running")) return ERROR_NOT_READY; out: libxl__xswait_stop(gc, xswa); return rc; } typedef struct pci_add_state { /* filled by user of do_pci_add */ libxl__ao_device *aodev; libxl_domid domid; bool starting; void (*callback)(libxl__egc *, struct pci_add_state *, int rc); /* private to device_pci_add_stubdom_wait */ libxl__ev_devstate pciback_ds; /* private to do_pci_add */ 
libxl__xswait_state xswait; libxl__ev_qmp qmp; libxl__ev_time timeout; libxl_device_pci *pcidev; int pci_domid; } pci_add_state; static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc, libxl__xswait_state *xswa, int rc, const char *state); static void pci_add_qmp_device_add(libxl__egc *, pci_add_state *); static void pci_add_qmp_device_add_cb(libxl__egc *, libxl__ev_qmp *, const libxl__json_object *, int rc); static void pci_add_qmp_query_pci_cb(libxl__egc *, libxl__ev_qmp *, const libxl__json_object *, int rc); static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev, const struct timeval *requested_abs, int rc); static void pci_add_dm_done(libxl__egc *, pci_add_state *, int rc); static void do_pci_add(libxl__egc *egc, libxl_domid domid, libxl_device_pci *pcidev, pci_add_state *pas) { STATE_AO_GC(pas->aodev->ao); libxl_domain_type type = libxl__domain_type(gc, domid); int rc; /* init pci_add_state */ libxl__xswait_init(&pas->xswait); libxl__ev_qmp_init(&pas->qmp); pas->pcidev = pcidev; pas->pci_domid = domid; libxl__ev_time_init(&pas->timeout); if (type == LIBXL_DOMAIN_TYPE_INVALID) { rc = ERROR_FAIL; goto out; } if (type == LIBXL_DOMAIN_TYPE_HVM) { switch (libxl__device_model_version_running(gc, domid)) { case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: pas->xswait.ao = ao; pas->xswait.what = "Device Model"; pas->xswait.path = DEVICE_MODEL_XS_PATH(gc, libxl_get_stubdom_id(CTX, domid), domid, "/state"); pas->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000; pas->xswait.callback = pci_add_qemu_trad_watch_state_cb; rc = libxl__xswait_start(gc, &pas->xswait); if (rc) goto out; return; case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: pci_add_qmp_device_add(egc, pas); /* must be last */ return; default: rc = ERROR_INVAL; break; } } rc = 0; out: pci_add_dm_done(egc, pas, rc); /* must be last */ } static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc, libxl__xswait_state *xswa, int rc, const char *state) { pci_add_state *pas = 
CONTAINER_OF(xswa, *pas, xswait); STATE_AO_GC(pas->aodev->ao); /* Convenience aliases */ libxl_domid domid = pas->domid; libxl_device_pci *pcidev = pas->pcidev; rc = check_qemu_running(gc, domid, xswa, rc, state); if (rc == ERROR_NOT_READY) return; if (rc) goto out; rc = qemu_pci_add_xenstore(gc, domid, pcidev); out: pci_add_dm_done(egc, pas, rc); /* must be last */ } static void pci_add_qmp_device_add(libxl__egc *egc, pci_add_state *pas) { STATE_AO_GC(pas->aodev->ao); libxl__json_object *args = NULL; int rc; /* Convenience aliases */ libxl_domid domid = pas->domid; libxl_device_pci *pcidev = pas->pcidev; libxl__ev_qmp *const qmp = &pas->qmp; rc = libxl__ev_time_register_rel(ao, &pas->timeout, pci_add_timeout, LIBXL_QMP_CMD_TIMEOUT * 1000); if (rc) goto out; libxl__qmp_param_add_string(gc, &args, "driver", "xen-pci-passthrough"); QMP_PARAMETERS_SPRINTF(&args, "id", PCI_PT_QDEV_ID, pcidev->bus, pcidev->dev, pcidev->func); QMP_PARAMETERS_SPRINTF(&args, "hostaddr", "%04x:%02x:%02x.%01x", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); if (pcidev->vdevfn) { QMP_PARAMETERS_SPRINTF(&args, "addr", "%x.%x", PCI_SLOT(pcidev->vdevfn), PCI_FUNC(pcidev->vdevfn)); } /* * Version of QEMU prior to the XSA-131 fix did not support * this property and were effectively always in permissive * mode. The fix for XSA-131 switched the default to be * restricted by default and added the permissive property. * * Therefore in order to support both old and new QEMU we only * set the permissive flag if it is true. Users of older QEMU * have no reason to set the flag so this is ok. 
*/ if (pcidev->permissive) libxl__qmp_param_add_bool(gc, &args, "permissive", true); qmp->ao = pas->aodev->ao; qmp->domid = domid; qmp->payload_fd = -1; qmp->callback = pci_add_qmp_device_add_cb; rc = libxl__ev_qmp_send(egc, qmp, "device_add", args); if (rc) goto out; return; out: pci_add_dm_done(egc, pas, rc); /* must be last */ } static void pci_add_qmp_device_add_cb(libxl__egc *egc, libxl__ev_qmp *qmp, const libxl__json_object *response, int rc) { EGC_GC; pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp); if (rc) goto out; qmp->callback = pci_add_qmp_query_pci_cb; rc = libxl__ev_qmp_send(egc, qmp, "query-pci", NULL); if (rc) goto out; return; out: pci_add_dm_done(egc, pas, rc); /* must be last */ } static void pci_add_qmp_query_pci_cb(libxl__egc *egc, libxl__ev_qmp *qmp, const libxl__json_object *response, int rc) { EGC_GC; pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp); const libxl__json_object *bus = NULL; char *asked_id; int i, j; const libxl__json_object *devices = NULL; const libxl__json_object *device = NULL; const libxl__json_object *o = NULL; const char *id = NULL; int dev_slot, dev_func; /* Convenience aliases */ libxl_device_pci *pcidev = pas->pcidev; if (rc) goto out; /* `query-pci' returns: * [ * {'bus': 'int', * 'devices': [ * {'bus': 'int', 'slot': 'int', 'function': 'int', * 'class_info': 'PciDeviceClass', 'id': 'PciDeviceId', * '*irq': 'int', 'qdev_id': 'str', * '*pci_bridge': 'PciBridgeInfo', * 'regions': ['PciMemoryRegion'] * } * ] * } * ] * (See qemu.git/qapi/ for the struct that aren't detailed here) */ asked_id = GCSPRINTF(PCI_PT_QDEV_ID, pcidev->bus, pcidev->dev, pcidev->func); for (i = 0; (bus = libxl__json_array_get(response, i)); i++) { devices = libxl__json_map_get("devices", bus, JSON_ARRAY); if (!devices) { rc = ERROR_QEMU_API; goto out; } for (j = 0; (device = libxl__json_array_get(devices, j)); j++) { o = libxl__json_map_get("qdev_id", device, JSON_STRING); if (!o) { rc = ERROR_QEMU_API; goto out; } id = 
libxl__json_object_get_string(o); if (!id || strcmp(asked_id, id)) continue; o = libxl__json_map_get("slot", device, JSON_INTEGER); if (!o) { rc = ERROR_QEMU_API; goto out; } dev_slot = libxl__json_object_get_integer(o); o = libxl__json_map_get("function", device, JSON_INTEGER); if (!o) { rc = ERROR_QEMU_API; goto out; } dev_func = libxl__json_object_get_integer(o); pcidev->vdevfn = PCI_DEVFN(dev_slot, dev_func); rc = 0; goto out; } } rc = ERROR_FAIL; LOGD(ERROR, qmp->domid, "PCI device id '%s' wasn't found in QEMU's 'query-pci' response.", asked_id); out: if (rc == ERROR_QEMU_API) { LOGD(ERROR, qmp->domid, "Unexpected response to QMP cmd 'query-pci', received:\n%s", JSON(response)); } pci_add_dm_done(egc, pas, rc); /* must be last */ } static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev, const struct timeval *requested_abs, int rc) { pci_add_state *pas = CONTAINER_OF(ev, *pas, timeout); pci_add_dm_done(egc, pas, rc); } static void pci_add_dm_done(libxl__egc *egc, pci_add_state *pas, int rc) { STATE_AO_GC(pas->aodev->ao); libxl_ctx *ctx = libxl__gc_owner(gc); libxl_domid domid = pas->pci_domid; char *sysfs_path; FILE *f; unsigned long long start, end, flags, size; int irq, i; int r; uint32_t flag = XEN_DOMCTL_DEV_RDM_RELAXED; uint32_t domainid = domid; bool isstubdom = libxl_is_stubdom(ctx, domid, &domainid); /* Convenience aliases */ bool starting = pas->starting; libxl_device_pci *pcidev = pas->pcidev; bool hvm = libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM; libxl__ev_qmp_dispose(gc, &pas->qmp); if (rc) goto out; /* stubdomain is always running by now, even at create time */ if (isstubdom) starting = false; sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); f = fopen(sysfs_path, "r"); start = end = flags = size = 0; irq = 0; if (f == NULL) { LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path); rc = ERROR_FAIL; goto out; } for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) { if 
(fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3) continue; size = end - start + 1; if (start) { if (flags & PCI_BAR_IO) { r = xc_domain_ioport_permission(ctx->xch, domid, start, size, 1); if (r < 0) { LOGED(ERROR, domainid, "xc_domain_ioport_permission 0x%llx/0x%llx (error %d)", start, size, r); fclose(f); rc = ERROR_FAIL; goto out; } } else { r = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT, (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 1); if (r < 0) { LOGED(ERROR, domainid, "xc_domain_iomem_permission 0x%llx/0x%llx (error %d)", start, size, r); fclose(f); rc = ERROR_FAIL; goto out; } } } } fclose(f); sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); f = fopen(sysfs_path, "r"); if (f == NULL) { LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path); goto out_no_irq; } if ((fscanf(f, "%u", &irq) == 1) && irq) { r = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq); if (r < 0) { LOGED(ERROR, domainid, "xc_physdev_map_pirq irq=%d (error=%d)", irq, r); fclose(f); rc = ERROR_FAIL; goto out; } r = xc_domain_irq_permission(ctx->xch, domid, irq, 1); if (r < 0) { LOGED(ERROR, domainid, "xc_domain_irq_permission irq=%d (error=%d)", irq, r); fclose(f); rc = ERROR_FAIL; goto out; } } fclose(f); /* Don't restrict writes to the PCI config space from this VM */ if (pcidev->permissive) { if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/permissive", pcidev) < 0 ) { LOGD(ERROR, domainid, "Setting permissive for device"); rc = ERROR_FAIL; goto out; } } out_no_irq: if (!isstubdom) { if (pcidev->rdm_policy == LIBXL_RDM_RESERVE_POLICY_STRICT) { flag &= ~XEN_DOMCTL_DEV_RDM_RELAXED; } else if (pcidev->rdm_policy != LIBXL_RDM_RESERVE_POLICY_RELAXED) { LOGED(ERROR, domainid, "unknown rdm check flag."); rc = ERROR_FAIL; goto out; } r = xc_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev), flag); if (r < 0 && (hvm || errno != ENOSYS)) { LOGED(ERROR, domainid, "xc_assign_device failed"); rc = 
ERROR_FAIL; goto out; } } if (!starting && !libxl_get_stubdom_id(CTX, domid)) rc = libxl__device_pci_add_xenstore(gc, domid, pcidev, starting); else rc = 0; out: libxl__ev_time_deregister(gc, &pas->timeout); pas->callback(egc, pas, rc); } static int libxl__device_pci_reset(libxl__gc *gc, unsigned int domain, unsigned int bus, unsigned int dev, unsigned int func) { char *reset; int fd, rc; reset = GCSPRINTF("%s/do_flr", SYSFS_PCIBACK_DRIVER); fd = open(reset, O_WRONLY); if (fd >= 0) { char *buf = GCSPRINTF(PCI_BDF, domain, bus, dev, func); rc = write(fd, buf, strlen(buf)); if (rc < 0) LOGD(ERROR, domain, "write to %s returned %d", reset, rc); close(fd); return rc < 0 ? rc : 0; } if (errno != ENOENT) LOGED(ERROR, domain, "Failed to access pciback path %s", reset); reset = GCSPRINTF("%s/"PCI_BDF"/reset", SYSFS_PCI_DEV, domain, bus, dev, func); fd = open(reset, O_WRONLY); if (fd >= 0) { rc = write(fd, "1", 1); if (rc < 0) LOGED(ERROR, domain, "write to %s returned %d", reset, rc); close(fd); return rc < 0 ? 
rc : 0; } if (errno == ENOENT) { LOGD(ERROR, domain, "The kernel doesn't support reset from sysfs for PCI device "PCI_BDF, domain, bus, dev, func); } else { LOGED(ERROR, domain, "Failed to access reset path %s", reset); } return -1; } int libxl__device_pci_setdefault(libxl__gc *gc, uint32_t domid, libxl_device_pci *pci, bool hotplug) { /* We'd like to force reserve rdm specific to a device by default.*/ if (pci->rdm_policy == LIBXL_RDM_RESERVE_POLICY_INVALID) pci->rdm_policy = LIBXL_RDM_RESERVE_POLICY_STRICT; return 0; } int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev, const libxl_asyncop_how *ao_how) { AO_CREATE(ctx, domid, ao_how); libxl__ao_device *aodev; GCNEW(aodev); libxl__prepare_ao_device(ao, aodev); aodev->action = LIBXL__DEVICE_ACTION_ADD; aodev->callback = device_addrm_aocomplete; aodev->update_json = true; libxl__device_pci_add(egc, domid, pcidev, false, aodev); return AO_INPROGRESS; } static int libxl_pcidev_assignable(libxl_ctx *ctx, libxl_device_pci *pcidev) { libxl_device_pci *pcidevs; int num, i; pcidevs = libxl_device_pci_assignable_list(ctx, &num); for (i = 0; i < num; i++) { if (pcidevs[i].domain == pcidev->domain && pcidevs[i].bus == pcidev->bus && pcidevs[i].dev == pcidev->dev && pcidevs[i].func == pcidev->func) break; } free(pcidevs); return i != num; } static void device_pci_add_stubdom_wait(libxl__egc *egc, pci_add_state *pas, int rc); static void device_pci_add_stubdom_ready(libxl__egc *egc, libxl__ev_devstate *ds, int rc); static void device_pci_add_stubdom_done(libxl__egc *egc, pci_add_state *, int rc); static void device_pci_add_done(libxl__egc *egc, pci_add_state *, int rc); void libxl__device_pci_add(libxl__egc *egc, uint32_t domid, libxl_device_pci *pcidev, bool starting, libxl__ao_device *aodev) { STATE_AO_GC(aodev->ao); libxl_ctx *ctx = libxl__gc_owner(gc); libxl_device_pci *assigned; int num_assigned, rc; int stubdomid = 0; pci_add_state *pas; /* Store *pcidev to be used by callbacks */ 
aodev->device_config = pcidev; aodev->device_type = &libxl__pcidev_devtype; GCNEW(pas); pas->aodev = aodev; pas->domid = domid; pas->starting = starting; pas->callback = device_pci_add_stubdom_done; if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) { rc = xc_test_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev)); if (rc) { LOGD(ERROR, domid, "PCI device %04x:%02x:%02x.%u %s?", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func, errno == EOPNOTSUPP ? "cannot be assigned - no IOMMU" : "already assigned to a different guest"); goto out; } } rc = libxl__device_pci_setdefault(gc, domid, pcidev, !starting); if (rc) goto out; if (pcidev->seize && !pciback_dev_is_assigned(gc, pcidev)) { rc = libxl__device_pci_assignable_add(gc, pcidev, 1); if ( rc ) goto out; } if (!libxl_pcidev_assignable(ctx, pcidev)) { LOGD(ERROR, domid, "PCI device %x:%x:%x.%x is not assignable", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); rc = ERROR_FAIL; goto out; } rc = get_all_assigned_devices(gc, &assigned, &num_assigned); if ( rc ) { LOGD(ERROR, domid, "cannot determine if device is assigned, refusing to continue"); goto out; } if ( is_pcidev_in_array(assigned, num_assigned, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func) ) { LOGD(ERROR, domid, "PCI device already attached to a domain"); rc = ERROR_FAIL; goto out; } libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); stubdomid = libxl_get_stubdom_id(ctx, domid); if (stubdomid != 0) { libxl_device_pci *pcidev_s; GCNEW(pcidev_s); libxl_device_pci_init(pcidev_s); libxl_device_pci_copy(CTX, pcidev_s, pcidev); pas->callback = device_pci_add_stubdom_wait; do_pci_add(egc, stubdomid, pcidev_s, pas); /* must be last */ return; } device_pci_add_stubdom_done(egc, pas, 0); /* must be last */ return; out: device_pci_add_done(egc, pas, rc); /* must be last */ } static void device_pci_add_stubdom_wait(libxl__egc *egc, pci_add_state *pas, int rc) { libxl__ao_device *aodev = pas->aodev; 
STATE_AO_GC(aodev->ao); int stubdomid = libxl_get_stubdom_id(CTX, pas->domid); char *state_path; if (rc) goto out; /* Wait for the device actually being connected, otherwise device model * running there will fail to find the device. */ state_path = GCSPRINTF("%s/state", libxl__domain_device_backend_path(gc, 0, stubdomid, 0, LIBXL__DEVICE_KIND_PCI)); rc = libxl__ev_devstate_wait(ao, &pas->pciback_ds, device_pci_add_stubdom_ready, state_path, XenbusStateConnected, LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000); if (rc) goto out; return; out: device_pci_add_done(egc, pas, rc); /* must be last */ } static void device_pci_add_stubdom_ready(libxl__egc *egc, libxl__ev_devstate *ds, int rc) { pci_add_state *pas = CONTAINER_OF(ds, *pas, pciback_ds); device_pci_add_stubdom_done(egc, pas, rc); /* must be last */ } static void device_pci_add_stubdom_done(libxl__egc *egc, pci_add_state *pas, int rc) { STATE_AO_GC(pas->aodev->ao); unsigned int orig_vdev, pfunc_mask; int i; /* Convenience aliases */ libxl__ao_device *aodev = pas->aodev; libxl_domid domid = pas->domid; libxl_device_pci *pcidev = aodev->device_config; if (rc) goto out; orig_vdev = pcidev->vdevfn & ~7U; if ( pcidev->vfunc_mask == LIBXL_PCI_FUNC_ALL ) { if ( !(pcidev->vdevfn >> 3) ) { LOGD(ERROR, domid, "Must specify a v-slot for multi-function devices"); rc = ERROR_INVAL; goto out; } if ( pci_multifunction_check(gc, pcidev, &pfunc_mask) ) { rc = ERROR_FAIL; goto out; } pcidev->vfunc_mask &= pfunc_mask; /* so now vfunc_mask == pfunc_mask */ }else{ pfunc_mask = (1 << pcidev->func); } for(rc = 0, i = 7; i >= 0; --i) { if ( (1 << i) & pfunc_mask ) { if ( pcidev->vfunc_mask == pfunc_mask ) { pcidev->func = i; pcidev->vdevfn = orig_vdev | i; }else{ /* if not passing through multiple devices in a block make * sure that virtual function number 0 is always used otherwise * guest won't see the device */ pcidev->vdevfn = orig_vdev; } pas->callback = device_pci_add_done; do_pci_add(egc, domid, pcidev, pas); /* must be last */ return; 
} } out: device_pci_add_done(egc, pas, rc); } static void device_pci_add_done(libxl__egc *egc, pci_add_state *pas, int rc) { EGC_GC; libxl__ao_device *aodev = pas->aodev; libxl_domid domid = pas->domid; libxl_device_pci *pcidev = aodev->device_config; if (rc) { LOGD(ERROR, domid, "libxl__device_pci_add failed for " "PCI device %x:%x:%x.%x (rc %d)", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func, rc); } aodev->rc = rc; aodev->callback(egc, aodev); } typedef struct { libxl__multidev multidev; libxl__ao_device *outer_aodev; libxl_domain_config *d_config; libxl_domid domid; } add_pcidevs_state; static void add_pcidevs_done(libxl__egc *, libxl__multidev *, int rc); static void libxl__add_pcidevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid, libxl_domain_config *d_config, libxl__multidev *multidev) { AO_GC; add_pcidevs_state *apds; int i; /* We need to start a new multidev in order to be able to execute * libxl__create_pci_backend only once. */ GCNEW(apds); apds->outer_aodev = libxl__multidev_prepare(multidev); apds->d_config = d_config; apds->domid = domid; apds->multidev.callback = add_pcidevs_done; libxl__multidev_begin(ao, &apds->multidev); for (i = 0; i < d_config->num_pcidevs; i++) { libxl__ao_device *aodev = libxl__multidev_prepare(&apds->multidev); libxl__device_pci_add(egc, domid, &d_config->pcidevs[i], true, aodev); } libxl__multidev_prepared(egc, &apds->multidev, 0); } static void add_pcidevs_done(libxl__egc *egc, libxl__multidev *multidev, int rc) { EGC_GC; add_pcidevs_state *apds = CONTAINER_OF(multidev, *apds, multidev); /* Convenience aliases */ libxl_domain_config *d_config = apds->d_config; libxl_domid domid = apds->domid; libxl__ao_device *aodev = apds->outer_aodev; if (rc) goto out; if (d_config->num_pcidevs > 0 && !libxl_get_stubdom_id(CTX, domid)) { rc = libxl__create_pci_backend(gc, domid, d_config->pcidevs, d_config->num_pcidevs); if (rc < 0) { LOGD(ERROR, domid, "libxl_create_pci_backend failed: %d", rc); goto out; } } out: aodev->rc = 
rc; aodev->callback(egc, aodev); } static int qemu_pci_remove_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev, int force) { libxl_ctx *ctx = libxl__gc_owner(gc); char *state; char *path; uint32_t dm_domid; dm_domid = libxl_get_stubdom_id(CTX, domid); path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state"); state = libxl__xs_read(gc, XBT_NULL, path); path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter"); libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); /* Remove all functions at once atomically by only signalling * device-model for function 0 */ if ( !force && (pcidev->vdevfn & 0x7) == 0 ) { libxl__qemu_traditional_cmd(gc, domid, "pci-rem"); if (libxl__wait_for_device_model_deprecated(gc, domid, "pci-removed", NULL, NULL, NULL) < 0) { LOGD(ERROR, domid, "Device Model didn't respond in time"); /* This depends on guest operating system acknowledging the * SCI, if it doesn't respond in time then we may wish to * force the removal. 
*/ return ERROR_FAIL; } } path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state"); xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state)); return 0; } typedef struct pci_remove_state { libxl__ao_device *aodev; libxl_domid domid; libxl_device_pci *pcidev; bool force; bool hvm; unsigned int orig_vdev; unsigned int pfunc_mask; int next_func; libxl__ao_device stubdom_aodev; libxl__xswait_state xswait; libxl__ev_qmp qmp; libxl__ev_time timeout; libxl__ev_time retry_timer; } pci_remove_state; static void libxl__device_pci_remove_common(libxl__egc *egc, uint32_t domid, libxl_device_pci *pcidev, bool force, libxl__ao_device *aodev); static void device_pci_remove_common_next(libxl__egc *egc, pci_remove_state *prs, int rc); static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc, libxl__xswait_state *xswa, int rc, const char *state); static void pci_remove_qmp_device_del(libxl__egc *egc, pci_remove_state *prs); static void pci_remove_qmp_device_del_cb(libxl__egc *egc, libxl__ev_qmp *qmp, const libxl__json_object *response, int rc); static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc, libxl__ev_time *ev, const struct timeval *requested_abs, int rc); static void pci_remove_qmp_query_cb(libxl__egc *egc, libxl__ev_qmp *qmp, const libxl__json_object *response, int rc); static void pci_remove_timeout(libxl__egc *egc, libxl__ev_time *ev, const struct timeval *requested_abs, int rc); static void pci_remove_detatched(libxl__egc *egc, pci_remove_state *prs, int rc); static void pci_remove_stubdom_done(libxl__egc *egc, libxl__ao_device *aodev); static void pci_remove_done(libxl__egc *egc, pci_remove_state *prs, int rc); static void do_pci_remove(libxl__egc *egc, uint32_t domid, libxl_device_pci *pcidev, int force, pci_remove_state *prs) { STATE_AO_GC(prs->aodev->ao); libxl_ctx *ctx = libxl__gc_owner(gc); libxl_device_pci *assigned; libxl_domain_type type = libxl__domain_type(gc, domid); int rc, num; uint32_t domainid = domid; assigned = libxl_device_pci_list(ctx, 
domid, &num); if (assigned == NULL) { rc = ERROR_FAIL; goto out_fail; } libxl__ptr_add(gc, assigned); rc = ERROR_INVAL; if ( !is_pcidev_in_array(assigned, num, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func) ) { LOGD(ERROR, domainid, "PCI device not attached to this domain"); goto out_fail; } rc = ERROR_FAIL; if (type == LIBXL_DOMAIN_TYPE_HVM) { prs->hvm = true; switch (libxl__device_model_version_running(gc, domid)) { case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: prs->xswait.ao = ao; prs->xswait.what = "Device Model"; prs->xswait.path = DEVICE_MODEL_XS_PATH(gc, libxl_get_stubdom_id(CTX, domid), domid, "/state"); prs->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000; prs->xswait.callback = pci_remove_qemu_trad_watch_state_cb; rc = libxl__xswait_start(gc, &prs->xswait); if (rc) goto out_fail; return; case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: pci_remove_qmp_device_del(egc, prs); /* must be last */ return; default: rc = ERROR_INVAL; goto out_fail; } } else { assert(type == LIBXL_DOMAIN_TYPE_PV); char *sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); FILE *f = fopen(sysfs_path, "r"); unsigned int start = 0, end = 0, flags = 0, size = 0; int irq = 0; int i; if (f == NULL) { LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path); goto skip1; } for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) { if (fscanf(f, "0x%x 0x%x 0x%x\n", &start, &end, &flags) != 3) continue; size = end - start + 1; if (start) { if (flags & PCI_BAR_IO) { rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 0); if (rc < 0) LOGED(ERROR, domainid, "xc_domain_ioport_permission error 0x%x/0x%x", start, size); } else { rc = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT, (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 0); if (rc < 0) LOGED(ERROR, domainid, "xc_domain_iomem_permission error 0x%x/0x%x", start, size); } } } fclose(f); skip1: sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", 
pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); f = fopen(sysfs_path, "r"); if (f == NULL) { LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path); goto skip_irq; } if ((fscanf(f, "%u", &irq) == 1) && irq) { rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq); if (rc < 0) { LOGED(ERROR, domainid, "xc_physdev_unmap_pirq irq=%d", irq); } rc = xc_domain_irq_permission(ctx->xch, domid, irq, 0); if (rc < 0) { LOGED(ERROR, domainid, "xc_domain_irq_permission irq=%d", irq); } } fclose(f); } skip_irq: rc = 0; out_fail: pci_remove_detatched(egc, prs, rc); /* must be last */ } static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc, libxl__xswait_state *xswa, int rc, const char *state) { pci_remove_state *prs = CONTAINER_OF(xswa, *prs, xswait); STATE_AO_GC(prs->aodev->ao); /* Convenience aliases */ libxl_domid domid = prs->domid; libxl_device_pci *const pcidev = prs->pcidev; rc = check_qemu_running(gc, domid, xswa, rc, state); if (rc == ERROR_NOT_READY) return; if (rc) goto out; rc = qemu_pci_remove_xenstore(gc, domid, pcidev, prs->force); out: pci_remove_detatched(egc, prs, rc); } static void pci_remove_qmp_device_del(libxl__egc *egc, pci_remove_state *prs) { STATE_AO_GC(prs->aodev->ao); libxl__json_object *args = NULL; int rc; /* Convenience aliases */ libxl_device_pci *const pcidev = prs->pcidev; rc = libxl__ev_time_register_rel(ao, &prs->timeout, pci_remove_timeout, LIBXL_QMP_CMD_TIMEOUT * 1000); if (rc) goto out; QMP_PARAMETERS_SPRINTF(&args, "id", PCI_PT_QDEV_ID, pcidev->bus, pcidev->dev, pcidev->func); prs->qmp.callback = pci_remove_qmp_device_del_cb; rc = libxl__ev_qmp_send(egc, &prs->qmp, "device_del", args); if (rc) goto out; return; out: pci_remove_detatched(egc, prs, rc); } static void pci_remove_qmp_device_del_cb(libxl__egc *egc, libxl__ev_qmp *qmp, const libxl__json_object *response, int rc) { EGC_GC; pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp); if (rc) goto out; /* Now that the command is sent, we want to wait until QEMU has * confirmed 
that the device is removed. */ /* TODO: Instead of using a poll loop { ev_timer ; query-pci }, it * could be possible to listen to events sent by QEMU via QMP in order * to wait for the passthrough pci-device to be removed from QEMU. */ pci_remove_qmp_retry_timer_cb(egc, &prs->retry_timer, NULL, ERROR_TIMEDOUT); return; out: pci_remove_detatched(egc, prs, rc); } static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc, libxl__ev_time *ev, const struct timeval *requested_abs, int rc) { EGC_GC; pci_remove_state *prs = CONTAINER_OF(ev, *prs, retry_timer); prs->qmp.callback = pci_remove_qmp_query_cb; rc = libxl__ev_qmp_send(egc, &prs->qmp, "query-pci", NULL); if (rc) goto out; return; out: pci_remove_detatched(egc, prs, rc); } static void pci_remove_qmp_query_cb(libxl__egc *egc, libxl__ev_qmp *qmp, const libxl__json_object *response, int rc) { EGC_GC; pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp); const libxl__json_object *bus = NULL; const char *asked_id; int i, j; /* Convenience aliases */ libxl__ao *const ao = prs->aodev->ao; libxl_device_pci *const pcidev = prs->pcidev; if (rc) goto out; libxl__ev_qmp_dispose(gc, qmp); asked_id = GCSPRINTF(PCI_PT_QDEV_ID, pcidev->bus, pcidev->dev, pcidev->func); /* query-pci response: * [{ 'devices': [ 'qdev_id': 'str', ... ], ... }] * */ for (i = 0; (bus = libxl__json_array_get(response, i)); i++) { const libxl__json_object *devices = NULL; const libxl__json_object *device = NULL; const libxl__json_object *o = NULL; const char *id = NULL; devices = libxl__json_map_get("devices", bus, JSON_ARRAY); if (!devices) { rc = ERROR_QEMU_API; goto out; } for (j = 0; (device = libxl__json_array_get(devices, j)); j++) { o = libxl__json_map_get("qdev_id", device, JSON_STRING); if (!o) { rc = ERROR_QEMU_API; goto out; } id = libxl__json_object_get_string(o); if (id && !strcmp(asked_id, id)) { /* Device still in QEMU, need to wait longuer. 
*/ rc = libxl__ev_time_register_rel(ao, &prs->retry_timer, pci_remove_qmp_retry_timer_cb, 1000); if (rc) goto out; return; } } } out: pci_remove_detatched(egc, prs, rc); /* must be last */ } static void pci_remove_timeout(libxl__egc *egc, libxl__ev_time *ev, const struct timeval *requested_abs, int rc) { EGC_GC; pci_remove_state *prs = CONTAINER_OF(ev, *prs, timeout); /* Convenience aliases */ libxl_device_pci *const pcidev = prs->pcidev; LOGD(WARN, prs->domid, "timed out waiting for DM to remove " PCI_PT_QDEV_ID, pcidev->bus, pcidev->dev, pcidev->func); /* If we timed out, we might still want to keep destroying the device * (when force==true), so let the next function decide what to do on * error */ pci_remove_detatched(egc, prs, rc); } static void pci_remove_detatched(libxl__egc *egc, pci_remove_state *prs, int rc) { STATE_AO_GC(prs->aodev->ao); int stubdomid = 0; uint32_t domainid = prs->domid; bool isstubdom; /* Convenience aliases */ libxl_device_pci *const pcidev = prs->pcidev; libxl_domid domid = prs->domid; /* Cleaning QMP states ASAP */ libxl__ev_qmp_dispose(gc, &prs->qmp); libxl__ev_time_deregister(gc, &prs->timeout); libxl__ev_time_deregister(gc, &prs->retry_timer); if (rc && !prs->force) goto out; isstubdom = libxl_is_stubdom(CTX, domid, &domainid); /* don't do multiple resets while some functions are still passed through */ if ( (pcidev->vdevfn & 0x7) == 0 ) { libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); } if (!isstubdom) { rc = xc_deassign_device(CTX->xch, domid, pcidev_encode_bdf(pcidev)); if (rc < 0 && (prs->hvm || errno != ENOSYS)) LOGED(ERROR, domainid, "xc_deassign_device failed"); } stubdomid = libxl_get_stubdom_id(CTX, domid); if (stubdomid != 0) { libxl_device_pci *pcidev_s; libxl__ao_device *const stubdom_aodev = &prs->stubdom_aodev; GCNEW(pcidev_s); libxl_device_pci_init(pcidev_s); libxl_device_pci_copy(CTX, pcidev_s, pcidev); libxl__prepare_ao_device(ao, stubdom_aodev); stubdom_aodev->action = 
LIBXL__DEVICE_ACTION_REMOVE; stubdom_aodev->callback = pci_remove_stubdom_done; stubdom_aodev->update_json = prs->aodev->update_json; libxl__device_pci_remove_common(egc, stubdomid, pcidev_s, prs->force, stubdom_aodev); return; } rc = 0; out: pci_remove_done(egc, prs, rc); } static void pci_remove_stubdom_done(libxl__egc *egc, libxl__ao_device *aodev) { pci_remove_state *prs = CONTAINER_OF(aodev, *prs, stubdom_aodev); pci_remove_done(egc, prs, 0); } static void pci_remove_done(libxl__egc *egc, pci_remove_state *prs, int rc) { EGC_GC; if (rc) goto out; libxl__device_pci_remove_xenstore(gc, prs->domid, prs->pcidev); out: device_pci_remove_common_next(egc, prs, rc); } static void libxl__device_pci_remove_common(libxl__egc *egc, uint32_t domid, libxl_device_pci *pcidev, bool force, libxl__ao_device *aodev) { STATE_AO_GC(aodev->ao); int rc; pci_remove_state *prs; GCNEW(prs); prs->aodev = aodev; prs->domid = domid; prs->pcidev = pcidev; prs->force = force; libxl__xswait_init(&prs->xswait); libxl__ev_qmp_init(&prs->qmp); prs->qmp.ao = prs->aodev->ao; prs->qmp.domid = prs->domid; prs->qmp.payload_fd = -1; libxl__ev_time_init(&prs->timeout); libxl__ev_time_init(&prs->retry_timer); prs->orig_vdev = pcidev->vdevfn & ~7U; if ( pcidev->vfunc_mask == LIBXL_PCI_FUNC_ALL ) { if ( pci_multifunction_check(gc, pcidev, &prs->pfunc_mask) ) { rc = ERROR_FAIL; goto out; } pcidev->vfunc_mask &= prs->pfunc_mask; }else{ prs->pfunc_mask = (1 << pcidev->func); } rc = 0; prs->next_func = 7; out: device_pci_remove_common_next(egc, prs, rc); } static void device_pci_remove_common_next(libxl__egc *egc, pci_remove_state *prs, int rc) { EGC_GC; /* Convenience aliases */ libxl_domid domid = prs->domid; libxl_device_pci *const pcidev = prs->pcidev; libxl__ao_device *const aodev = prs->aodev; const unsigned int pfunc_mask = prs->pfunc_mask; const unsigned int orig_vdev = prs->orig_vdev; if (rc) goto out; while (prs->next_func >= 0) { const int i = prs->next_func; prs->next_func--; if ( (1 << i) & 
pfunc_mask ) { if ( pcidev->vfunc_mask == pfunc_mask ) { pcidev->func = i; pcidev->vdevfn = orig_vdev | i; }else{ pcidev->vdevfn = orig_vdev; } do_pci_remove(egc, domid, pcidev, prs->force, prs); return; } } rc = 0; out: libxl__ev_qmp_dispose(gc, &prs->qmp); libxl__xswait_stop(gc, &prs->xswait); libxl__ev_time_deregister(gc, &prs->timeout); libxl__ev_time_deregister(gc, &prs->retry_timer); aodev->rc = rc; aodev->callback(egc, aodev); } int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev, const libxl_asyncop_how *ao_how) { AO_CREATE(ctx, domid, ao_how); libxl__ao_device *aodev; GCNEW(aodev); libxl__prepare_ao_device(ao, aodev); aodev->action = LIBXL__DEVICE_ACTION_REMOVE; aodev->callback = device_addrm_aocomplete; aodev->update_json = true; libxl__device_pci_remove_common(egc, domid, pcidev, false, aodev); return AO_INPROGRESS; } int libxl_device_pci_destroy(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev, const libxl_asyncop_how *ao_how) { AO_CREATE(ctx, domid, ao_how); libxl__ao_device *aodev; GCNEW(aodev); libxl__prepare_ao_device(ao, aodev); aodev->action = LIBXL__DEVICE_ACTION_REMOVE; aodev->callback = device_addrm_aocomplete; aodev->update_json = true; libxl__device_pci_remove_common(egc, domid, pcidev, true, aodev); return AO_INPROGRESS; } static int libxl__device_pci_from_xs_be(libxl__gc *gc, const char *be_path, libxl_devid nr, void *data) { char *s; unsigned int domain = 0, bus = 0, dev = 0, func = 0, vdevfn = 0; libxl_device_pci *pci = data; s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, nr)); sscanf(s, PCI_BDF, &domain, &bus, &dev, &func); s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/vdevfn-%d", be_path, nr)); if (s) vdevfn = strtol(s, (char **) NULL, 16); pcidev_struct_fill(pci, domain, bus, dev, func, vdevfn); s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/opts-%d", be_path, nr)); if (s) { char *saveptr; char *p = strtok_r(s, ",=", &saveptr); do { while (*p == ' ') p++; if (!strcmp(p, 
"msitranslate")) { p = strtok_r(NULL, ",=", &saveptr); pci->msitranslate = atoi(p); } else if (!strcmp(p, "power_mgmt")) { p = strtok_r(NULL, ",=", &saveptr); pci->power_mgmt = atoi(p); } else if (!strcmp(p, "permissive")) { p = strtok_r(NULL, ",=", &saveptr); pci->permissive = atoi(p); } } while ((p = strtok_r(NULL, ",=", &saveptr)) != NULL); } return 0; } static int libxl__device_pci_get_num(libxl__gc *gc, const char *be_path, unsigned int *num) { char *num_devs; int rc = 0; num_devs = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path)); if (!num_devs) rc = ERROR_FAIL; else *num = atoi(num_devs); return rc; } libxl_device_pci *libxl_device_pci_list(libxl_ctx *ctx, uint32_t domid, int *num) { GC_INIT(ctx); char *be_path; unsigned int n, i; libxl_device_pci *pcidevs = NULL; *num = 0; be_path = libxl__domain_device_backend_path(gc, 0, domid, 0, LIBXL__DEVICE_KIND_PCI); if (libxl__device_pci_get_num(gc, be_path, &n)) goto out; pcidevs = calloc(n, sizeof(libxl_device_pci)); for (i = 0; i < n; i++) libxl__device_pci_from_xs_be(gc, be_path, i, pcidevs + i); *num = n; out: GC_FREE; return pcidevs; } void libxl__device_pci_destroy_all(libxl__egc *egc, uint32_t domid, libxl__multidev *multidev) { STATE_AO_GC(multidev->ao); libxl_device_pci *pcidevs; int num, i; pcidevs = libxl_device_pci_list(CTX, domid, &num); if ( pcidevs == NULL ) return; libxl__ptr_add(gc, pcidevs); for (i = 0; i < num; i++) { /* Force remove on shutdown since, on HVM, qemu will not always * respond to SCI interrupt because the guest kernel has shut down the * devices by the time we even get here! 
*/ libxl__ao_device *aodev = libxl__multidev_prepare(multidev); libxl__device_pci_remove_common(egc, domid, pcidevs + i, true, aodev); } } int libxl__grant_vga_iomem_permission(libxl__gc *gc, const uint32_t domid, libxl_domain_config *const d_config) { int i, ret; if (!libxl_defbool_val(d_config->b_info.u.hvm.gfx_passthru)) return 0; for (i = 0 ; i < d_config->num_pcidevs ; i++) { uint64_t vga_iomem_start = 0xa0000 >> XC_PAGE_SHIFT; uint32_t stubdom_domid; libxl_device_pci *pcidev = &d_config->pcidevs[i]; unsigned long pci_device_class; if (sysfs_dev_get_class(gc, pcidev, &pci_device_class)) continue; if (pci_device_class != 0x030000) /* VGA class */ continue; stubdom_domid = libxl_get_stubdom_id(CTX, domid); ret = xc_domain_iomem_permission(CTX->xch, stubdom_domid, vga_iomem_start, 0x20, 1); if (ret < 0) { LOGED(ERROR, domid, "failed to give stubdom%d access to iomem range " "%"PRIx64"-%"PRIx64" for VGA passthru", stubdom_domid, vga_iomem_start, (vga_iomem_start + 0x20 - 1)); return ret; } ret = xc_domain_iomem_permission(CTX->xch, domid, vga_iomem_start, 0x20, 1); if (ret < 0) { LOGED(ERROR, domid, "failed to give dom%d access to iomem range " "%"PRIx64"-%"PRIx64" for VGA passthru", domid, vga_iomem_start, (vga_iomem_start + 0x20 - 1)); return ret; } break; } return 0; } static int libxl_device_pci_compare(const libxl_device_pci *d1, const libxl_device_pci *d2) { return COMPARE_PCI(d1, d2); } #define libxl__device_pci_update_devid NULL DEFINE_DEVICE_TYPE_STRUCT_X(pcidev, pci, PCI, .get_num = libxl__device_pci_get_num, .from_xenstore = libxl__device_pci_from_xs_be, ); /* * Local variables: * mode: C * c-basic-offset: 4 * indent-tabs-mode: nil * End: */