mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-15 08:14:15 +08:00
lguest: Tell Guest net not to notify us on every packet xmit
virtio_ring has the ability to suppress notifications. This prevents a guest exit for every packet, but we need to set a timer on packet receipt to re-check if there were any remaining packets.

Here are the times for 1G TCP Guest->Host with different timeout settings (it matters because the TCP window doesn't grow big enough to fill the entire buffer):

Timeout value    Seconds    Xmit/Recv/Timeout
None (before)    25.3784    xmit 7750233 recv 1
2500 usec        62.5119    xmit 207020 recv 2 timeout 207020
1000 usec        34.5379    xmit 207003 recv 2 timeout 207003
750 usec         29.2305    xmit 207002 recv 1 timeout 207002
500 usec         19.1887    xmit 561141 recv 1 timeout 559657
250 usec         20.0465    xmit 214128 recv 2 timeout 214110
100 usec         19.2583    xmit 561621 recv 1 timeout 560153

(Note that these values are sensitive to the GSO patches which come later, and probably other traffic-related variables, so take with a large grain of salt).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
parent
5dae785a82
commit
a161883a29
@ -36,6 +36,7 @@
|
|||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#include <signal.h>
|
||||||
#include "linux/lguest_launcher.h"
|
#include "linux/lguest_launcher.h"
|
||||||
#include "linux/virtio_config.h"
|
#include "linux/virtio_config.h"
|
||||||
#include "linux/virtio_net.h"
|
#include "linux/virtio_net.h"
|
||||||
@ -81,6 +82,8 @@ static int waker_fd;
|
|||||||
static void *guest_base;
|
static void *guest_base;
|
||||||
/* The maximum guest physical address allowed, and maximum possible. */
|
/* The maximum guest physical address allowed, and maximum possible. */
|
||||||
static unsigned long guest_limit, guest_max;
|
static unsigned long guest_limit, guest_max;
|
||||||
|
/* The pipe for signal handler to write to. */
|
||||||
|
static int timeoutpipe[2];
|
||||||
|
|
||||||
/* a per-cpu variable indicating whose vcpu is currently running */
|
/* a per-cpu variable indicating whose vcpu is currently running */
|
||||||
static unsigned int __thread cpu_id;
|
static unsigned int __thread cpu_id;
|
||||||
@ -156,11 +159,14 @@ struct virtqueue
|
|||||||
/* Last available index we saw. */
|
/* Last available index we saw. */
|
||||||
u16 last_avail_idx;
|
u16 last_avail_idx;
|
||||||
|
|
||||||
/* The routine to call when the Guest pings us. */
|
/* The routine to call when the Guest pings us, or timeout. */
|
||||||
void (*handle_output)(int fd, struct virtqueue *me);
|
void (*handle_output)(int fd, struct virtqueue *me, bool timeout);
|
||||||
|
|
||||||
/* Outstanding buffers */
|
/* Outstanding buffers */
|
||||||
unsigned int inflight;
|
unsigned int inflight;
|
||||||
|
|
||||||
|
/* Is this blocked awaiting a timer? */
|
||||||
|
bool blocked;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Remember the arguments to the program so we can "reboot" */
|
/* Remember the arguments to the program so we can "reboot" */
|
||||||
@ -874,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev)
|
|||||||
|
|
||||||
/* Handling output for console is simple: we just get all the output buffers
|
/* Handling output for console is simple: we just get all the output buffers
|
||||||
* and write them to stdout. */
|
* and write them to stdout. */
|
||||||
static void handle_console_output(int fd, struct virtqueue *vq)
|
static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
|
||||||
{
|
{
|
||||||
unsigned int head, out, in;
|
unsigned int head, out, in;
|
||||||
int len;
|
int len;
|
||||||
@ -889,6 +895,21 @@ static void handle_console_output(int fd, struct virtqueue *vq)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void block_vq(struct virtqueue *vq)
|
||||||
|
{
|
||||||
|
struct itimerval itm;
|
||||||
|
|
||||||
|
vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
|
||||||
|
vq->blocked = true;
|
||||||
|
|
||||||
|
itm.it_interval.tv_sec = 0;
|
||||||
|
itm.it_interval.tv_usec = 0;
|
||||||
|
itm.it_value.tv_sec = 0;
|
||||||
|
itm.it_value.tv_usec = 500;
|
||||||
|
|
||||||
|
setitimer(ITIMER_REAL, &itm, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The Network
|
* The Network
|
||||||
*
|
*
|
||||||
@ -896,9 +917,9 @@ static void handle_console_output(int fd, struct virtqueue *vq)
|
|||||||
* and write them (ignoring the first element) to this device's file descriptor
|
* and write them (ignoring the first element) to this device's file descriptor
|
||||||
* (/dev/net/tun).
|
* (/dev/net/tun).
|
||||||
*/
|
*/
|
||||||
static void handle_net_output(int fd, struct virtqueue *vq)
|
static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
|
||||||
{
|
{
|
||||||
unsigned int head, out, in;
|
unsigned int head, out, in, num = 0;
|
||||||
int len;
|
int len;
|
||||||
struct iovec iov[vq->vring.num];
|
struct iovec iov[vq->vring.num];
|
||||||
|
|
||||||
@ -912,7 +933,12 @@ static void handle_net_output(int fd, struct virtqueue *vq)
|
|||||||
(void)convert(&iov[0], struct virtio_net_hdr);
|
(void)convert(&iov[0], struct virtio_net_hdr);
|
||||||
len = writev(vq->dev->fd, iov+1, out-1);
|
len = writev(vq->dev->fd, iov+1, out-1);
|
||||||
add_used_and_trigger(fd, vq, head, len);
|
add_used_and_trigger(fd, vq, head, len);
|
||||||
|
num++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Block further kicks and set up a timer if we saw anything. */
|
||||||
|
if (!timeout && num)
|
||||||
|
block_vq(vq);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This is where we handle a packet coming in from the tun device to our
|
/* This is where we handle a packet coming in from the tun device to our
|
||||||
@ -967,18 +993,18 @@ static bool handle_tun_input(int fd, struct device *dev)
|
|||||||
/*L:215 This is the callback attached to the network and console input
|
/*L:215 This is the callback attached to the network and console input
|
||||||
* virtqueues: it ensures we try again, in case we stopped console or net
|
* virtqueues: it ensures we try again, in case we stopped console or net
|
||||||
* delivery because Guest didn't have any buffers. */
|
* delivery because Guest didn't have any buffers. */
|
||||||
static void enable_fd(int fd, struct virtqueue *vq)
|
static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
|
||||||
{
|
{
|
||||||
add_device_fd(vq->dev->fd);
|
add_device_fd(vq->dev->fd);
|
||||||
/* Tell waker to listen to it again */
|
/* Tell waker to listen to it again */
|
||||||
write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
|
write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void net_enable_fd(int fd, struct virtqueue *vq)
|
static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
|
||||||
{
|
{
|
||||||
/* We don't need to know again when Guest refills receive buffer. */
|
/* We don't need to know again when Guest refills receive buffer. */
|
||||||
vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
|
vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
|
||||||
enable_fd(fd, vq);
|
enable_fd(fd, vq, timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* When the Guest tells us they updated the status field, we handle it. */
|
/* When the Guest tells us they updated the status field, we handle it. */
|
||||||
@ -1047,7 +1073,7 @@ static void handle_output(int fd, unsigned long addr)
|
|||||||
if (strcmp(vq->dev->name, "console") != 0)
|
if (strcmp(vq->dev->name, "console") != 0)
|
||||||
verbose("Output to %s\n", vq->dev->name);
|
verbose("Output to %s\n", vq->dev->name);
|
||||||
if (vq->handle_output)
|
if (vq->handle_output)
|
||||||
vq->handle_output(fd, vq);
|
vq->handle_output(fd, vq, false);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1061,6 +1087,29 @@ static void handle_output(int fd, unsigned long addr)
|
|||||||
strnlen(from_guest_phys(addr), guest_limit - addr));
|
strnlen(from_guest_phys(addr), guest_limit - addr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void handle_timeout(int fd)
|
||||||
|
{
|
||||||
|
char buf[32];
|
||||||
|
struct device *i;
|
||||||
|
struct virtqueue *vq;
|
||||||
|
|
||||||
|
/* Clear the pipe */
|
||||||
|
read(timeoutpipe[0], buf, sizeof(buf));
|
||||||
|
|
||||||
|
/* Check each device and virtqueue: flush blocked ones. */
|
||||||
|
for (i = devices.dev; i; i = i->next) {
|
||||||
|
for (vq = i->vq; vq; vq = vq->next) {
|
||||||
|
if (!vq->blocked)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
|
||||||
|
vq->blocked = false;
|
||||||
|
if (vq->handle_output)
|
||||||
|
vq->handle_output(fd, vq, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* This is called when the Waker wakes us up: check for incoming file
|
/* This is called when the Waker wakes us up: check for incoming file
|
||||||
* descriptors. */
|
* descriptors. */
|
||||||
static void handle_input(int fd)
|
static void handle_input(int fd)
|
||||||
@ -1071,9 +1120,14 @@ static void handle_input(int fd)
|
|||||||
for (;;) {
|
for (;;) {
|
||||||
struct device *i;
|
struct device *i;
|
||||||
fd_set fds = devices.infds;
|
fd_set fds = devices.infds;
|
||||||
|
int num;
|
||||||
|
|
||||||
|
num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
|
||||||
|
/* Could get interrupted */
|
||||||
|
if (num < 0)
|
||||||
|
continue;
|
||||||
/* If nothing is ready, we're done. */
|
/* If nothing is ready, we're done. */
|
||||||
if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0)
|
if (num == 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Otherwise, call the device(s) which have readable file
|
/* Otherwise, call the device(s) which have readable file
|
||||||
@ -1097,6 +1151,10 @@ static void handle_input(int fd)
|
|||||||
write(waker_fd, &dev_fd, sizeof(dev_fd));
|
write(waker_fd, &dev_fd, sizeof(dev_fd));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Is this the timeout fd? */
|
||||||
|
if (FD_ISSET(timeoutpipe[0], &fds))
|
||||||
|
handle_timeout(fd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1145,7 +1203,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
|
|||||||
/* Each device descriptor is followed by the description of its virtqueues. We
|
/* Each device descriptor is followed by the description of its virtqueues. We
|
||||||
* specify how many descriptors the virtqueue is to have. */
|
* specify how many descriptors the virtqueue is to have. */
|
||||||
static void add_virtqueue(struct device *dev, unsigned int num_descs,
|
static void add_virtqueue(struct device *dev, unsigned int num_descs,
|
||||||
void (*handle_output)(int fd, struct virtqueue *me))
|
void (*handle_output)(int, struct virtqueue *, bool))
|
||||||
{
|
{
|
||||||
unsigned int pages;
|
unsigned int pages;
|
||||||
struct virtqueue **i, *vq = malloc(sizeof(*vq));
|
struct virtqueue **i, *vq = malloc(sizeof(*vq));
|
||||||
@ -1161,6 +1219,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
|
|||||||
vq->last_avail_idx = 0;
|
vq->last_avail_idx = 0;
|
||||||
vq->dev = dev;
|
vq->dev = dev;
|
||||||
vq->inflight = 0;
|
vq->inflight = 0;
|
||||||
|
vq->blocked = false;
|
||||||
|
|
||||||
/* Initialize the configuration. */
|
/* Initialize the configuration. */
|
||||||
vq->config.num = num_descs;
|
vq->config.num = num_descs;
|
||||||
@ -1293,6 +1352,24 @@ static void setup_console(void)
|
|||||||
}
|
}
|
||||||
/*:*/
|
/*:*/
|
||||||
|
|
||||||
|
static void timeout_alarm(int sig)
|
||||||
|
{
|
||||||
|
write(timeoutpipe[1], "", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void setup_timeout(void)
|
||||||
|
{
|
||||||
|
if (pipe(timeoutpipe) != 0)
|
||||||
|
err(1, "Creating timeout pipe");
|
||||||
|
|
||||||
|
if (fcntl(timeoutpipe[1], F_SETFL,
|
||||||
|
fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0)
|
||||||
|
err(1, "Making timeout pipe nonblocking");
|
||||||
|
|
||||||
|
add_device_fd(timeoutpipe[0]);
|
||||||
|
signal(SIGALRM, timeout_alarm);
|
||||||
|
}
|
||||||
|
|
||||||
/*M:010 Inter-guest networking is an interesting area. Simplest is to have a
|
/*M:010 Inter-guest networking is an interesting area. Simplest is to have a
|
||||||
* --sharenet=<name> option which opens or creates a named pipe. This can be
|
* --sharenet=<name> option which opens or creates a named pipe. This can be
|
||||||
* used to send packets to another guest in a 1:1 manner.
|
* used to send packets to another guest in a 1:1 manner.
|
||||||
@ -1653,7 +1730,7 @@ static bool handle_io_finish(int fd, struct device *dev)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* When the Guest submits some I/O, we just need to wake the I/O thread. */
|
/* When the Guest submits some I/O, we just need to wake the I/O thread. */
|
||||||
static void handle_virtblk_output(int fd, struct virtqueue *vq)
|
static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
|
||||||
{
|
{
|
||||||
struct vblk_info *vblk = vq->dev->priv;
|
struct vblk_info *vblk = vq->dev->priv;
|
||||||
char c = 0;
|
char c = 0;
|
||||||
@ -1824,7 +1901,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
|
|||||||
/* ERESTART means that we need to reboot the guest */
|
/* ERESTART means that we need to reboot the guest */
|
||||||
} else if (errno == ERESTART) {
|
} else if (errno == ERESTART) {
|
||||||
restart_guest();
|
restart_guest();
|
||||||
/* EAGAIN means the Waker wanted us to look at some input.
|
/* EAGAIN means a signal (timeout).
|
||||||
* Anything else means a bug or incompatible change. */
|
* Anything else means a bug or incompatible change. */
|
||||||
} else if (errno != EAGAIN)
|
} else if (errno != EAGAIN)
|
||||||
err(1, "Running guest failed");
|
err(1, "Running guest failed");
|
||||||
@ -1948,6 +2025,9 @@ int main(int argc, char *argv[])
|
|||||||
/* We always have a console device */
|
/* We always have a console device */
|
||||||
setup_console();
|
setup_console();
|
||||||
|
|
||||||
|
/* We can timeout waiting for Guest network transmit. */
|
||||||
|
setup_timeout();
|
||||||
|
|
||||||
/* Now we load the kernel */
|
/* Now we load the kernel */
|
||||||
start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
|
start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user