aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/Makefile.objs1
-rw-r--r--block/qapi.c3
-rw-r--r--block/write-threshold.c125
-rw-r--r--include/block/block_int.h4
-rw-r--r--include/block/write-threshold.h64
-rw-r--r--qapi/block-core.json51
-rw-r--r--qmp-commands.hx32
-rw-r--r--tests/Makefile3
-rw-r--r--tests/qemu-iotests/067.out5
-rw-r--r--tests/test-write-threshold.c119
10 files changed, 406 insertions, 1 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 04b0e43eb..010afad71 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -20,6 +20,7 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
block-obj-y += accounting.o
+block-obj-y += write-threshold.o
common-obj-y += stream.o
common-obj-y += commit.o
diff --git a/block/qapi.c b/block/qapi.c
index d1a891796..1808e6733 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -24,6 +24,7 @@
#include "block/qapi.h"
#include "block/block_int.h"
+#include "block/write-threshold.h"
#include "qmp-commands.h"
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
@@ -89,6 +90,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
info->iops_size = cfg.op_size;
}
+ info->write_threshold = bdrv_write_threshold_get(bs);
+
return info;
}
diff --git a/block/write-threshold.c b/block/write-threshold.c
new file mode 100644
index 000000000..c2cd51771
--- /dev/null
+++ b/block/write-threshold.c
@@ -0,0 +1,125 @@
+/*
+ * QEMU System Emulator block write threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Francesco Romani <fromani@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "block/block_int.h"
+#include "block/coroutine.h"
+#include "block/write-threshold.h"
+#include "qemu/notify.h"
+#include "qapi-event.h"
+#include "qmp-commands.h"
+
+
+uint64_t bdrv_write_threshold_get(const BlockDriverState *bs)
+{
+ return bs->write_threshold_offset;
+}
+
+bool bdrv_write_threshold_is_set(const BlockDriverState *bs)
+{
+ return bs->write_threshold_offset > 0;
+}
+
+static void write_threshold_disable(BlockDriverState *bs)
+{
+ if (bdrv_write_threshold_is_set(bs)) {
+ notifier_with_return_remove(&bs->write_threshold_notifier);
+ bs->write_threshold_offset = 0;
+ }
+}
+
+uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
+ const BdrvTrackedRequest *req)
+{
+ if (bdrv_write_threshold_is_set(bs)) {
+ if (req->offset > bs->write_threshold_offset) {
+ return (req->offset - bs->write_threshold_offset) + req->bytes;
+ }
+ if ((req->offset + req->bytes) > bs->write_threshold_offset) {
+ return (req->offset + req->bytes) - bs->write_threshold_offset;
+ }
+ }
+ return 0;
+}
+
+static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
+ void *opaque)
+{
+ BdrvTrackedRequest *req = opaque;
+ BlockDriverState *bs = req->bs;
+ uint64_t amount = 0;
+
+ amount = bdrv_write_threshold_exceeded(bs, req);
+ if (amount > 0) {
+ qapi_event_send_block_write_threshold(
+ bs->node_name,
+ amount,
+ bs->write_threshold_offset,
+ &error_abort);
+
+ /* autodisable to avoid flooding the monitor */
+ write_threshold_disable(bs);
+ }
+
+ return 0; /* should always let other notifiers run */
+}
+
+static void write_threshold_register_notifier(BlockDriverState *bs)
+{
+ bs->write_threshold_notifier.notify = before_write_notify;
+ notifier_with_return_list_add(&bs->before_write_notifiers,
+ &bs->write_threshold_notifier);
+}
+
+static void write_threshold_update(BlockDriverState *bs,
+ uint64_t threshold_bytes)
+{
+ bs->write_threshold_offset = threshold_bytes; /* field is uint64_t too */
+}
+
+void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes)
+{
+ if (bdrv_write_threshold_is_set(bs)) {
+ if (threshold_bytes > 0) {
+ write_threshold_update(bs, threshold_bytes);
+ } else {
+ write_threshold_disable(bs);
+ }
+ } else {
+ if (threshold_bytes > 0) {
+ /* avoid multiple registration */
+ write_threshold_register_notifier(bs);
+ write_threshold_update(bs, threshold_bytes);
+ }
+ /* discard bogus disable request */
+ }
+}
+
+void qmp_block_set_write_threshold(const char *node_name,
+ uint64_t threshold_bytes,
+ Error **errp)
+{
+ BlockDriverState *bs;
+ AioContext *aio_context;
+
+ bs = bdrv_find_node(node_name);
+ if (!bs) {
+ error_set(errp, QERR_DEVICE_NOT_FOUND, node_name);
+ return;
+ }
+
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_write_threshold_set(bs, threshold_bytes);
+
+ aio_context_release(aio_context);
+}
diff --git a/include/block/block_int.h b/include/block/block_int.h
index e264be97b..7ad19503d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -412,6 +412,10 @@ struct BlockDriverState {
/* The error object in use for blocking operations on backing_hd */
Error *backing_blocker;
+
+ /* threshold limit for writes, in bytes. "High water mark". */
+ uint64_t write_threshold_offset;
+ NotifierWithReturn write_threshold_notifier;
};
diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h
new file mode 100644
index 000000000..f1b899cd5
--- /dev/null
+++ b/include/block/write-threshold.h
@@ -0,0 +1,64 @@
+/*
+ * QEMU System Emulator block write threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Francesco Romani <fromani@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#ifndef BLOCK_WRITE_THRESHOLD_H
+#define BLOCK_WRITE_THRESHOLD_H
+
+#include <stdint.h>
+
+#include "qemu/typedefs.h"
+#include "qemu-common.h"
+
+/*
+ * bdrv_write_threshold_set:
+ *
+ * Set the write threshold for block devices, in bytes.
+ * Notify when a write exceeds the threshold, meaning the device
+ * is becoming full, so it can be transparently resized.
+ * To be used with thin-provisioned block devices.
+ *
+ * Use threshold_bytes == 0 to disable.
+ */
+void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes);
+
+/*
+ * bdrv_write_threshold_get
+ *
+ * Get the configured write threshold, in bytes.
+ * Zero means no threshold configured.
+ */
+uint64_t bdrv_write_threshold_get(const BlockDriverState *bs);
+
+/*
+ * bdrv_write_threshold_is_set
+ *
+ * Tell if a write threshold is set for a given BDS.
+ */
+bool bdrv_write_threshold_is_set(const BlockDriverState *bs);
+
+/*
+ * bdrv_write_threshold_exceeded
+ *
+ * Return the extent of a write request that exceeded the threshold,
+ * or zero if the request is below the threshold.
+ * Return zero also if the threshold was not set.
+ *
+ * NOTE: here we assume the following holds for each request this code
+ * deals with:
+ *
+ * assert((req->offset + req->bytes) <= UINT64_MAX)
+ *
+ * Please note there is *not* an actual C assert().
+ */
+uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
+ const BdrvTrackedRequest *req);
+
+#endif
diff --git a/qapi/block-core.json b/qapi/block-core.json
index b7d977244..a3fdaf02b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -257,6 +257,9 @@
#
# @cache: the cache mode used for the block device (since: 2.3)
#
+# @write_threshold: configured write threshold for the device.
+# 0 if disabled. (Since 2.3)
+#
# Since: 0.14.0
#
##
@@ -271,7 +274,8 @@
'*bps_max': 'int', '*bps_rd_max': 'int',
'*bps_wr_max': 'int', '*iops_max': 'int',
'*iops_rd_max': 'int', '*iops_wr_max': 'int',
- '*iops_size': 'int', 'cache': 'BlockdevCacheInfo' } }
+ '*iops_size': 'int', 'cache': 'BlockdevCacheInfo',
+ 'write_threshold': 'int' } }
##
# @BlockDeviceIoStatus:
@@ -1917,3 +1921,48 @@
##
{ 'enum': 'PreallocMode',
'data': [ 'off', 'metadata', 'falloc', 'full' ] }
+
+##
+# @BLOCK_WRITE_THRESHOLD
+#
+# Emitted when a write on a block device reaches or exceeds the
+# configured write threshold. For thin-provisioned devices, this
+# means the device should be extended to avoid pausing for
+# disk exhaustion.
+# The event is one shot. Once triggered, it needs to be
+# re-registered with another block-set-write-threshold command.
+#
+# @node-name: graph node name on which the threshold was exceeded.
+#
+# @amount-exceeded: amount of data which exceeded the threshold, in bytes.
+#
+# @write-threshold: last configured threshold, in bytes.
+#
+# Since: 2.3
+##
+{ 'event': 'BLOCK_WRITE_THRESHOLD',
+ 'data': { 'node-name': 'str',
+ 'amount-exceeded': 'uint64',
+ 'write-threshold': 'uint64' } }
+
+##
+# @block-set-write-threshold
+#
+# Change the write threshold for a block drive. An event will be delivered
+# if a write to this block drive crosses the configured threshold.
+# This is useful to transparently resize thin-provisioned drives without
+# the guest OS noticing.
+#
+# @node-name: graph node name on which the threshold must be set.
+#
+# @write-threshold: configured threshold for the block device, bytes.
+# Use 0 to disable the threshold.
+#
+# Returns: Nothing on success
+# If @node-name is not found in the block device graph,
+# DeviceNotFound
+#
+# Since: 2.3
+##
+{ 'command': 'block-set-write-threshold',
+ 'data': { 'node-name': 'str', 'write-threshold': 'uint64' } }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index af3fd1993..a85d8479e 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2146,6 +2146,8 @@ Each json-object contain the following:
- "iops_size": I/O size when limiting by iops (json-int)
- "detect_zeroes": detect and optimize zero writing (json-string)
- Possible values: "off", "on", "unmap"
+ - "write_threshold": write offset threshold in bytes, an event will be
+ emitted if crossed. Zero if disabled (json-int)
- "image": the detail of the image, it is a json-object containing
the following:
- "filename": image file name (json-string)
@@ -2223,6 +2225,7 @@ Example:
"iops_wr_max": 0,
"iops_size": 0,
"detect_zeroes": "on",
+ "write_threshold": 0,
"image":{
"filename":"disks/test.qcow2",
"format":"qcow2",
@@ -3685,6 +3688,7 @@ Example:
"iops_rd_max": 0,
"iops_wr_max": 0,
"iops_size": 0,
+ "write_threshold": 0,
"image":{
"filename":"disks/test.qcow2",
"format":"qcow2",
@@ -3921,3 +3925,31 @@ Move mouse pointer to absolute coordinates (20000, 400).
<- { "return": {} }
EQMP
+
+ {
+ .name = "block-set-write-threshold",
+ .args_type = "node-name:s,write-threshold:l",
+ .mhandler.cmd_new = qmp_marshal_input_block_set_write_threshold,
+ },
+
+SQMP
+block-set-write-threshold
+-------------------------
+
+Change the write threshold for a block drive. The threshold is an offset,
+thus must be non-negative. Default is no write threshold.
+Setting the threshold to zero disables it.
+
+Arguments:
+
+- "node-name": the node name in the block driver state graph (json-string)
+- "write-threshold": the write threshold in bytes (json-int)
+
+Example:
+
+-> { "execute": "block-set-write-threshold",
+ "arguments": { "node-name": "mydev",
+ "write-threshold": 17179869184 } }
+<- { "return": {} }
+
+EQMP
diff --git a/tests/Makefile b/tests/Makefile
index 5caccf765..d5df16882 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -68,6 +68,8 @@ check-unit-y += tests/check-qom-interface$(EXESUF)
gcov-files-check-qom-interface-y = qom/object.c
check-unit-y += tests/test-qemu-opts$(EXESUF)
gcov-files-test-qemu-opts-y = qom/test-qemu-opts.c
+check-unit-y += tests/test-write-threshold$(EXESUF)
+gcov-files-test-write-threshold-y = block/write-threshold.c
check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
@@ -360,6 +362,7 @@ tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y)
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a
+tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(block-obj-y) libqemuutil.a libqemustub.a
ifeq ($(CONFIG_POSIX),y)
LIBS += -lutil
diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out
index 13ff3cd7a..00b3eaefc 100644
--- a/tests/qemu-iotests/067.out
+++ b/tests/qemu-iotests/067.out
@@ -43,6 +43,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
@@ -218,6 +219,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
@@ -423,6 +425,7 @@ Testing:
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
@@ -607,6 +610,7 @@ Testing:
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
@@ -717,6 +721,7 @@ Testing:
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
diff --git a/tests/test-write-threshold.c b/tests/test-write-threshold.c
new file mode 100644
index 000000000..faffa7b85
--- /dev/null
+++ b/tests/test-write-threshold.c
@@ -0,0 +1,119 @@
+/*
+ * Test block device write threshold
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include <glib.h>
+#include <stdint.h>
+#include "block/block_int.h"
+#include "block/write-threshold.h"
+
+
+static void test_threshold_not_set_on_init(void)
+{
+ uint64_t res;
+ BlockDriverState bs;
+ memset(&bs, 0, sizeof(bs)); /* zeroed state: threshold offset is 0 */
+
+ g_assert(!bdrv_write_threshold_is_set(&bs));
+
+ res = bdrv_write_threshold_get(&bs);
+ g_assert_cmpuint(res, ==, 0); /* unsigned compare: res is uint64_t */
+}
+
+static void test_threshold_set_get(void)
+{
+ uint64_t threshold = 4 * 1024 * 1024;
+ uint64_t res;
+ BlockDriverState bs;
+ memset(&bs, 0, sizeof(bs)); /* start from a state with no threshold */
+
+ bdrv_write_threshold_set(&bs, threshold);
+
+ g_assert(bdrv_write_threshold_is_set(&bs));
+
+ res = bdrv_write_threshold_get(&bs);
+ g_assert_cmpuint(res, ==, threshold); /* unsigned compare: both uint64_t */
+}
+
+static void test_threshold_multi_set_get(void)
+{
+ uint64_t threshold1 = 4 * 1024 * 1024;
+ uint64_t threshold2 = 15 * 1024 * 1024;
+ uint64_t res;
+ BlockDriverState bs;
+ memset(&bs, 0, sizeof(bs)); /* start from a state with no threshold */
+
+ bdrv_write_threshold_set(&bs, threshold1);
+ bdrv_write_threshold_set(&bs, threshold2); /* second call updates in place */
+ res = bdrv_write_threshold_get(&bs);
+ g_assert_cmpuint(res, ==, threshold2); /* unsigned compare: both uint64_t */
+}
+
+static void test_threshold_not_trigger(void)
+{
+ uint64_t amount = 0;
+ uint64_t threshold = 4 * 1024 * 1024;
+ BlockDriverState bs;
+ BdrvTrackedRequest req;
+
+ memset(&bs, 0, sizeof(bs));
+ memset(&req, 0, sizeof(req));
+ req.offset = 1024;
+ req.bytes = 1024;
+
+ bdrv_write_threshold_set(&bs, threshold);
+ amount = bdrv_write_threshold_exceeded(&bs, &req);
+ g_assert_cmpuint(amount, ==, 0);
+}
+
+
+static void test_threshold_trigger(void)
+{
+ uint64_t amount = 0;
+ uint64_t threshold = 4 * 1024 * 1024;
+ BlockDriverState bs;
+ BdrvTrackedRequest req;
+
+ memset(&bs, 0, sizeof(bs));
+ memset(&req, 0, sizeof(req));
+ req.offset = (4 * 1024 * 1024) - 1024;
+ req.bytes = 2 * 1024;
+
+ bdrv_write_threshold_set(&bs, threshold);
+ amount = bdrv_write_threshold_exceeded(&bs, &req);
+ g_assert_cmpuint(amount, >=, 1024);
+}
+
+typedef struct TestStruct {
+ const char *name;
+ void (*func)(void);
+} TestStruct;
+
+
+int main(int argc, char **argv)
+{
+ size_t i;
+ TestStruct tests[] = {
+ { "/write-threshold/not-set-on-init",
+ test_threshold_not_set_on_init },
+ { "/write-threshold/set-get",
+ test_threshold_set_get },
+ { "/write-threshold/multi-set-get",
+ test_threshold_multi_set_get },
+ { "/write-threshold/not-trigger",
+ test_threshold_not_trigger },
+ { "/write-threshold/trigger",
+ test_threshold_trigger },
+ { NULL, NULL }
+ };
+
+ g_test_init(&argc, &argv, NULL);
+ for (i = 0; tests[i].name != NULL; i++) {
+ g_test_add_func(tests[i].name, tests[i].func);
+ }
+ return g_test_run();
+}