2 * storage_backend_rbd.c: storage backend for RBD (RADOS Block Device) handling
4 * Copyright (C) 2013-2016 Red Hat, Inc.
5 * Copyright (C) 2012 Wido den Hollander
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library. If not, see
19 * <http://www.gnu.org/licenses/>.
25 #include "datatypes.h"
27 #include "storage_backend_rbd.h"
28 #include "storage_conf.h"
33 #include "virstring.h"
34 #include "virrandom.h"
35 #include "rados/librados.h"
36 #include "rbd/librbd.h"
37 #include "secret_util.h"
38 #include "storage_util.h"
39 #include <libxml/xpathInternals.h>
41 #define VIR_FROM_THIS VIR_FROM_STORAGE
43 VIR_LOG_INIT("storage.storage_backend_rbd");
45 struct _virStorageBackendRBDState
{
51 typedef struct _virStorageBackendRBDState virStorageBackendRBDState
;
52 typedef virStorageBackendRBDState
*virStorageBackendRBDStatePtr
;
54 typedef struct _virStoragePoolRBDConfigOptionsDef virStoragePoolRBDConfigOptionsDef
;
55 typedef virStoragePoolRBDConfigOptionsDef
*virStoragePoolRBDConfigOptionsDefPtr
;
56 struct _virStoragePoolRBDConfigOptionsDef
{
62 #define STORAGE_POOL_RBD_NAMESPACE_HREF "http://libvirt.org/schemas/storagepool/rbd/1.0"
65 virStoragePoolDefRBDNamespaceFree(void *nsdata
)
67 virStoragePoolRBDConfigOptionsDefPtr cmdopts
= nsdata
;
73 for (i
= 0; i
< cmdopts
->noptions
; i
++) {
74 VIR_FREE(cmdopts
->names
[i
]);
75 VIR_FREE(cmdopts
->values
[i
]);
77 VIR_FREE(cmdopts
->names
);
78 VIR_FREE(cmdopts
->values
);
85 virStoragePoolDefRBDNamespaceParse(xmlXPathContextPtr ctxt
,
88 virStoragePoolRBDConfigOptionsDefPtr cmdopts
= NULL
;
92 VIR_AUTOFREE(xmlNodePtr
*)nodes
= NULL
;
94 if (xmlXPathRegisterNs(ctxt
, BAD_CAST
"rbd",
95 BAD_CAST STORAGE_POOL_RBD_NAMESPACE_HREF
) < 0) {
96 virReportError(VIR_ERR_INTERNAL_ERROR
,
97 _("Failed to register xml namespace '%s'"),
98 STORAGE_POOL_RBD_NAMESPACE_HREF
);
102 nnodes
= virXPathNodeSet("./rbd:config_opts/rbd:option", ctxt
, &nodes
);
109 if (VIR_ALLOC(cmdopts
) < 0)
112 if (VIR_ALLOC_N(cmdopts
->names
, nnodes
) < 0 ||
113 VIR_ALLOC_N(cmdopts
->values
, nnodes
) < 0)
116 for (i
= 0; i
< nnodes
; i
++) {
117 if (!(cmdopts
->names
[cmdopts
->noptions
] =
118 virXMLPropString(nodes
[i
], "name"))) {
119 virReportError(VIR_ERR_XML_ERROR
, "%s",
120 _("no rbd option name specified"));
123 if (*cmdopts
->names
[cmdopts
->noptions
] == '\0') {
124 virReportError(VIR_ERR_XML_ERROR
, "%s",
125 _("empty rbd option name specified"));
128 if (!(cmdopts
->values
[cmdopts
->noptions
] =
129 virXMLPropString(nodes
[i
], "value"))) {
130 virReportError(VIR_ERR_XML_ERROR
,
131 _("no rbd option value specified for name '%s'"),
132 cmdopts
->names
[cmdopts
->noptions
]);
135 if (*cmdopts
->values
[cmdopts
->noptions
] == '\0') {
136 virReportError(VIR_ERR_XML_ERROR
,
137 _("empty rbd option value specified for name '%s'"),
138 cmdopts
->names
[cmdopts
->noptions
]);
144 VIR_STEAL_PTR(*data
, cmdopts
);
148 virStoragePoolDefRBDNamespaceFree(cmdopts
);
154 virStoragePoolDefRBDNamespaceFormatXML(virBufferPtr buf
,
158 virStoragePoolRBDConfigOptionsDefPtr def
= nsdata
;
163 virBufferAddLit(buf
, "<rbd:config_opts>\n");
164 virBufferAdjustIndent(buf
, 2);
166 for (i
= 0; i
< def
->noptions
; i
++) {
167 virBufferEscapeString(buf
, "<rbd:option name='%s' ", def
->names
[i
]);
168 virBufferEscapeString(buf
, "value='%s'/>\n", def
->values
[i
]);
171 virBufferAdjustIndent(buf
, -2);
172 virBufferAddLit(buf
, "</rbd:config_opts>\n");
179 virStoragePoolDefRBDNamespaceHref(void)
181 return "xmlns:rbd='" STORAGE_POOL_RBD_NAMESPACE_HREF
"'";
186 virStorageBackendRBDRADOSConfSet(rados_t cluster
,
190 VIR_DEBUG("Setting RADOS option '%s' to '%s'",
192 if (rados_conf_set(cluster
, option
, value
) < 0) {
193 virReportError(VIR_ERR_INTERNAL_ERROR
,
194 _("failed to set RADOS option: %s"),
203 virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr
,
204 virStoragePoolDefPtr def
)
208 virStoragePoolSourcePtr source
= &def
->source
;
209 virStorageAuthDefPtr authdef
= source
->auth
;
210 unsigned char *secret_value
= NULL
;
211 size_t secret_value_size
= 0;
212 VIR_AUTODISPOSE_STR rados_key
= NULL
;
213 virBuffer mon_host
= VIR_BUFFER_INITIALIZER
;
215 const char *client_mount_timeout
= "30";
216 const char *mon_op_timeout
= "30";
217 const char *osd_op_timeout
= "30";
218 const char *rbd_default_format
= "2";
219 virConnectPtr conn
= NULL
;
220 VIR_AUTOFREE(char *) mon_buff
= NULL
;
223 VIR_DEBUG("Using cephx authorization, username: %s", authdef
->username
);
225 if ((r
= rados_create(&ptr
->cluster
, authdef
->username
)) < 0) {
226 virReportSystemError(-r
, "%s", _("failed to initialize RADOS"));
230 conn
= virGetConnectSecret();
234 if (virSecretGetSecretString(conn
, &authdef
->seclookupdef
,
235 VIR_SECRET_USAGE_TYPE_CEPH
,
236 &secret_value
, &secret_value_size
) < 0)
239 if (!(rados_key
= virStringEncodeBase64(secret_value
, secret_value_size
)))
242 if (virStorageBackendRBDRADOSConfSet(ptr
->cluster
,
243 "key", rados_key
) < 0)
246 if (virStorageBackendRBDRADOSConfSet(ptr
->cluster
,
247 "auth_supported", "cephx") < 0)
250 VIR_DEBUG("Not using cephx authorization");
251 if (rados_create(&ptr
->cluster
, NULL
) < 0) {
252 virReportError(VIR_ERR_INTERNAL_ERROR
, "%s",
253 _("failed to create the RADOS cluster"));
256 if (virStorageBackendRBDRADOSConfSet(ptr
->cluster
,
257 "auth_supported", "none") < 0)
261 VIR_DEBUG("Found %zu RADOS cluster monitors in the pool configuration",
264 /* combine host and port into portal */
265 for (i
= 0; i
< source
->nhost
; i
++) {
266 if (source
->hosts
[i
].name
!= NULL
&&
267 !source
->hosts
[i
].port
) {
268 virBufferAsprintf(&mon_host
, "%s,",
269 source
->hosts
[i
].name
);
270 } else if (source
->hosts
[i
].name
!= NULL
&&
271 source
->hosts
[i
].port
) {
272 const char *incFormat
;
273 if (virSocketAddrNumericFamily(source
->hosts
[i
].name
) == AF_INET6
) {
274 /* IPv6 address must be escaped in brackets on the cmd line */
275 incFormat
= "[%s]:%d,";
277 /* listenAddress is a hostname or IPv4 */
278 incFormat
= "%s:%d,";
280 virBufferAsprintf(&mon_host
, incFormat
,
281 source
->hosts
[i
].name
,
282 source
->hosts
[i
].port
);
284 virReportError(VIR_ERR_INTERNAL_ERROR
, "%s",
285 _("received malformed monitor, check the XML definition"));
289 if (virBufferCheckError(&mon_host
) < 0)
292 mon_buff
= virBufferContentAndReset(&mon_host
);
293 if (virStorageBackendRBDRADOSConfSet(ptr
->cluster
,
299 * Set timeout options for librados.
300 * In case the Ceph cluster is down libvirt won't block forever.
301 * Operations in librados will return -ETIMEDOUT when the timeout is reached.
303 if (virStorageBackendRBDRADOSConfSet(ptr
->cluster
,
304 "client_mount_timeout",
305 client_mount_timeout
) < 0)
308 if (virStorageBackendRBDRADOSConfSet(ptr
->cluster
,
309 "rados_mon_op_timeout",
313 if (virStorageBackendRBDRADOSConfSet(ptr
->cluster
,
314 "rados_osd_op_timeout",
319 * Librbd supports creating RBD format 2 images. We no longer have to invoke
320 * rbd_create3(), we can tell librbd to default to format 2.
321 * This leaves us to simply use rbd_create() and use the default behavior of librbd
323 if (virStorageBackendRBDRADOSConfSet(ptr
->cluster
,
324 "rbd_default_format",
325 rbd_default_format
) < 0)
328 if (def
->namespaceData
) {
329 virStoragePoolRBDConfigOptionsDefPtr cmdopts
= def
->namespaceData
;
330 char uuidstr
[VIR_UUID_STRING_BUFLEN
];
332 for (i
= 0; i
< cmdopts
->noptions
; i
++) {
333 if (virStorageBackendRBDRADOSConfSet(ptr
->cluster
,
335 cmdopts
->values
[i
]) < 0)
339 virUUIDFormat(def
->uuid
, uuidstr
);
340 VIR_WARN("Storage Pool name='%s' uuid='%s' is tainted by custom "
341 "config_opts from XML", def
->name
, uuidstr
);
344 ptr
->starttime
= time(0);
345 if ((r
= rados_connect(ptr
->cluster
)) < 0) {
346 virReportSystemError(-r
, _("failed to connect to the RADOS monitor on: %s"),
354 VIR_DISPOSE_N(secret_value
, secret_value_size
);
356 virObjectUnref(conn
);
357 virBufferFreeAndReset(&mon_host
);
362 virStorageBackendRBDOpenIoCTX(virStorageBackendRBDStatePtr ptr
,
363 virStoragePoolObjPtr pool
)
365 virStoragePoolDefPtr def
= virStoragePoolObjGetDef(pool
);
366 int r
= rados_ioctx_create(ptr
->cluster
, def
->source
.name
, &ptr
->ioctx
);
368 virReportSystemError(-r
, _("failed to create the RBD IoCTX. Does the pool '%s' exist?"),
375 virStorageBackendRBDCloseRADOSConn(virStorageBackendRBDStatePtr ptr
)
377 if (ptr
->ioctx
!= NULL
) {
378 VIR_DEBUG("Closing RADOS IoCTX");
379 rados_ioctx_destroy(ptr
->ioctx
);
383 if (ptr
->cluster
!= NULL
) {
384 VIR_DEBUG("Closing RADOS connection");
385 rados_shutdown(ptr
->cluster
);
389 VIR_DEBUG("RADOS connection existed for %ld seconds",
390 time(0) - ptr
->starttime
);
395 virStorageBackendRBDFreeState(virStorageBackendRBDStatePtr
*ptr
)
400 virStorageBackendRBDCloseRADOSConn(*ptr
);
406 static virStorageBackendRBDStatePtr
407 virStorageBackendRBDNewState(virStoragePoolObjPtr pool
)
409 virStorageBackendRBDStatePtr ptr
;
410 virStoragePoolDefPtr def
= virStoragePoolObjGetDef(pool
);
412 if (VIR_ALLOC(ptr
) < 0)
415 if (virStorageBackendRBDOpenRADOSConn(ptr
, def
) < 0)
418 if (virStorageBackendRBDOpenIoCTX(ptr
, pool
) < 0)
424 virStorageBackendRBDFreeState(&ptr
);
430 volStorageBackendRBDGetFeatures(rbd_image_t image
,
436 if ((r
= rbd_get_features(image
, features
)) < 0) {
437 virReportSystemError(-r
, _("failed to get the features of RBD image "
447 #if LIBRBD_VERSION_CODE > 265
449 volStorageBackendRBDGetFlags(rbd_image_t image
,
455 if ((rc
= rbd_get_flags(image
, flags
)) < 0) {
456 virReportSystemError(-rc
,
457 _("failed to get the flags of RBD image %s"),
466 volStorageBackendRBDUseFastDiff(uint64_t features
, uint64_t flags
)
468 return (((features
& RBD_FEATURE_FAST_DIFF
) != 0ULL) &&
469 ((flags
& RBD_FLAG_FAST_DIFF_INVALID
) == 0ULL));
473 virStorageBackendRBDRefreshVolInfoCb(uint64_t offset ATTRIBUTE_UNUSED
,
478 size_t *used_size
= (size_t *)(arg
);
486 virStorageBackendRBDSetAllocation(virStorageVolDefPtr vol
,
488 rbd_image_info_t
*info
)
491 size_t allocation
= 0;
493 if ((r
= rbd_diff_iterate2(image
, NULL
, 0, info
->size
, 0, 1,
494 &virStorageBackendRBDRefreshVolInfoCb
,
496 virReportSystemError(-r
, _("failed to iterate RBD image '%s'"),
501 VIR_DEBUG("Found %zu bytes allocated for RBD image %s",
502 allocation
, vol
->name
);
504 vol
->target
.allocation
= allocation
;
513 volStorageBackendRBDGetFlags(rbd_image_t image ATTRIBUTE_UNUSED
,
514 const char *volname ATTRIBUTE_UNUSED
,
522 volStorageBackendRBDUseFastDiff(uint64_t features ATTRIBUTE_UNUSED
,
523 uint64_t feature_flags ATTRIBUTE_UNUSED
)
529 virStorageBackendRBDSetAllocation(virStorageVolDefPtr vol ATTRIBUTE_UNUSED
,
530 rbd_image_t
*image ATTRIBUTE_UNUSED
,
531 rbd_image_info_t
*info ATTRIBUTE_UNUSED
)
538 volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol
,
539 virStoragePoolObjPtr pool
,
540 virStorageBackendRBDStatePtr ptr
)
543 virStoragePoolDefPtr def
= virStoragePoolObjGetDef(pool
);
545 rbd_image_t image
= NULL
;
546 rbd_image_info_t info
;
550 if ((r
= rbd_open_read_only(ptr
->ioctx
, vol
->name
, &image
, NULL
)) < 0) {
552 virReportSystemError(-r
, _("failed to open the RBD image '%s'"),
557 if ((r
= rbd_stat(image
, &info
, sizeof(info
))) < 0) {
559 virReportSystemError(-r
, _("failed to stat the RBD image '%s'"),
564 if (volStorageBackendRBDGetFeatures(image
, vol
->name
, &features
) < 0)
567 if (volStorageBackendRBDGetFlags(image
, vol
->name
, &flags
) < 0)
570 vol
->target
.capacity
= info
.size
;
571 vol
->type
= VIR_STORAGE_VOL_NETWORK
;
572 vol
->target
.format
= VIR_STORAGE_FILE_RAW
;
575 def
->refresh
->volume
.allocation
== VIR_STORAGE_VOL_DEF_REFRESH_ALLOCATION_DEFAULT
&&
576 volStorageBackendRBDUseFastDiff(features
, flags
)) {
577 VIR_DEBUG("RBD image %s/%s has fast-diff feature enabled. "
578 "Querying for actual allocation",
579 def
->source
.name
, vol
->name
);
581 if (virStorageBackendRBDSetAllocation(vol
, image
, &info
) < 0)
584 vol
->target
.allocation
= info
.obj_size
* info
.num_objs
;
587 VIR_DEBUG("Refreshed RBD image %s/%s (capacity: %llu allocation: %llu "
588 "obj_size: %"PRIu64
" num_objs: %"PRIu64
")",
589 def
->source
.name
, vol
->name
, vol
->target
.capacity
,
590 vol
->target
.allocation
, info
.obj_size
, info
.num_objs
);
592 VIR_FREE(vol
->target
.path
);
593 if (virAsprintf(&vol
->target
.path
, "%s/%s",
594 def
->source
.name
, vol
->name
) < 0)
598 if (virAsprintf(&vol
->key
, "%s/%s",
599 def
->source
.name
, vol
->name
) < 0)
611 #ifdef HAVE_RBD_LIST2
613 virStorageBackendRBDGetVolNames(virStorageBackendRBDStatePtr ptr
)
618 rbd_image_spec_t
*images
= NULL
;
623 if (VIR_REALLOC_N(images
, nimages
) < 0)
626 rc
= rbd_list2(ptr
->ioctx
, images
, &nimages
);
630 virReportSystemError(-rc
, "%s", _("Unable to list RBD images"));
635 if (VIR_ALLOC_N(names
, nimages
+ 1) < 0)
639 for (i
= 0; i
< nimages
; i
++)
640 VIR_STEAL_PTR(names
[i
], images
[i
].name
);
645 virStringListFreeCount(names
, nnames
);
646 rbd_image_spec_list_cleanup(images
, nimages
);
651 #else /* ! HAVE_RBD_LIST2 */
654 virStorageBackendRBDGetVolNames(virStorageBackendRBDStatePtr ptr
)
659 size_t max_size
= 1024;
660 VIR_AUTOFREE(char *) namebuf
= NULL
;
664 if (VIR_ALLOC_N(namebuf
, max_size
) < 0)
667 rc
= rbd_list(ptr
->ioctx
, namebuf
, &max_size
);
671 virReportSystemError(-rc
, "%s", _("Unable to list RBD images"));
677 for (name
= namebuf
; name
< namebuf
+ max_size
;) {
678 VIR_AUTOFREE(char *) namedup
= NULL
;
683 if (VIR_STRDUP(namedup
, name
) < 0)
686 if (VIR_APPEND_ELEMENT(names
, nnames
, namedup
) < 0)
689 name
+= strlen(name
) + 1;
692 if (VIR_EXPAND_N(names
, nnames
, 1) < 0)
698 virStringListFreeCount(names
, nnames
);
701 #endif /* ! HAVE_RBD_LIST2 */
705 virStorageBackendRBDRefreshPool(virStoragePoolObjPtr pool
)
709 virStoragePoolDefPtr def
= virStoragePoolObjGetDef(pool
);
710 virStorageBackendRBDStatePtr ptr
= NULL
;
711 struct rados_cluster_stat_t clusterstat
;
712 struct rados_pool_stat_t poolstat
;
716 if (!(ptr
= virStorageBackendRBDNewState(pool
)))
719 if ((r
= rados_cluster_stat(ptr
->cluster
, &clusterstat
)) < 0) {
720 virReportSystemError(-r
, "%s", _("failed to stat the RADOS cluster"));
724 if ((r
= rados_ioctx_pool_stat(ptr
->ioctx
, &poolstat
)) < 0) {
725 virReportSystemError(-r
, _("failed to stat the RADOS pool '%s'"),
730 def
->capacity
= clusterstat
.kb
* 1024;
731 def
->available
= clusterstat
.kb_avail
* 1024;
732 def
->allocation
= poolstat
.num_bytes
;
734 VIR_DEBUG("Utilization of RBD pool %s: (kb: %"PRIu64
" kb_avail: %"PRIu64
735 " num_bytes: %"PRIu64
")",
736 def
->source
.name
, clusterstat
.kb
, clusterstat
.kb_avail
,
739 if (!(names
= virStorageBackendRBDGetVolNames(ptr
)))
742 for (i
= 0; names
[i
] != NULL
; i
++) {
743 VIR_AUTOPTR(virStorageVolDef
) vol
= NULL
;
745 if (VIR_ALLOC(vol
) < 0)
748 VIR_STEAL_PTR(vol
->name
, names
[i
]);
750 r
= volStorageBackendRBDRefreshVolInfo(vol
, pool
, ptr
);
752 /* It could be that a volume has been deleted through a different route
753 * than libvirt and that will cause a -ENOENT to be returned.
755 * Another possibility is that there is something wrong with the placement
756 * group (PG) that RBD image's header is in and that causes -ETIMEDOUT
759 * Do not error out and simply ignore the volume
762 if (r
== -ENOENT
|| r
== -ETIMEDOUT
)
768 if (virStoragePoolObjAddVol(pool
, vol
) < 0)
773 VIR_DEBUG("Found %zu images in RBD pool %s",
774 virStoragePoolObjGetVolumesCount(pool
), def
->source
.name
);
779 virStringListFree(names
);
780 virStorageBackendRBDFreeState(&ptr
);
785 virStorageBackendRBDCleanupSnapshots(rados_ioctx_t ioctx
,
786 virStoragePoolSourcePtr source
,
787 virStorageVolDefPtr vol
)
792 int snap_count
, protected;
794 rbd_image_t image
= NULL
;
795 VIR_AUTOFREE(rbd_snap_info_t
*) snaps
= NULL
;
797 if ((r
= rbd_open(ioctx
, vol
->name
, &image
, NULL
)) < 0) {
798 virReportSystemError(-r
, _("failed to open the RBD image '%s'"),
804 if (VIR_ALLOC_N(snaps
, max_snaps
))
807 snap_count
= rbd_snap_list(image
, snaps
, &max_snaps
);
811 } while (snap_count
== -ERANGE
);
813 VIR_DEBUG("Found %d snapshots for volume %s/%s", snap_count
,
814 source
->name
, vol
->name
);
816 for (i
= 0; i
< snap_count
; i
++) {
817 if ((r
= rbd_snap_is_protected(image
, snaps
[i
].name
, &protected)) < 0) {
818 virReportSystemError(-r
, _("failed to verify if snapshot '%s/%s@%s' is protected"),
819 source
->name
, vol
->name
,
824 if (protected == 1) {
825 VIR_DEBUG("Snapshot %s/%s@%s is protected needs to be "
826 "unprotected", source
->name
, vol
->name
,
829 if ((r
= rbd_snap_unprotect(image
, snaps
[i
].name
)) < 0) {
830 virReportSystemError(-r
, _("failed to unprotect snapshot '%s/%s@%s'"),
831 source
->name
, vol
->name
,
837 VIR_DEBUG("Removing snapshot %s/%s@%s", source
->name
,
838 vol
->name
, snaps
[i
].name
);
840 if ((r
= rbd_snap_remove(image
, snaps
[i
].name
)) < 0) {
841 virReportSystemError(-r
, _("failed to remove snapshot '%s/%s@%s'"),
842 source
->name
, vol
->name
,
852 rbd_snap_list_end(snaps
);
861 virStorageBackendRBDDeleteVol(virStoragePoolObjPtr pool
,
862 virStorageVolDefPtr vol
,
867 virStoragePoolDefPtr def
= virStoragePoolObjGetDef(pool
);
868 virStorageBackendRBDStatePtr ptr
= NULL
;
870 virCheckFlags(VIR_STORAGE_VOL_DELETE_ZEROED
|
871 VIR_STORAGE_VOL_DELETE_WITH_SNAPSHOTS
, -1);
873 VIR_DEBUG("Removing RBD image %s/%s", def
->source
.name
, vol
->name
);
875 if (flags
& VIR_STORAGE_VOL_DELETE_ZEROED
)
876 VIR_WARN("%s", "This storage backend does not support zeroed removal of volumes");
878 if (!(ptr
= virStorageBackendRBDNewState(pool
)))
881 if (flags
& VIR_STORAGE_VOL_DELETE_WITH_SNAPSHOTS
) {
882 if (virStorageBackendRBDCleanupSnapshots(ptr
->ioctx
, &def
->source
,
887 VIR_DEBUG("Removing volume %s/%s", def
->source
.name
, vol
->name
);
889 r
= rbd_remove(ptr
->ioctx
, vol
->name
);
890 if (r
< 0 && (-r
) != ENOENT
) {
891 virReportSystemError(-r
, _("failed to remove volume '%s/%s'"),
892 def
->source
.name
, vol
->name
);
899 virStorageBackendRBDFreeState(&ptr
);
905 virStorageBackendRBDCreateVol(virStoragePoolObjPtr pool
,
906 virStorageVolDefPtr vol
)
908 virStoragePoolDefPtr def
= virStoragePoolObjGetDef(pool
);
910 vol
->type
= VIR_STORAGE_VOL_NETWORK
;
912 if (vol
->target
.format
!= VIR_STORAGE_FILE_RAW
) {
913 virReportError(VIR_ERR_CONFIG_UNSUPPORTED
, "%s",
914 _("only RAW volumes are supported by this storage pool"));
918 VIR_FREE(vol
->target
.path
);
919 if (virAsprintf(&vol
->target
.path
, "%s/%s",
920 def
->source
.name
, vol
->name
) < 0)
924 if (virAsprintf(&vol
->key
, "%s/%s",
925 def
->source
.name
, vol
->name
) < 0)
931 static int virStorageBackendRBDCreateImage(rados_ioctx_t io
,
932 char *name
, long capacity
)
935 return rbd_create(io
, name
, capacity
, &order
);
939 virStorageBackendRBDBuildVol(virStoragePoolObjPtr pool
,
940 virStorageVolDefPtr vol
,
943 virStoragePoolDefPtr def
= virStoragePoolObjGetDef(pool
);
944 virStorageBackendRBDStatePtr ptr
= NULL
;
948 VIR_DEBUG("Creating RBD image %s/%s with size %llu",
949 def
->source
.name
, vol
->name
, vol
->target
.capacity
);
951 virCheckFlags(0, -1);
953 if (!vol
->target
.capacity
) {
954 virReportError(VIR_ERR_NO_SUPPORT
, "%s",
955 _("volume capacity required for this storage pool"));
959 if (vol
->target
.format
!= VIR_STORAGE_FILE_RAW
) {
960 virReportError(VIR_ERR_NO_SUPPORT
, "%s",
961 _("only RAW volumes are supported by this storage pool"));
965 if (vol
->target
.encryption
!= NULL
) {
966 virReportError(VIR_ERR_CONFIG_UNSUPPORTED
, "%s",
967 _("storage pool does not support encrypted volumes"));
971 if (!(ptr
= virStorageBackendRBDNewState(pool
)))
974 if ((r
= virStorageBackendRBDCreateImage(ptr
->ioctx
, vol
->name
,
975 vol
->target
.capacity
)) < 0) {
976 virReportSystemError(-r
, _("failed to create volume '%s/%s'"),
977 def
->source
.name
, vol
->name
);
984 virStorageBackendRBDFreeState(&ptr
);
989 virStorageBackendRBDImageInfo(rbd_image_t image
,
992 uint64_t *stripe_unit
,
993 uint64_t *stripe_count
)
999 if ((r
= rbd_get_old_format(image
, &oldformat
)) < 0) {
1000 virReportSystemError(-r
, _("failed to get the format of RBD image %s"),
1005 if (oldformat
!= 0) {
1006 virReportError(VIR_ERR_OPERATION_UNSUPPORTED
,
1007 _("RBD image %s is old format. Does not support "
1008 "extended features and striping"),
1013 if (volStorageBackendRBDGetFeatures(image
, volname
, features
) < 0)
1016 if ((r
= rbd_get_stripe_unit(image
, stripe_unit
)) < 0) {
1017 virReportSystemError(-r
, _("failed to get the stripe unit of RBD image %s"),
1022 if ((r
= rbd_get_stripe_count(image
, stripe_count
)) < 0) {
1023 virReportSystemError(-r
, _("failed to get the stripe count of RBD image %s"),
1034 /* Callback function for rbd_diff_iterate() */
1036 virStorageBackendRBDIterateCb(uint64_t offset ATTRIBUTE_UNUSED
,
1037 size_t length ATTRIBUTE_UNUSED
,
1038 int exists ATTRIBUTE_UNUSED
,
1042 * Just set that there is a diff for this snapshot, we do not care where
1044 * When it returns a negative number the rbd_diff_iterate() function will stop
1046 * That's why we return -1, meaning that there is a difference and we can stop
1047 * searching any further.
1054 virStorageBackendRBDSnapshotFindNoDiff(rbd_image_t image
,
1056 virBufferPtr snapname
)
1061 int max_snaps
= 128;
1064 rbd_image_info_t info
;
1065 VIR_AUTOFREE(rbd_snap_info_t
*) snaps
= NULL
;
1067 if ((r
= rbd_stat(image
, &info
, sizeof(info
))) < 0) {
1068 virReportSystemError(-r
, _("failed to stat the RBD image %s"),
1074 if (VIR_ALLOC_N(snaps
, max_snaps
))
1077 snap_count
= rbd_snap_list(image
, snaps
, &max_snaps
);
1078 if (snap_count
<= 0)
1081 } while (snap_count
== -ERANGE
);
1083 if (snap_count
<= 0) {
1084 if (snap_count
== 0)
1089 VIR_DEBUG("Found %d snapshots for RBD image %s", snap_count
, imgname
);
1091 for (i
= 0; i
< snap_count
; i
++) {
1092 VIR_DEBUG("Querying diff for RBD snapshot %s@%s", imgname
,
1095 /* The callback will set diff to non-zero if there is a diff */
1099 * rbd_diff_iterate2() is available in versions above Ceph 0.94 (Hammer)
1100 * It uses an object map inside Ceph which is faster than rbd_diff_iterate()
1101 * which iterates all objects.
1102 * LIBRBD_VERSION_CODE for Ceph 0.94 is 265. In 266 and upwards diff_iterate2
1105 #if LIBRBD_VERSION_CODE > 265
1106 r
= rbd_diff_iterate2(image
, snaps
[i
].name
, 0, info
.size
, 0, 1,
1107 virStorageBackendRBDIterateCb
, (void *)&diff
);
1109 r
= rbd_diff_iterate(image
, snaps
[i
].name
, 0, info
.size
,
1110 virStorageBackendRBDIterateCb
, (void *)&diff
);
1114 virReportSystemError(-r
, _("failed to iterate RBD snapshot %s@%s"),
1115 imgname
, snaps
[i
].name
);
1119 /* If diff is still set to zero we found a snapshot without deltas */
1121 VIR_DEBUG("RBD snapshot %s@%s has no delta", imgname
,
1123 virBufferAsprintf(snapname
, "%s", snaps
[i
].name
);
1128 VIR_DEBUG("RBD snapshot %s@%s has deltas. Continuing search.",
1129 imgname
, snaps
[i
].name
);
1136 rbd_snap_list_end(snaps
);
1142 virStorageBackendRBDSnapshotCreate(rbd_image_t image
,
1149 VIR_DEBUG("Creating RBD snapshot %s@%s", imgname
, snapname
);
1151 if ((r
= rbd_snap_create(image
, snapname
)) < 0) {
1152 virReportSystemError(-r
, _("failed to create RBD snapshot %s@%s"),
1164 virStorageBackendRBDSnapshotProtect(rbd_image_t image
,
1172 VIR_DEBUG("Querying if RBD snapshot %s@%s is protected", imgname
, snapname
);
1174 if ((r
= rbd_snap_is_protected(image
, snapname
, &protected)) < 0) {
1175 virReportSystemError(-r
, _("failed to verify if RBD snapshot %s@%s "
1176 "is protected"), imgname
, snapname
);
1180 if (protected == 0) {
1181 VIR_DEBUG("RBD Snapshot %s@%s is not protected, protecting",
1184 if ((r
= rbd_snap_protect(image
, snapname
)) < 0) {
1185 virReportSystemError(-r
, _("failed to protect RBD snapshot %s@%s"),
1190 VIR_DEBUG("RBD Snapshot %s@%s is already protected", imgname
, snapname
);
1200 virStorageBackendRBDCloneImage(rados_ioctx_t io
,
1208 uint64_t stripe_count
;
1209 uint64_t stripe_unit
;
1210 virBuffer snapname
= VIR_BUFFER_INITIALIZER
;
1211 rbd_image_t image
= NULL
;
1212 VIR_AUTOFREE(char *) snapname_buff
= NULL
;
1214 if ((r
= rbd_open(io
, origvol
, &image
, NULL
)) < 0) {
1215 virReportSystemError(-r
, _("failed to open the RBD image %s"),
1220 if ((virStorageBackendRBDImageInfo(image
, origvol
, &features
, &stripe_unit
,
1221 &stripe_count
)) < 0)
1225 * First we attempt to find a snapshot which has no differences between
1226 * the current state of the RBD image.
1228 * This prevents us from creating a new snapshot for every clone operation
1229 * while it could be that the original volume has not changed
1231 if (virStorageBackendRBDSnapshotFindNoDiff(image
, origvol
, &snapname
) < 0)
1235 * the virBuffer snapname will contain a snapshot's name if one without
1236 * deltas has been found.
1238 * If it's NULL we have to create a new snapshot and clone from there
1240 snapname_buff
= virBufferContentAndReset(&snapname
);
1242 if (snapname_buff
== NULL
) {
1243 VIR_DEBUG("No RBD snapshot with zero delta could be found for image %s",
1246 virBufferAsprintf(&snapname
, "libvirt-%d", (int)virRandomInt(65534));
1248 if (virBufferCheckError(&snapname
) < 0)
1251 snapname_buff
= virBufferContentAndReset(&snapname
);
1253 if (virStorageBackendRBDSnapshotCreate(image
, origvol
, snapname_buff
) < 0)
1258 VIR_DEBUG("Using snapshot name %s for cloning RBD image %s to %s",
1259 snapname_buff
, origvol
, newvol
);
1262 * RBD snapshots have to be 'protected' before they can be used
1263 * as a parent snapshot for a child image
1265 if ((r
= virStorageBackendRBDSnapshotProtect(image
, origvol
, snapname_buff
)) < 0)
1268 VIR_DEBUG("Performing RBD clone from %s to %s", origvol
, newvol
);
1270 if ((r
= rbd_clone2(io
, origvol
, snapname_buff
, io
, newvol
, features
,
1271 &order
, stripe_unit
, stripe_count
)) < 0) {
1272 virReportSystemError(-r
, _("failed to clone RBD volume %s to %s"),
1277 VIR_DEBUG("Cloned RBD image %s to %s", origvol
, newvol
);
1282 virBufferFreeAndReset(&snapname
);
1291 virStorageBackendRBDBuildVolFrom(virStoragePoolObjPtr pool
,
1292 virStorageVolDefPtr newvol
,
1293 virStorageVolDefPtr origvol
,
1296 virStoragePoolDefPtr def
= virStoragePoolObjGetDef(pool
);
1297 virStorageBackendRBDStatePtr ptr
= NULL
;
1300 VIR_DEBUG("Creating clone of RBD image %s/%s with name %s",
1301 def
->source
.name
, origvol
->name
, newvol
->name
);
1303 virCheckFlags(0, -1);
1305 if (!(ptr
= virStorageBackendRBDNewState(pool
)))
1308 if ((virStorageBackendRBDCloneImage(ptr
->ioctx
, origvol
->name
,
1315 virStorageBackendRBDFreeState(&ptr
);
1320 virStorageBackendRBDRefreshVol(virStoragePoolObjPtr pool
,
1321 virStorageVolDefPtr vol
)
1323 virStorageBackendRBDStatePtr ptr
= NULL
;
1326 if (!(ptr
= virStorageBackendRBDNewState(pool
)))
1329 if (volStorageBackendRBDRefreshVolInfo(vol
, pool
, ptr
) < 0)
1335 virStorageBackendRBDFreeState(&ptr
);
1340 virStorageBackendRBDResizeVol(virStoragePoolObjPtr pool
,
1341 virStorageVolDefPtr vol
,
1342 unsigned long long capacity
,
1345 virStorageBackendRBDStatePtr ptr
= NULL
;
1346 rbd_image_t image
= NULL
;
1350 virCheckFlags(0, -1);
1352 if (!(ptr
= virStorageBackendRBDNewState(pool
)))
1355 if ((r
= rbd_open(ptr
->ioctx
, vol
->name
, &image
, NULL
)) < 0) {
1356 virReportSystemError(-r
, _("failed to open the RBD image '%s'"),
1361 if ((r
= rbd_resize(image
, capacity
)) < 0) {
1362 virReportSystemError(-r
, _("failed to resize the RBD image '%s'"),
1372 virStorageBackendRBDFreeState(&ptr
);
1377 virStorageBackendRBDVolWipeZero(rbd_image_t image
,
1379 rbd_image_info_t
*info
,
1380 uint64_t stripe_count
)
1383 unsigned long long offset
= 0;
1384 unsigned long long length
;
1385 VIR_AUTOFREE(char *) writebuf
= NULL
;
1387 if (VIR_ALLOC_N(writebuf
, info
->obj_size
* stripe_count
) < 0)
1390 while (offset
< info
->size
) {
1391 length
= MIN((info
->size
- offset
), (info
->obj_size
* stripe_count
));
1393 if ((r
= rbd_write(image
, offset
, length
, writebuf
)) < 0) {
1394 virReportSystemError(-r
, _("writing %llu bytes failed on "
1395 "RBD image %s at offset %llu"),
1396 length
, imgname
, offset
);
1400 VIR_DEBUG("Wrote %llu bytes to RBD image %s at offset %llu",
1401 length
, imgname
, offset
);
1410 virStorageBackendRBDVolWipeDiscard(rbd_image_t image
,
1412 rbd_image_info_t
*info
,
1413 uint64_t stripe_count
)
1417 unsigned long long offset
= 0;
1418 unsigned long long length
;
1420 VIR_DEBUG("Wiping RBD %s volume using discard)", imgname
);
1422 while (offset
< info
->size
) {
1423 length
= MIN((info
->size
- offset
), (info
->obj_size
* stripe_count
));
1425 if ((r
= rbd_discard(image
, offset
, length
)) < 0) {
1426 virReportSystemError(-r
, _("discarding %llu bytes failed on "
1427 "RBD image %s at offset %llu"),
1428 length
, imgname
, offset
);
1432 VIR_DEBUG("Discarded %llu bytes of RBD image %s at offset %llu",
1433 length
, imgname
, offset
);
1445 virStorageBackendRBDVolWipe(virStoragePoolObjPtr pool
,
1446 virStorageVolDefPtr vol
,
1447 unsigned int algorithm
,
1450 virStorageBackendRBDStatePtr ptr
= NULL
;
1451 virStoragePoolDefPtr def
;
1452 rbd_image_t image
= NULL
;
1453 rbd_image_info_t info
;
1454 uint64_t stripe_count
;
1458 virCheckFlags(0, -1);
1460 virObjectLock(pool
);
1461 def
= virStoragePoolObjGetDef(pool
);
1462 VIR_DEBUG("Wiping RBD image %s/%s", def
->source
.name
, vol
->name
);
1463 ptr
= virStorageBackendRBDNewState(pool
);
1464 virObjectUnlock(pool
);
1469 if ((r
= rbd_open(ptr
->ioctx
, vol
->name
, &image
, NULL
)) < 0) {
1470 virReportSystemError(-r
, _("failed to open the RBD image %s"),
1475 if ((r
= rbd_stat(image
, &info
, sizeof(info
))) < 0) {
1476 virReportSystemError(-r
, _("failed to stat the RBD image %s"),
1481 if ((r
= rbd_get_stripe_count(image
, &stripe_count
)) < 0) {
1482 virReportSystemError(-r
, _("failed to get stripe count of RBD image %s"),
1487 VIR_DEBUG("Need to wipe %"PRIu64
" bytes from RBD image %s/%s",
1488 info
.size
, def
->source
.name
, vol
->name
);
1490 switch ((virStorageVolWipeAlgorithm
) algorithm
) {
1491 case VIR_STORAGE_VOL_WIPE_ALG_ZERO
:
1492 r
= virStorageBackendRBDVolWipeZero(image
, vol
->name
,
1493 &info
, stripe_count
);
1495 case VIR_STORAGE_VOL_WIPE_ALG_TRIM
:
1496 r
= virStorageBackendRBDVolWipeDiscard(image
, vol
->name
,
1497 &info
, stripe_count
);
1499 case VIR_STORAGE_VOL_WIPE_ALG_NNSA
:
1500 case VIR_STORAGE_VOL_WIPE_ALG_DOD
:
1501 case VIR_STORAGE_VOL_WIPE_ALG_BSI
:
1502 case VIR_STORAGE_VOL_WIPE_ALG_GUTMANN
:
1503 case VIR_STORAGE_VOL_WIPE_ALG_SCHNEIER
:
1504 case VIR_STORAGE_VOL_WIPE_ALG_PFITZNER7
:
1505 case VIR_STORAGE_VOL_WIPE_ALG_PFITZNER33
:
1506 case VIR_STORAGE_VOL_WIPE_ALG_RANDOM
:
1507 case VIR_STORAGE_VOL_WIPE_ALG_LAST
:
1508 virReportError(VIR_ERR_INVALID_ARG
, _("unsupported algorithm %d"),
1514 virReportSystemError(-r
, _("failed to wipe RBD image %s"),
1525 virStorageBackendRBDFreeState(&ptr
);
1531 virStorageBackend virStorageBackendRBD
= {
1532 .type
= VIR_STORAGE_POOL_RBD
,
1534 .refreshPool
= virStorageBackendRBDRefreshPool
,
1535 .createVol
= virStorageBackendRBDCreateVol
,
1536 .buildVol
= virStorageBackendRBDBuildVol
,
1537 .buildVolFrom
= virStorageBackendRBDBuildVolFrom
,
1538 .refreshVol
= virStorageBackendRBDRefreshVol
,
1539 .deleteVol
= virStorageBackendRBDDeleteVol
,
1540 .resizeVol
= virStorageBackendRBDResizeVol
,
1541 .wipeVol
= virStorageBackendRBDVolWipe
1545 static virStoragePoolXMLNamespace virStoragePoolRBDXMLNamespace
= {
1546 .parse
= virStoragePoolDefRBDNamespaceParse
,
1547 .free
= virStoragePoolDefRBDNamespaceFree
,
1548 .format
= virStoragePoolDefRBDNamespaceFormatXML
,
1549 .href
= virStoragePoolDefRBDNamespaceHref
,
1554 virStorageBackendRBDRegister(void)
1556 if (virStorageBackendRegister(&virStorageBackendRBD
) < 0)
1559 return virStorageBackendNamespaceInit(VIR_STORAGE_POOL_RBD
,
1560 &virStoragePoolRBDXMLNamespace
);