4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 #define NBDKIT_API_VERSION 2
43 #include <nbdkit-plugin.h>
53 command_type_string (enum command_type type
)
56 case INFO
: return "info";
57 case READ
: return "read";
58 case WRITE
: return "write";
59 case FLUSH
: return "flush";
60 case CAN_EXTENTS
: return "can_extents";
61 case EXTENTS
: return "extents";
62 case STOP
: return "stop";
67 /* Send command to the background thread and wait for completion.
70 * On error, calls nbdkit_error and returns -1.
73 send_command_and_wait (struct vddk_handle
*h
, struct command
*cmd
)
75 /* Add the command to the command queue. */
77 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h
->commands_lock
);
80 if (command_queue_append (&h
->commands
, cmd
) == -1)
81 /* On error command_queue_append will call nbdkit_error. */
84 /* Signal the caller if it could be sleeping on an empty queue. */
85 if (h
->commands
.len
== 1)
86 pthread_cond_signal (&h
->commands_cond
);
88 /* This will be used to signal command completion back to us. */
89 pthread_mutex_init (&cmd
->mutex
, NULL
);
90 pthread_cond_init (&cmd
->cond
, NULL
);
93 /* Wait for the command to be completed by the background thread. */
95 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd
->mutex
);
96 while (cmd
->status
== SUBMITTED
)
97 pthread_cond_wait (&cmd
->cond
, &cmd
->mutex
);
100 pthread_mutex_destroy (&cmd
->mutex
);
101 pthread_cond_destroy (&cmd
->cond
);
103 /* On error the background thread will call nbdkit_error. */
104 switch (cmd
->status
) {
105 case SUCCEEDED
: return 0;
106 case FAILED
: return -1;
111 /* Asynchronous commands are completed when this function is called. */
113 complete_command (void *vp
, VixError result
)
115 struct command
*cmd
= vp
;
117 if (vddk_debug_datapath
)
118 nbdkit_debug ("command %" PRIu64
" completed", cmd
->id
);
120 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd
->mutex
);
122 if (result
== VIX_OK
) {
123 cmd
->status
= SUCCEEDED
;
125 VDDK_ERROR (result
, "command %" PRIu64
": asynchronous %s failed",
126 cmd
->id
, command_type_string (cmd
->type
));
127 cmd
->status
= FAILED
;
130 pthread_cond_signal (&cmd
->cond
);
133 /* Wait for any asynchronous commands to complete. */
135 do_stop (struct command
*cmd
, struct vddk_handle
*h
)
139 /* Because we assume VDDK >= 6.5, VixDiskLib_Wait must exist. */
140 VDDK_CALL_START (VixDiskLib_Wait
, "handle")
141 err
= VixDiskLib_Wait (h
->handle
);
142 VDDK_CALL_END (VixDiskLib_Wait
, 0);
144 VDDK_ERROR (err
, "VixDiskLib_Wait");
145 /* In the end this error indication is ignored because it only
146 * happens on the close path when we cannot handle errors.
153 /* Disk info command. */
155 do_info (struct command
*cmd
, struct vddk_handle
*h
)
158 VixDiskLibInfo
**info
= cmd
->ptr
;
160 VDDK_CALL_START (VixDiskLib_GetInfo
, "handle, info")
161 err
= VixDiskLib_GetInfo (h
->handle
, info
);
162 VDDK_CALL_END (VixDiskLib_GetInfo
, 0);
164 VDDK_ERROR (err
, "VixDiskLib_GetInfo");
168 if (vddk_debug_diskinfo
) {
169 nbdkit_debug ("disk info: capacity: %" PRIu64
" sectors",
171 nbdkit_debug ("disk info: biosGeo: C:%" PRIu32
" H:%" PRIu32
" S:%" PRIu32
,
172 (*info
)->biosGeo
.cylinders
,
173 (*info
)->biosGeo
.heads
,
174 (*info
)->biosGeo
.sectors
);
175 nbdkit_debug ("disk info: physGeo: C:%" PRIu32
" H:%" PRIu32
" S:%" PRIu32
,
176 (*info
)->physGeo
.cylinders
,
177 (*info
)->physGeo
.heads
,
178 (*info
)->physGeo
.sectors
);
179 nbdkit_debug ("disk info: adapter type: %d",
180 (int) (*info
)->adapterType
);
181 nbdkit_debug ("disk info: num links: %d", (*info
)->numLinks
);
182 nbdkit_debug ("disk info: parent filename hint: %s",
183 (*info
)->parentFileNameHint
? : "NULL");
184 nbdkit_debug ("disk info: uuid: %s",
185 (*info
)->uuid
? : "NULL");
186 if (library_version
>= 7) {
187 nbdkit_debug ("disk info: sector size: "
188 "logical %" PRIu32
" physical %" PRIu32
,
189 (*info
)->logicalSectorSize
,
190 (*info
)->physicalSectorSize
);
198 do_read (struct command
*cmd
, struct vddk_handle
*h
)
201 uint32_t count
= cmd
->count
;
202 uint64_t offset
= cmd
->offset
;
203 void *buf
= cmd
->ptr
;
205 /* Align to sectors. */
206 if (!IS_ALIGNED (offset
, VIXDISKLIB_SECTOR_SIZE
)) {
207 nbdkit_error ("%s is not aligned to sectors", "read");
211 if (!IS_ALIGNED (count
, VIXDISKLIB_SECTOR_SIZE
)) {
212 nbdkit_error ("%s is not aligned to sectors", "read");
216 offset
/= VIXDISKLIB_SECTOR_SIZE
;
217 count
/= VIXDISKLIB_SECTOR_SIZE
;
219 VDDK_CALL_START (VixDiskLib_ReadAsync
,
220 "handle, %" PRIu64
" sectors, "
221 "%" PRIu32
" sectors, buffer, callback, %" PRIu64
,
222 offset
, count
, cmd
->id
)
223 err
= VixDiskLib_ReadAsync (h
->handle
, offset
, count
, buf
,
224 complete_command
, cmd
);
225 VDDK_CALL_END (VixDiskLib_ReadAsync
, count
* VIXDISKLIB_SECTOR_SIZE
);
226 if (err
!= VIX_ASYNC
) {
227 VDDK_ERROR (err
, "VixDiskLib_ReadAsync");
235 do_write (struct command
*cmd
, struct vddk_handle
*h
)
238 uint32_t count
= cmd
->count
;
239 uint64_t offset
= cmd
->offset
;
240 const void *buf
= cmd
->ptr
;
242 /* Align to sectors. */
243 if (!IS_ALIGNED (offset
, VIXDISKLIB_SECTOR_SIZE
)) {
244 nbdkit_error ("%s is not aligned to sectors", "write");
248 if (!IS_ALIGNED (count
, VIXDISKLIB_SECTOR_SIZE
)) {
249 nbdkit_error ("%s is not aligned to sectors", "write");
253 offset
/= VIXDISKLIB_SECTOR_SIZE
;
254 count
/= VIXDISKLIB_SECTOR_SIZE
;
256 VDDK_CALL_START (VixDiskLib_WriteAsync
,
257 "handle, %" PRIu64
" sectors, "
258 "%" PRIu32
" sectors, buffer, callback, %" PRIu64
,
259 offset
, count
, cmd
->id
)
260 err
= VixDiskLib_WriteAsync (h
->handle
, offset
, count
, buf
,
261 complete_command
, cmd
);
262 VDDK_CALL_END (VixDiskLib_WriteAsync
, count
* VIXDISKLIB_SECTOR_SIZE
);
263 if (err
!= VIX_ASYNC
) {
264 VDDK_ERROR (err
, "VixDiskLib_WriteAsync");
272 do_flush (struct command
*cmd
, struct vddk_handle
*h
)
276 /* It seems safer to wait for outstanding asynchronous commands to
277 * complete before doing a flush, so do this but ignore errors
278 * except to print them.
280 VDDK_CALL_START (VixDiskLib_Wait
, "handle")
281 err
= VixDiskLib_Wait (h
->handle
);
282 VDDK_CALL_END (VixDiskLib_Wait
, 0);
284 VDDK_ERROR (err
, "VixDiskLib_Wait");
286 /* The documentation for Flush is missing, but the comment in the
287 * header file seems to indicate that it waits for WriteAsync
288 * commands to finish. There's a new function Wait to wait for
289 * those. However I verified using strace that in fact Flush calls
290 * fsync on the file so it appears to be the correct call to use
293 VDDK_CALL_START (VixDiskLib_Flush
, "handle")
294 err
= VixDiskLib_Flush (h
->handle
);
295 VDDK_CALL_END (VixDiskLib_Flush
, 0);
297 VDDK_ERROR (err
, "VixDiskLib_Flush");
305 do_can_extents (struct command
*cmd
, struct vddk_handle
*h
)
308 VixDiskLibBlockList
*block_list
;
310 /* This call was added in VDDK 6.7. In earlier versions the
311 * function pointer will be NULL and we cannot query extents.
313 if (VixDiskLib_QueryAllocatedBlocks
== NULL
) {
314 nbdkit_debug ("can_extents: VixDiskLib_QueryAllocatedBlocks == NULL, "
315 "probably this is VDDK < 6.7");
319 /* Suppress errors around this call. See:
320 * https://bugzilla.redhat.com/show_bug.cgi?id=1709211#c7
322 error_suppression
= 1;
324 /* However even when the call is available it rarely works well so
325 * the best thing we can do here is to try the call and if it's
326 * non-functional return false.
328 VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks
,
329 "handle, 0, %d sectors, %d sectors",
330 VIXDISKLIB_MIN_CHUNK_SIZE
, VIXDISKLIB_MIN_CHUNK_SIZE
)
331 err
= VixDiskLib_QueryAllocatedBlocks (h
->handle
,
332 0, VIXDISKLIB_MIN_CHUNK_SIZE
,
333 VIXDISKLIB_MIN_CHUNK_SIZE
,
335 VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks
, 0);
336 error_suppression
= 0;
338 VDDK_CALL_START (VixDiskLib_FreeBlockList
, "block_list")
339 VixDiskLib_FreeBlockList (block_list
);
340 VDDK_CALL_END (VixDiskLib_FreeBlockList
, 0);
343 char *errmsg
= VixDiskLib_GetErrorText (err
, NULL
);
344 nbdkit_debug ("can_extents: "
345 "VixDiskLib_QueryAllocatedBlocks test failed, "
346 "extents support will be disabled: "
347 "original error: %s",
349 VixDiskLib_FreeErrorText (errmsg
);
356 /* Add an extent to the list of extents. */
358 add_extent (struct nbdkit_extents
*extents
,
359 uint64_t *position
, uint64_t next_position
, bool is_hole
)
362 const uint64_t length
= next_position
- *position
;
365 type
= NBDKIT_EXTENT_HOLE
;
366 /* Images opened as single link might be backed by another file in the
367 chain, so the holes are not guaranteed to be zeroes. */
369 type
|= NBDKIT_EXTENT_ZERO
;
372 assert (*position
<= next_position
);
373 if (*position
== next_position
)
376 if (vddk_debug_extents
)
377 nbdkit_debug ("adding extent type %s at [%" PRIu64
"...%" PRIu64
"]",
378 is_hole
? "hole" : "allocated data",
379 *position
, next_position
-1);
380 if (nbdkit_add_extent (extents
, *position
, length
, type
) == -1)
383 *position
= next_position
;
388 do_extents (struct command
*cmd
, struct vddk_handle
*h
)
390 uint32_t count
= cmd
->count
;
391 uint64_t offset
= cmd
->offset
;
392 bool req_one
= cmd
->req_one
;
393 struct nbdkit_extents
*extents
= cmd
->ptr
;
394 uint64_t position
, end
, start_sector
;
397 end
= offset
+ count
;
399 /* We can only query whole chunks. Therefore start with the
400 * first chunk before offset.
403 ROUND_DOWN (offset
, VIXDISKLIB_MIN_CHUNK_SIZE
* VIXDISKLIB_SECTOR_SIZE
)
404 / VIXDISKLIB_SECTOR_SIZE
;
405 while (start_sector
* VIXDISKLIB_SECTOR_SIZE
< end
) {
408 uint64_t nr_chunks
, nr_sectors
;
409 VixDiskLibBlockList
*block_list
;
411 assert (IS_ALIGNED (start_sector
, VIXDISKLIB_MIN_CHUNK_SIZE
));
414 ROUND_UP (end
- start_sector
* VIXDISKLIB_SECTOR_SIZE
,
415 VIXDISKLIB_MIN_CHUNK_SIZE
* VIXDISKLIB_SECTOR_SIZE
)
416 / (VIXDISKLIB_MIN_CHUNK_SIZE
* VIXDISKLIB_SECTOR_SIZE
);
417 nr_chunks
= MIN (nr_chunks
, VIXDISKLIB_MAX_CHUNK_NUMBER
);
418 nr_sectors
= nr_chunks
* VIXDISKLIB_MIN_CHUNK_SIZE
;
420 VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks
,
421 "handle, %" PRIu64
" sectors, %" PRIu64
" sectors, "
423 start_sector
, nr_sectors
, VIXDISKLIB_MIN_CHUNK_SIZE
)
424 err
= VixDiskLib_QueryAllocatedBlocks (h
->handle
,
425 start_sector
, nr_sectors
,
426 VIXDISKLIB_MIN_CHUNK_SIZE
,
428 VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks
, 0);
430 VDDK_ERROR (err
, "VixDiskLib_QueryAllocatedBlocks");
434 for (i
= 0; i
< block_list
->numBlocks
; ++i
) {
435 uint64_t blk_offset
, blk_length
;
437 blk_offset
= block_list
->blocks
[i
].offset
* VIXDISKLIB_SECTOR_SIZE
;
438 blk_length
= block_list
->blocks
[i
].length
* VIXDISKLIB_SECTOR_SIZE
;
440 /* The query returns allocated blocks. We must insert holes
441 * between the blocks as necessary.
443 if ((position
< blk_offset
&&
444 add_extent (extents
, &position
, blk_offset
, true) == -1) ||
445 (add_extent (extents
,
446 &position
, blk_offset
+ blk_length
, false) == -1)) {
447 VDDK_CALL_START (VixDiskLib_FreeBlockList
, "block_list")
448 VixDiskLib_FreeBlockList (block_list
);
449 VDDK_CALL_END (VixDiskLib_FreeBlockList
, 0);
453 VDDK_CALL_START (VixDiskLib_FreeBlockList
, "block_list")
454 VixDiskLib_FreeBlockList (block_list
);
455 VDDK_CALL_END (VixDiskLib_FreeBlockList
, 0);
457 /* There's an implicit hole after the returned list of blocks,
458 * up to the end of the QueryAllocatedBlocks request.
460 if (add_extent (extents
,
462 (start_sector
+ nr_sectors
) * VIXDISKLIB_SECTOR_SIZE
,
467 start_sector
+= nr_sectors
;
469 /* If one extent was requested, as long as we've added an extent
470 * overlapping the original offset we're done.
472 if (req_one
&& position
> offset
)
479 /* Background worker thread, one per connection, which is where the
480 * VDDK commands are issued.
483 vddk_worker_thread (void *handle
)
485 struct vddk_handle
*h
= handle
;
493 /* Wait until we are sent at least one command. */
495 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h
->commands_lock
);
496 while (h
->commands
.len
== 0)
497 pthread_cond_wait (&h
->commands_cond
, &h
->commands_lock
);
498 cmd
= h
->commands
.ptr
[0];
499 command_queue_remove (&h
->commands
, 0);
504 r
= do_stop (cmd
, h
);
509 r
= do_info (cmd
, h
);
513 r
= do_read (cmd
, h
);
514 /* If async is true, don't retire this command now. */
519 r
= do_write (cmd
, h
);
520 /* If async is true, don't retire this command now. */
525 r
= do_flush (cmd
, h
);
529 r
= do_can_extents (cmd
, h
);
531 *(int *)cmd
->ptr
= r
;
535 r
= do_extents (cmd
, h
);
538 default: abort (); /* impossible, but keeps GCC happy */
542 /* Update the command status. */
543 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd
->mutex
);
544 cmd
->status
= r
>= 0 ? SUCCEEDED
: FAILED
;
546 /* For synchronous commands signal the caller thread that the
547 * command has completed. (Asynchronous commands are completed in
548 * the callback handler).
550 pthread_cond_signal (&cmd
->cond
);
552 } /* while (!stop) */
554 /* Exit the worker thread. */