Update Red Hat Copyright Notices
[nbdkit.git] / plugins / vddk / worker.c
blob467d00caa4270f268fe90a69d1c5f195cf4d4f38
/* nbdkit
 * Copyright Red Hat
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * * Neither the name of Red Hat nor the names of its contributors may be
 * used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <errno.h>

#include <pthread.h>

#define NBDKIT_API_VERSION 2
#include <nbdkit-plugin.h>

#include "cleanup.h"
#include "minmax.h"
#include "rounding.h"
#include "vector.h"

#include "vddk.h"
52 const char *
53 command_type_string (enum command_type type)
55 switch (type) {
56 case INFO: return "info";
57 case READ: return "read";
58 case WRITE: return "write";
59 case FLUSH: return "flush";
60 case CAN_EXTENTS: return "can_extents";
61 case EXTENTS: return "extents";
62 case STOP: return "stop";
63 default: abort ();
67 /* Send command to the background thread and wait for completion.
69 * Returns 0 for OK
70 * On error, calls nbdkit_error and returns -1.
72 int
73 send_command_and_wait (struct vddk_handle *h, struct command *cmd)
75 /* Add the command to the command queue. */
77 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->commands_lock);
78 cmd->id = h->id++;
80 if (command_queue_append (&h->commands, cmd) == -1)
81 /* On error command_queue_append will call nbdkit_error. */
82 return -1;
84 /* Signal the caller if it could be sleeping on an empty queue. */
85 if (h->commands.len == 1)
86 pthread_cond_signal (&h->commands_cond);
88 /* This will be used to signal command completion back to us. */
89 pthread_mutex_init (&cmd->mutex, NULL);
90 pthread_cond_init (&cmd->cond, NULL);
93 /* Wait for the command to be completed by the background thread. */
95 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex);
96 while (cmd->status == SUBMITTED)
97 pthread_cond_wait (&cmd->cond, &cmd->mutex);
100 pthread_mutex_destroy (&cmd->mutex);
101 pthread_cond_destroy (&cmd->cond);
103 /* On error the background thread will call nbdkit_error. */
104 switch (cmd->status) {
105 case SUCCEEDED: return 0;
106 case FAILED: return -1;
107 default: abort ();
111 /* Asynchronous commands are completed when this function is called. */
112 static void
113 complete_command (void *vp, VixError result)
115 struct command *cmd = vp;
117 if (vddk_debug_datapath)
118 nbdkit_debug ("command %" PRIu64 " completed", cmd->id);
120 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex);
122 if (result == VIX_OK) {
123 cmd->status = SUCCEEDED;
124 } else {
125 VDDK_ERROR (result, "command %" PRIu64 ": asynchronous %s failed",
126 cmd->id, command_type_string (cmd->type));
127 cmd->status = FAILED;
130 pthread_cond_signal (&cmd->cond);
133 /* Wait for any asynchronous commands to complete. */
134 static int
135 do_stop (struct command *cmd, struct vddk_handle *h)
137 VixError err;
139 /* Because we assume VDDK >= 6.5, VixDiskLib_Wait must exist. */
140 VDDK_CALL_START (VixDiskLib_Wait, "handle")
141 err = VixDiskLib_Wait (h->handle);
142 VDDK_CALL_END (VixDiskLib_Wait, 0);
143 if (err != VIX_OK) {
144 VDDK_ERROR (err, "VixDiskLib_Wait");
145 /* In the end this error indication is ignored because it only
146 * happens on the close path when we cannot handle errors.
148 return -1;
150 return 0;
153 /* Disk info command. */
154 static int64_t
155 do_info (struct command *cmd, struct vddk_handle *h)
157 VixError err;
158 VixDiskLibInfo **info = cmd->ptr;
160 VDDK_CALL_START (VixDiskLib_GetInfo, "handle, info")
161 err = VixDiskLib_GetInfo (h->handle, info);
162 VDDK_CALL_END (VixDiskLib_GetInfo, 0);
163 if (err != VIX_OK) {
164 VDDK_ERROR (err, "VixDiskLib_GetInfo");
165 return -1;
168 if (vddk_debug_diskinfo) {
169 nbdkit_debug ("disk info: capacity: %" PRIu64 " sectors",
170 (*info)->capacity);
171 nbdkit_debug ("disk info: biosGeo: C:%" PRIu32 " H:%" PRIu32 " S:%" PRIu32,
172 (*info)->biosGeo.cylinders,
173 (*info)->biosGeo.heads,
174 (*info)->biosGeo.sectors);
175 nbdkit_debug ("disk info: physGeo: C:%" PRIu32 " H:%" PRIu32 " S:%" PRIu32,
176 (*info)->physGeo.cylinders,
177 (*info)->physGeo.heads,
178 (*info)->physGeo.sectors);
179 nbdkit_debug ("disk info: adapter type: %d",
180 (int) (*info)->adapterType);
181 nbdkit_debug ("disk info: num links: %d", (*info)->numLinks);
182 nbdkit_debug ("disk info: parent filename hint: %s",
183 (*info)->parentFileNameHint ? : "NULL");
184 nbdkit_debug ("disk info: uuid: %s",
185 (*info)->uuid ? : "NULL");
186 if (library_version >= 7) {
187 nbdkit_debug ("disk info: sector size: "
188 "logical %" PRIu32 " physical %" PRIu32,
189 (*info)->logicalSectorSize,
190 (*info)->physicalSectorSize);
194 return 0;
197 static int
198 do_read (struct command *cmd, struct vddk_handle *h)
200 VixError err;
201 uint32_t count = cmd->count;
202 uint64_t offset = cmd->offset;
203 void *buf = cmd->ptr;
205 /* Align to sectors. */
206 if (!IS_ALIGNED (offset, VIXDISKLIB_SECTOR_SIZE)) {
207 nbdkit_error ("%s is not aligned to sectors", "read");
208 errno = EINVAL;
209 return -1;
211 if (!IS_ALIGNED (count, VIXDISKLIB_SECTOR_SIZE)) {
212 nbdkit_error ("%s is not aligned to sectors", "read");
213 errno = EINVAL;
214 return -1;
216 offset /= VIXDISKLIB_SECTOR_SIZE;
217 count /= VIXDISKLIB_SECTOR_SIZE;
219 VDDK_CALL_START (VixDiskLib_ReadAsync,
220 "handle, %" PRIu64 " sectors, "
221 "%" PRIu32 " sectors, buffer, callback, %" PRIu64,
222 offset, count, cmd->id)
223 err = VixDiskLib_ReadAsync (h->handle, offset, count, buf,
224 complete_command, cmd);
225 VDDK_CALL_END (VixDiskLib_ReadAsync, count * VIXDISKLIB_SECTOR_SIZE);
226 if (err != VIX_ASYNC) {
227 VDDK_ERROR (err, "VixDiskLib_ReadAsync");
228 return -1;
231 return 0;
234 static int
235 do_write (struct command *cmd, struct vddk_handle *h)
237 VixError err;
238 uint32_t count = cmd->count;
239 uint64_t offset = cmd->offset;
240 const void *buf = cmd->ptr;
242 /* Align to sectors. */
243 if (!IS_ALIGNED (offset, VIXDISKLIB_SECTOR_SIZE)) {
244 nbdkit_error ("%s is not aligned to sectors", "write");
245 errno = EINVAL;
246 return -1;
248 if (!IS_ALIGNED (count, VIXDISKLIB_SECTOR_SIZE)) {
249 nbdkit_error ("%s is not aligned to sectors", "write");
250 errno = EINVAL;
251 return -1;
253 offset /= VIXDISKLIB_SECTOR_SIZE;
254 count /= VIXDISKLIB_SECTOR_SIZE;
256 VDDK_CALL_START (VixDiskLib_WriteAsync,
257 "handle, %" PRIu64 " sectors, "
258 "%" PRIu32 " sectors, buffer, callback, %" PRIu64,
259 offset, count, cmd->id)
260 err = VixDiskLib_WriteAsync (h->handle, offset, count, buf,
261 complete_command, cmd);
262 VDDK_CALL_END (VixDiskLib_WriteAsync, count * VIXDISKLIB_SECTOR_SIZE);
263 if (err != VIX_ASYNC) {
264 VDDK_ERROR (err, "VixDiskLib_WriteAsync");
265 return -1;
268 return 0;
271 static int
272 do_flush (struct command *cmd, struct vddk_handle *h)
274 VixError err;
276 /* It seems safer to wait for outstanding asynchronous commands to
277 * complete before doing a flush, so do this but ignore errors
278 * except to print them.
280 VDDK_CALL_START (VixDiskLib_Wait, "handle")
281 err = VixDiskLib_Wait (h->handle);
282 VDDK_CALL_END (VixDiskLib_Wait, 0);
283 if (err != VIX_OK)
284 VDDK_ERROR (err, "VixDiskLib_Wait");
286 /* The documentation for Flush is missing, but the comment in the
287 * header file seems to indicate that it waits for WriteAsync
288 * commands to finish. There's a new function Wait to wait for
289 * those. However I verified using strace that in fact Flush calls
290 * fsync on the file so it appears to be the correct call to use
291 * here.
293 VDDK_CALL_START (VixDiskLib_Flush, "handle")
294 err = VixDiskLib_Flush (h->handle);
295 VDDK_CALL_END (VixDiskLib_Flush, 0);
296 if (err != VIX_OK) {
297 VDDK_ERROR (err, "VixDiskLib_Flush");
298 return -1;
301 return 0;
304 static int
305 do_can_extents (struct command *cmd, struct vddk_handle *h)
307 VixError err;
308 VixDiskLibBlockList *block_list;
310 /* This call was added in VDDK 6.7. In earlier versions the
311 * function pointer will be NULL and we cannot query extents.
313 if (VixDiskLib_QueryAllocatedBlocks == NULL) {
314 nbdkit_debug ("can_extents: VixDiskLib_QueryAllocatedBlocks == NULL, "
315 "probably this is VDDK < 6.7");
316 return 0;
319 /* Suppress errors around this call. See:
320 * https://bugzilla.redhat.com/show_bug.cgi?id=1709211#c7
322 error_suppression = 1;
324 /* However even when the call is available it rarely works well so
325 * the best thing we can do here is to try the call and if it's
326 * non-functional return false.
328 VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
329 "handle, 0, %d sectors, %d sectors",
330 VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE)
331 err = VixDiskLib_QueryAllocatedBlocks (h->handle,
332 0, VIXDISKLIB_MIN_CHUNK_SIZE,
333 VIXDISKLIB_MIN_CHUNK_SIZE,
334 &block_list);
335 VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
336 error_suppression = 0;
337 if (err == VIX_OK) {
338 VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
339 VixDiskLib_FreeBlockList (block_list);
340 VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
342 if (err != VIX_OK) {
343 char *errmsg = VixDiskLib_GetErrorText (err, NULL);
344 nbdkit_debug ("can_extents: "
345 "VixDiskLib_QueryAllocatedBlocks test failed, "
346 "extents support will be disabled: "
347 "original error: %s",
348 errmsg);
349 VixDiskLib_FreeErrorText (errmsg);
350 return 0;
353 return 1;
356 /* Add an extent to the list of extents. */
357 static int
358 add_extent (struct nbdkit_extents *extents,
359 uint64_t *position, uint64_t next_position, bool is_hole)
361 uint32_t type = 0;
362 const uint64_t length = next_position - *position;
364 if (is_hole) {
365 type = NBDKIT_EXTENT_HOLE;
366 /* Images opened as single link might be backed by another file in the
367 chain, so the holes are not guaranteed to be zeroes. */
368 if (!single_link)
369 type |= NBDKIT_EXTENT_ZERO;
372 assert (*position <= next_position);
373 if (*position == next_position)
374 return 0;
376 if (vddk_debug_extents)
377 nbdkit_debug ("adding extent type %s at [%" PRIu64 "...%" PRIu64 "]",
378 is_hole ? "hole" : "allocated data",
379 *position, next_position-1);
380 if (nbdkit_add_extent (extents, *position, length, type) == -1)
381 return -1;
383 *position = next_position;
384 return 0;
387 static int
388 do_extents (struct command *cmd, struct vddk_handle *h)
390 uint32_t count = cmd->count;
391 uint64_t offset = cmd->offset;
392 bool req_one = cmd->req_one;
393 struct nbdkit_extents *extents = cmd->ptr;
394 uint64_t position, end, start_sector;
396 position = offset;
397 end = offset + count;
399 /* We can only query whole chunks. Therefore start with the
400 * first chunk before offset.
402 start_sector =
403 ROUND_DOWN (offset, VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE)
404 / VIXDISKLIB_SECTOR_SIZE;
405 while (start_sector * VIXDISKLIB_SECTOR_SIZE < end) {
406 VixError err;
407 uint32_t i;
408 uint64_t nr_chunks, nr_sectors;
409 VixDiskLibBlockList *block_list;
411 assert (IS_ALIGNED (start_sector, VIXDISKLIB_MIN_CHUNK_SIZE));
413 nr_chunks =
414 ROUND_UP (end - start_sector * VIXDISKLIB_SECTOR_SIZE,
415 VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE)
416 / (VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE);
417 nr_chunks = MIN (nr_chunks, VIXDISKLIB_MAX_CHUNK_NUMBER);
418 nr_sectors = nr_chunks * VIXDISKLIB_MIN_CHUNK_SIZE;
420 VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
421 "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, "
422 "%d sectors",
423 start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE)
424 err = VixDiskLib_QueryAllocatedBlocks (h->handle,
425 start_sector, nr_sectors,
426 VIXDISKLIB_MIN_CHUNK_SIZE,
427 &block_list);
428 VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
429 if (err != VIX_OK) {
430 VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks");
431 return -1;
434 for (i = 0; i < block_list->numBlocks; ++i) {
435 uint64_t blk_offset, blk_length;
437 blk_offset = block_list->blocks[i].offset * VIXDISKLIB_SECTOR_SIZE;
438 blk_length = block_list->blocks[i].length * VIXDISKLIB_SECTOR_SIZE;
440 /* The query returns allocated blocks. We must insert holes
441 * between the blocks as necessary.
443 if ((position < blk_offset &&
444 add_extent (extents, &position, blk_offset, true) == -1) ||
445 (add_extent (extents,
446 &position, blk_offset + blk_length, false) == -1)) {
447 VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
448 VixDiskLib_FreeBlockList (block_list);
449 VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
450 return -1;
453 VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
454 VixDiskLib_FreeBlockList (block_list);
455 VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
457 /* There's an implicit hole after the returned list of blocks,
458 * up to the end of the QueryAllocatedBlocks request.
460 if (add_extent (extents,
461 &position,
462 (start_sector + nr_sectors) * VIXDISKLIB_SECTOR_SIZE,
463 true) == -1) {
464 return -1;
467 start_sector += nr_sectors;
469 /* If one extent was requested, as long as we've added an extent
470 * overlapping the original offset we're done.
472 if (req_one && position > offset)
473 break;
476 return 0;
479 /* Background worker thread, one per connection, which is where the
480 * VDDK commands are issued.
482 void *
483 vddk_worker_thread (void *handle)
485 struct vddk_handle *h = handle;
486 bool stop = false;
488 while (!stop) {
489 struct command *cmd;
490 int r;
491 bool async = false;
493 /* Wait until we are sent at least one command. */
495 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->commands_lock);
496 while (h->commands.len == 0)
497 pthread_cond_wait (&h->commands_cond, &h->commands_lock);
498 cmd = h->commands.ptr[0];
499 command_queue_remove (&h->commands, 0);
502 switch (cmd->type) {
503 case STOP:
504 r = do_stop (cmd, h);
505 stop = true;
506 break;
508 case INFO:
509 r = do_info (cmd, h);
510 break;
512 case READ:
513 r = do_read (cmd, h);
514 /* If async is true, don't retire this command now. */
515 async = r == 0;
516 break;
518 case WRITE:
519 r = do_write (cmd, h);
520 /* If async is true, don't retire this command now. */
521 async = r == 0;
522 break;
524 case FLUSH:
525 r = do_flush (cmd, h);
526 break;
528 case CAN_EXTENTS:
529 r = do_can_extents (cmd, h);
530 if (r >= 0)
531 *(int *)cmd->ptr = r;
532 break;
534 case EXTENTS:
535 r = do_extents (cmd, h);
536 break;
538 default: abort (); /* impossible, but keeps GCC happy */
539 } /* switch */
541 if (!async) {
542 /* Update the command status. */
543 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex);
544 cmd->status = r >= 0 ? SUCCEEDED : FAILED;
546 /* For synchronous commands signal the caller thread that the
547 * command has completed. (Asynchronous commands are completed in
548 * the callback handler).
550 pthread_cond_signal (&cmd->cond);
552 } /* while (!stop) */
554 /* Exit the worker thread. */
555 return NULL;