plugins: Wire up nbd plugin support for NBD_INFO_INIT_STATE
[nbdkit/ericb.git] / plugins / partitioning / partitioning.c
blob0b42b2e562f01e44fa5d60a6216ddb803dc02fea
1 /* nbdkit
2 * Copyright (C) 2018-2020 Red Hat Inc.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
33 #include <config.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdbool.h>
38 #include <stdint.h>
39 #include <inttypes.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <fcntl.h>
43 #include <assert.h>
44 #include <errno.h>
45 #include <time.h>
46 #include <sys/types.h>
47 #include <sys/stat.h>
49 #include <nbdkit-plugin.h>
51 #include "byte-swapping.h"
52 #include "isaligned.h"
53 #include "iszero.h"
54 #include "rounding.h"
56 #include "random.h"
57 #include "regions.h"
58 #include "virtual-disk.h"
60 #ifndef HAVE_FDATASYNC
61 #define fdatasync fsync
62 #endif
64 /* Debug flag: -D partitioning.regions=1: Print the regions table. */
65 int partitioning_debug_regions;
67 /* alignment, mbr_id, type_guid set on the command line for
68 * following partitions.
70 unsigned long alignment = DEFAULT_ALIGNMENT;
71 uint8_t mbr_id = DEFAULT_MBR_ID;
72 char type_guid[16]; /* initialized by partitioning_load function below */
74 /* partition-type parameter. */
75 int parttype = PARTTYPE_UNSET;
77 /* Files supplied on the command line. */
78 struct file *files = NULL;
79 size_t nr_files = 0;
81 /* Virtual disk layout. */
82 struct regions regions;
84 /* Primary and secondary partition tables and extended boot records.
85 * Secondary PT is only used for GPT. EBR array of sectors is only
86 * used for MBR with > 4 partitions and has length equal to nr_files-3.
88 unsigned char *primary = NULL, *secondary = NULL, **ebr = NULL;
90 /* Used to generate random unique partition GUIDs for GPT. */
91 static struct random_state random_state;
93 static void
94 partitioning_load (void)
96 init_regions (&regions);
97 parse_guid (DEFAULT_TYPE_GUID, type_guid);
98 xsrandom (time (NULL), &random_state);
101 static void
102 partitioning_unload (void)
104 size_t i;
106 for (i = 0; i < nr_files; ++i)
107 close (files[i].fd);
108 free (files);
110 /* We don't need to free regions.regions[].u.data because it points
111 * to primary, secondary or ebr which we free here.
113 free_regions (&regions);
115 free (primary);
116 free (secondary);
117 if (ebr) {
118 for (i = 0; i < nr_files-3; ++i)
119 free (ebr[i]);
120 free (ebr);
124 static int
125 partitioning_config (const char *key, const char *value)
127 struct file *p;
128 struct file file;
129 size_t i;
130 int err;
132 if (strcmp (key, "file") == 0) {
133 file.filename = value;
134 file.alignment = alignment;
135 file.mbr_id = mbr_id;
136 memcpy (file.type_guid, type_guid, sizeof type_guid);
138 file.fd = open (file.filename, O_RDWR);
139 if (file.fd == -1) {
140 nbdkit_error ("%s: %m", file.filename);
141 return -1;
143 if (fstat (file.fd, &file.statbuf) == -1) {
144 err = errno;
145 close (file.fd);
146 errno = err;
147 nbdkit_error ("%s: stat: %m", file.filename);
148 return -1;
151 if (file.statbuf.st_size == 0) {
152 nbdkit_error ("%s: zero length partitions are not allowed",
153 file.filename);
154 return -1;
157 /* Create a random GUID used as "Unique partition GUID". However
158 * this doesn't follow GUID conventions so in theory could make an
159 * invalid value. This is only used by GPT, and we store it in
160 * the file structure because it must be the same across primary
161 * and secondary PT entries.
163 for (i = 0; i < 16; ++i)
164 file.guid[i] = xrandom (&random_state) & 0xff;
166 p = realloc (files, (nr_files+1) * sizeof (struct file));
167 if (p == NULL) {
168 err = errno;
169 close (file.fd);
170 errno = err;
171 nbdkit_error ("realloc: %m");
172 return -1;
174 files = p;
175 files[nr_files] = file;
176 nr_files++;
178 else if (strcmp (key, "partition-type") == 0) {
179 if (strcasecmp (value, "mbr") == 0 || strcasecmp (value, "dos") == 0)
180 parttype = PARTTYPE_MBR;
181 else if (strcasecmp (value, "gpt") == 0)
182 parttype = PARTTYPE_GPT;
183 else {
184 nbdkit_error ("unknown partition-type: %s", value);
185 return -1;
188 else if (strcmp (key, "alignment") == 0) {
189 int64_t r;
191 r = nbdkit_parse_size (value);
192 if (r == -1)
193 return -1;
195 if (!(r >= SECTOR_SIZE && r <= MAX_ALIGNMENT)) {
196 nbdkit_error ("partition alignment %" PRIi64 " should be "
197 ">= sector size %" PRIu64 " and "
198 "<= maximum alignment %" PRIu64,
199 r, SECTOR_SIZE, MAX_ALIGNMENT);
200 return -1;
202 if (!IS_ALIGNED (r, SECTOR_SIZE)) {
203 nbdkit_error ("partition alignment %" PRIi64 " should be "
204 "a multiple of sector size %" PRIu64,
205 r, SECTOR_SIZE);
206 return -1;
209 alignment = r;
211 else if (strcmp (key, "mbr-id") == 0) {
212 if (strcasecmp (value, "default") == 0)
213 mbr_id = DEFAULT_MBR_ID;
214 else if (nbdkit_parse_uint8_t ("mbr-id", value, &mbr_id) == -1)
215 return -1;
217 else if (strcmp (key, "type-guid") == 0) {
218 if (strcasecmp (value, "default") == 0)
219 parse_guid (DEFAULT_TYPE_GUID, type_guid);
220 else if (parse_guid (value, type_guid) == -1) {
221 nbdkit_error ("could not validate GUID: %s", value);
222 return -1;
225 else {
226 nbdkit_error ("unknown parameter '%s'", key);
227 return -1;
230 return 0;
233 static int
234 partitioning_config_complete (void)
236 size_t i;
237 uint64_t total_size;
238 bool needs_gpt;
240 /* Not enough / too many files? */
241 if (nr_files == 0) {
242 nbdkit_error ("at least one file= parameter must be supplied");
243 return -1;
246 total_size = 0;
247 for (i = 0; i < nr_files; ++i)
248 total_size += files[i].statbuf.st_size;
249 needs_gpt = total_size > MAX_MBR_DISK_SIZE;
251 /* Choose default parttype if not set. */
252 if (parttype == PARTTYPE_UNSET) {
253 if (needs_gpt || nr_files > 4) {
254 parttype = PARTTYPE_GPT;
255 nbdkit_debug ("picking partition type GPT");
257 else {
258 parttype = PARTTYPE_MBR;
259 nbdkit_debug ("picking partition type MBR");
262 else if (parttype == PARTTYPE_MBR && needs_gpt) {
263 nbdkit_error ("MBR partition table type supports "
264 "a maximum virtual disk size of about 2 TB, "
265 "but you requested %zu partition(s) "
266 "and a total size of %" PRIu64 " bytes (> %" PRIu64 "). "
267 "Try using: partition-type=gpt",
268 nr_files, total_size, (uint64_t) MAX_MBR_DISK_SIZE);
269 return -1;
272 return create_virtual_disk_layout ();
/* Help text for the plugin, one line per parameter.  Every key that
 * partitioning_config accepts is listed here: file, partition-type,
 * alignment, mbr-id and type-guid.
 */
#define partitioning_config_help \
  "file=<FILENAME> (required) File(s) containing partitions\n" \
  "partition-type=mbr|gpt Partition type\n" \
  "alignment=<SIZE> Alignment of following partitions\n" \
  "mbr-id=<ID>|default MBR type byte of following partitions\n" \
  "type-guid=<GUID>|default GPT type GUID of following partitions"
279 /* Create the per-connection handle. */
280 static void *
281 partitioning_open (int readonly)
283 return NBDKIT_HANDLE_NOT_NEEDED;
286 #define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
288 /* Get the disk size. */
289 static int64_t
290 partitioning_get_size (void *handle)
292 return virtual_size (&regions);
/* Multi-conn: always 1, because the same data is served over multiple
 * connections.
 */
static int
partitioning_can_multi_conn (void *handle)
{
  (void) handle;
  return 1;
}
302 /* Cache. */
303 static int
304 partitioning_can_cache (void *handle)
306 /* Let nbdkit call pread to populate the file system cache. */
307 return NBDKIT_CACHE_EMULATE;
/* Initial state: always 1.  The presence of region_zero regions means
 * the disk is sparse, even though this plugin does not yet support
 * .extents.
 */
static int
partitioning_init_sparse (void *handle)
{
  (void) handle;
  return 1;
}
320 /* Read data. */
321 static int
322 partitioning_pread (void *handle, void *buf, uint32_t count, uint64_t offset)
324 while (count > 0) {
325 const struct region *region = find_region (&regions, offset);
326 size_t i, len;
327 ssize_t r;
329 /* Length to end of region. */
330 len = region->end - offset + 1;
331 if (len > count)
332 len = count;
334 switch (region->type) {
335 case region_file:
336 i = region->u.i;
337 assert (i < nr_files);
338 r = pread (files[i].fd, buf, len, offset - region->start);
339 if (r == -1) {
340 nbdkit_error ("pread: %s: %m", files[i].filename);
341 return -1;
343 if (r == 0) {
344 nbdkit_error ("pread: %s: unexpected end of file", files[i].filename);
345 return -1;
347 len = r;
348 break;
350 case region_data:
351 memcpy (buf, &region->u.data[offset - region->start], len);
352 break;
354 case region_zero:
355 memset (buf, 0, len);
356 break;
359 count -= len;
360 buf += len;
361 offset += len;
364 return 0;
367 /* Write data. */
368 static int
369 partitioning_pwrite (void *handle,
370 const void *buf, uint32_t count, uint64_t offset)
372 while (count > 0) {
373 const struct region *region = find_region (&regions, offset);
374 size_t i, len;
375 ssize_t r;
377 /* Length to end of region. */
378 len = region->end - offset + 1;
379 if (len > count)
380 len = count;
382 switch (region->type) {
383 case region_file:
384 i = region->u.i;
385 assert (i < nr_files);
386 r = pwrite (files[i].fd, buf, len, offset - region->start);
387 if (r == -1) {
388 nbdkit_error ("pwrite: %s: %m", files[i].filename);
389 return -1;
391 len = r;
392 break;
394 case region_data:
395 /* You can only write same data as already present. */
396 if (memcmp (&region->u.data[offset - region->start], buf, len) != 0) {
397 nbdkit_error ("attempt to change partition table of virtual disk");
398 errno = EIO;
399 return -1;
401 break;
403 case region_zero:
404 /* You can only write zeroes. */
405 if (!is_zero (buf, len)) {
406 nbdkit_error ("write non-zeroes to padding region");
407 errno = EIO;
408 return -1;
410 break;
413 count -= len;
414 buf += len;
415 offset += len;
418 return 0;
421 /* Flush. */
422 static int
423 partitioning_flush (void *handle)
425 size_t i;
427 for (i = 0; i < nr_files; ++i) {
428 if (fdatasync (files[i].fd) == -1) {
429 nbdkit_error ("fdatasync: %m");
430 return -1;
434 return 0;
437 static struct nbdkit_plugin plugin = {
438 .name = "partitioning",
439 .version = PACKAGE_VERSION,
440 .load = partitioning_load,
441 .unload = partitioning_unload,
442 .config = partitioning_config,
443 .config_complete = partitioning_config_complete,
444 .config_help = partitioning_config_help,
445 .magic_config_key = "file",
446 .open = partitioning_open,
447 .get_size = partitioning_get_size,
448 .can_multi_conn = partitioning_can_multi_conn,
449 .can_cache = partitioning_can_cache,
450 .init_sparse = partitioning_init_sparse,
451 .pread = partitioning_pread,
452 .pwrite = partitioning_pwrite,
453 .flush = partitioning_flush,
454 /* In this plugin, errno is preserved properly along error return
455 * paths from failed system calls.
457 .errno_is_preserved = 1,
460 NBDKIT_REGISTER_PLUGIN(plugin)