chunk-format.c

   1 #include "git-compat-util.h"
   2 #include "chunk-format.h"
   3 #include "csum-file.h"
   4 #include "gettext.h"
   5 #include "hash.h"
   6 #include "trace2.h"
   7
   8 /*
   9  * When writing a chunk-based file format, collect the chunks in
  10  * an array of chunk_info structs. The size stores the _expected_
  11  * amount of data that will be written by write_fn.
  12  */
  13 struct chunk_info {
  14         uint32_t id;
  15         uint64_t size;
  16         chunk_write_fn write_fn;
  17
  18         const void *start;
  19 };
  20
  21 struct chunkfile {
  22         struct hashfile *f;
  23
  24         struct chunk_info *chunks;
  25         size_t chunks_nr;
  26         size_t chunks_alloc;
  27 };
  28
  29 struct chunkfile *init_chunkfile(struct hashfile *f)
  30 {
  31         struct chunkfile *cf = xcalloc(1, sizeof(*cf));
  32         cf->f = f;
  33         return cf;
  34 }
  35
  36 void free_chunkfile(struct chunkfile *cf)
  37 {
  38         if (!cf)
  39                 return;
  40         free(cf->chunks);
  41         free(cf);
  42 }
  43
  44 int get_num_chunks(struct chunkfile *cf)
  45 {
  46         return cf->chunks_nr;
  47 }
  48
  49 void add_chunk(struct chunkfile *cf,
  50                uint32_t id,
  51                size_t size,
  52                chunk_write_fn fn)
  53 {
  54         ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);
  55
  56         cf->chunks[cf->chunks_nr].id = id;
  57         cf->chunks[cf->chunks_nr].write_fn = fn;
  58         cf->chunks[cf->chunks_nr].size = size;
  59         cf->chunks_nr++;
  60 }
  61
  62 int write_chunkfile(struct chunkfile *cf, void *data)
  63 {
  64         int i, result = 0;
  65         uint64_t cur_offset = hashfile_total(cf->f);
  66
  67         trace2_region_enter("chunkfile", "write", the_repository);
  68
  69         /* Add the table of contents to the current offset */
  70         cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
  71
  72         for (i = 0; i < cf->chunks_nr; i++) {
  73                 hashwrite_be32(cf->f, cf->chunks[i].id);
  74                 hashwrite_be64(cf->f, cur_offset);
  75
  76                 cur_offset += cf->chunks[i].size;
  77         }
  78
  79         /* Trailing entry marks the end of the chunks */
  80         hashwrite_be32(cf->f, 0);
  81         hashwrite_be64(cf->f, cur_offset);
  82
  83         for (i = 0; i < cf->chunks_nr; i++) {
  84                 off_t start_offset = hashfile_total(cf->f);
  85                 result = cf->chunks[i].write_fn(cf->f, data);
  86
  87                 if (result)
  88                         goto cleanup;
  89
  90                 if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
  91                         BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
  92                             cf->chunks[i].size, cf->chunks[i].id,
  93                             hashfile_total(cf->f) - start_offset);
  94         }
  95
  96 cleanup:
  97         trace2_region_leave("chunkfile", "write", the_repository);
  98         return result;
  99 }
 100
 101 int read_table_of_contents(struct chunkfile *cf,
 102                            const unsigned char *mfile,
 103                            size_t mfile_size,
 104                            uint64_t toc_offset,
 105                            int toc_length,
 106                            unsigned expected_alignment)
 107 {
 108         int i;
 109         uint32_t chunk_id;
 110         const unsigned char *table_of_contents = mfile + toc_offset;
 111
 112         ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc);
 113
 114         while (toc_length--) {
 115                 uint64_t chunk_offset, next_chunk_offset;
 116
 117                 chunk_id = get_be32(table_of_contents);
 118                 chunk_offset = get_be64(table_of_contents + 4);
 119
 120                 if (!chunk_id) {
 121                         error(_("terminating chunk id appears earlier than expected"));
 122                         return 1;
 123                 }
 124                 if (chunk_offset % expected_alignment != 0) {
 125                         error(_("chunk id %"PRIx32" not %d-byte aligned"),
 126                               chunk_id, expected_alignment);
 127                         return 1;
 128                 }
 129
 130                 table_of_contents += CHUNK_TOC_ENTRY_SIZE;
 131                 next_chunk_offset = get_be64(table_of_contents + 4);
 132
 133                 if (next_chunk_offset < chunk_offset ||
 134                     next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
 135                         error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
 136                               chunk_offset, next_chunk_offset);
 137                         return -1;
 138                 }
 139
 140                 for (i = 0; i < cf->chunks_nr; i++) {
 141                         if (cf->chunks[i].id == chunk_id) {
 142                                 error(_("duplicate chunk ID %"PRIx32" found"),
 143                                         chunk_id);
 144                                 return -1;
 145                         }
 146                 }
 147
 148                 cf->chunks[cf->chunks_nr].id = chunk_id;
 149                 cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
 150                 cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
 151                 cf->chunks_nr++;
 152         }
 153
 154         chunk_id = get_be32(table_of_contents);
 155         if (chunk_id) {
 156                 error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
 157                 return -1;
 158         }
 159
 160         return 0;
 161 }
 162
 163 struct pair_chunk_data {
 164         const unsigned char **p;
 165         size_t *size;
 166 };
 167
 168 static int pair_chunk_fn(const unsigned char *chunk_start,
 169                          size_t chunk_size,
 170                          void *data)
 171 {
 172         struct pair_chunk_data *pcd = data;
 173         *pcd->p = chunk_start;
 174         *pcd->size = chunk_size;
 175         return 0;
 176 }
 177
 178 int pair_chunk(struct chunkfile *cf,
 179                uint32_t chunk_id,
 180                const unsigned char **p,
 181                size_t *size)
 182 {
 183         struct pair_chunk_data pcd = { .p = p, .size = size };
 184         return read_chunk(cf, chunk_id, pair_chunk_fn, &pcd);
 185 }
 186
 187 int read_chunk(struct chunkfile *cf,
 188                uint32_t chunk_id,
 189                chunk_read_fn fn,
 190                void *data)
 191 {
 192         int i;
 193
 194         for (i = 0; i < cf->chunks_nr; i++) {
 195                 if (cf->chunks[i].id == chunk_id)
 196                         return fn(cf->chunks[i].start, cf->chunks[i].size, data);
 197         }
 198
 199         return CHUNK_NOT_FOUND;
 200 }
 201
 202 uint8_t oid_version(const struct git_hash_algo *algop)
 203 {
 204         switch (hash_algo_by_ptr(algop)) {
 205         case GIT_HASH_SHA1:
 206                 return 1;
 207         case GIT_HASH_SHA256:
 208                 return 2;
 209         default:
 210                 die(_("invalid hash version"));
 211         }
 212 }