chunk-format.c

   1 #include "cache.h"
   2 #include "chunk-format.h"
   3 #include "csum-file.h"
   4
   5 /*
   6  * When writing a chunk-based file format, collect the chunks in
   7  * an array of chunk_info structs. The size stores the _expected_
   8  * amount of data that will be written by write_fn.
   9  */
  10 struct chunk_info {
  11         uint32_t id;
  12         uint64_t size;
  13         chunk_write_fn write_fn;
  14
  15         const void *start;
  16 };
  17
  18 struct chunkfile {
  19         struct hashfile *f;
  20
  21         struct chunk_info *chunks;
  22         size_t chunks_nr;
  23         size_t chunks_alloc;
  24 };
  25
  26 struct chunkfile *init_chunkfile(struct hashfile *f)
  27 {
  28         struct chunkfile *cf = xcalloc(1, sizeof(*cf));
  29         cf->f = f;
  30         return cf;
  31 }
  32
  33 void free_chunkfile(struct chunkfile *cf)
  34 {
  35         if (!cf)
  36                 return;
  37         free(cf->chunks);
  38         free(cf);
  39 }
  40
  41 int get_num_chunks(struct chunkfile *cf)
  42 {
  43         return cf->chunks_nr;
  44 }
  45
  46 void add_chunk(struct chunkfile *cf,
  47                uint32_t id,
  48                size_t size,
  49                chunk_write_fn fn)
  50 {
  51         ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);
  52
  53         cf->chunks[cf->chunks_nr].id = id;
  54         cf->chunks[cf->chunks_nr].write_fn = fn;
  55         cf->chunks[cf->chunks_nr].size = size;
  56         cf->chunks_nr++;
  57 }
  58
  59 int write_chunkfile(struct chunkfile *cf, void *data)
  60 {
  61         int i, result = 0;
  62         uint64_t cur_offset = hashfile_total(cf->f);
  63
  64         trace2_region_enter("chunkfile", "write", the_repository);
  65
  66         /* Add the table of contents to the current offset */
  67         cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
  68
  69         for (i = 0; i < cf->chunks_nr; i++) {
  70                 hashwrite_be32(cf->f, cf->chunks[i].id);
  71                 hashwrite_be64(cf->f, cur_offset);
  72
  73                 cur_offset += cf->chunks[i].size;
  74         }
  75
  76         /* Trailing entry marks the end of the chunks */
  77         hashwrite_be32(cf->f, 0);
  78         hashwrite_be64(cf->f, cur_offset);
  79
  80         for (i = 0; i < cf->chunks_nr; i++) {
  81                 off_t start_offset = hashfile_total(cf->f);
  82                 result = cf->chunks[i].write_fn(cf->f, data);
  83
  84                 if (result)
  85                         goto cleanup;
  86
  87                 if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
  88                         BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
  89                             cf->chunks[i].size, cf->chunks[i].id,
  90                             hashfile_total(cf->f) - start_offset);
  91         }
  92
  93 cleanup:
  94         trace2_region_leave("chunkfile", "write", the_repository);
  95         return result;
  96 }
  97
  98 int read_table_of_contents(struct chunkfile *cf,
  99                            const unsigned char *mfile,
 100                            size_t mfile_size,
 101                            uint64_t toc_offset,
 102                            int toc_length)
 103 {
 104         int i;
 105         uint32_t chunk_id;
 106         const unsigned char *table_of_contents = mfile + toc_offset;
 107
 108         ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc);
 109
 110         while (toc_length--) {
 111                 uint64_t chunk_offset, next_chunk_offset;
 112
 113                 chunk_id = get_be32(table_of_contents);
 114                 chunk_offset = get_be64(table_of_contents + 4);
 115
 116                 if (!chunk_id) {
 117                         error(_("terminating chunk id appears earlier than expected"));
 118                         return 1;
 119                 }
 120
 121                 table_of_contents += CHUNK_TOC_ENTRY_SIZE;
 122                 next_chunk_offset = get_be64(table_of_contents + 4);
 123
 124                 if (next_chunk_offset < chunk_offset ||
 125                     next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
 126                         error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
 127                               chunk_offset, next_chunk_offset);
 128                         return -1;
 129                 }
 130
 131                 for (i = 0; i < cf->chunks_nr; i++) {
 132                         if (cf->chunks[i].id == chunk_id) {
 133                                 error(_("duplicate chunk ID %"PRIx32" found"),
 134                                         chunk_id);
 135                                 return -1;
 136                         }
 137                 }
 138
 139                 cf->chunks[cf->chunks_nr].id = chunk_id;
 140                 cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
 141                 cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
 142                 cf->chunks_nr++;
 143         }
 144
 145         chunk_id = get_be32(table_of_contents);
 146         if (chunk_id) {
 147                 error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
 148                 return -1;
 149         }
 150
 151         return 0;
 152 }
 153
 154 static int pair_chunk_fn(const unsigned char *chunk_start,
 155                          size_t chunk_size,
 156                          void *data)
 157 {
 158         const unsigned char **p = data;
 159         *p = chunk_start;
 160         return 0;
 161 }
 162
 163 int pair_chunk(struct chunkfile *cf,
 164                uint32_t chunk_id,
 165                const unsigned char **p)
 166 {
 167         return read_chunk(cf, chunk_id, pair_chunk_fn, p);
 168 }
 169
 170 int read_chunk(struct chunkfile *cf,
 171                uint32_t chunk_id,
 172                chunk_read_fn fn,
 173                void *data)
 174 {
 175         int i;
 176
 177         for (i = 0; i < cf->chunks_nr; i++) {
 178                 if (cf->chunks[i].id == chunk_id)
 179                         return fn(cf->chunks[i].start, cf->chunks[i].size, data);
 180         }
 181
 182         return CHUNK_NOT_FOUND;
 183 }
 184
 185 uint8_t oid_version(const struct git_hash_algo *algop)
 186 {
 187         switch (hash_algo_by_ptr(algop)) {
 188         case GIT_HASH_SHA1:
 189                 return 1;
 190         case GIT_HASH_SHA256:
 191                 return 2;
 192         default:
 193                 die(_("invalid hash version"));
 194         }
 195 }