2 * hash.c : dumping and reading hash tables to/from files.
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
29 #include <apr_version.h>
30 #include <apr_pools.h>
32 #include <apr_file_io.h>
34 #include "svn_types.h"
35 #include "svn_string.h"
36 #include "svn_error.h"
38 #include "svn_sorts.h"
40 #include "svn_pools.h"
42 #include "private/svn_dep_compat.h"
43 #include "private/svn_subr_private.h"
45 #include "svn_private_config.h"
51 * The format of a dumped hash table is:
54 * name (a string of <nlength> bytes, followed by a newline)
56 * val (a string of <vlength> bytes, followed by a newline)
61 * (Yes, there is a newline after END.)
72 * A forthright entrance, yet coquettish on the tongue, its deceptively
73 * fruity exterior hides the warm mahagony undercurrent that is the
74 * hallmark of Chateau Fraisant-Pitre. Connoisseurs of the region will
75 * be pleased to note the familiar, subtle hints of mulberries and
76 * carburator fluid. Its confident finish is marred only by a barely
77 * detectable suggestion of rancid squid ink.
89 /*** Dumping and loading hash files. */
91 /* Implements svn_hash_read2 and svn_hash_read_incremental. */
93 hash_read(apr_hash_t
*hash
, svn_stream_t
*stream
, const char *terminator
,
94 svn_boolean_t incremental
, apr_pool_t
*pool
)
98 apr_size_t len
, keylen
, vallen
;
99 char c
, *keybuf
, *valbuf
;
100 apr_pool_t
*iterpool
= svn_pool_create(pool
);
107 svn_pool_clear(iterpool
);
109 /* Read a key length line. Might be END, though. */
110 SVN_ERR(svn_stream_readline(stream
, &buf
, "\n", &eof
, iterpool
));
112 /* Check for the end of the hash. */
113 if ((!terminator
&& eof
&& buf
->len
== 0)
114 || (terminator
&& (strcmp(buf
->data
, terminator
) == 0)))
117 /* Check for unexpected end of stream */
119 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
,
120 _("Serialized hash missing terminator"));
122 if ((buf
->len
>= 3) && (buf
->data
[0] == 'K') && (buf
->data
[1] == ' '))
124 /* Get the length of the key */
125 err
= svn_cstring_strtoui64(&ui64
, buf
->data
+ 2,
126 0, APR_SIZE_MAX
, 10);
128 return svn_error_create(SVN_ERR_MALFORMED_FILE
, err
,
129 _("Serialized hash malformed"));
130 keylen
= (apr_size_t
)ui64
;
132 /* Now read that much into a buffer. */
133 keybuf
= apr_palloc(pool
, keylen
+ 1);
134 SVN_ERR(svn_stream_read(stream
, keybuf
, &keylen
));
135 keybuf
[keylen
] = '\0';
137 /* Suck up extra newline after key data */
139 SVN_ERR(svn_stream_read(stream
, &c
, &len
));
141 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
,
142 _("Serialized hash malformed"));
144 /* Read a val length line */
145 SVN_ERR(svn_stream_readline(stream
, &buf
, "\n", &eof
, iterpool
));
147 if ((buf
->data
[0] == 'V') && (buf
->data
[1] == ' '))
149 err
= svn_cstring_strtoui64(&ui64
, buf
->data
+ 2,
150 0, APR_SIZE_MAX
, 10);
152 return svn_error_create(SVN_ERR_MALFORMED_FILE
, err
,
153 _("Serialized hash malformed"));
154 vallen
= (apr_size_t
)ui64
;
156 valbuf
= apr_palloc(iterpool
, vallen
+ 1);
157 SVN_ERR(svn_stream_read(stream
, valbuf
, &vallen
));
158 valbuf
[vallen
] = '\0';
160 /* Suck up extra newline after val data */
162 SVN_ERR(svn_stream_read(stream
, &c
, &len
));
164 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
,
165 _("Serialized hash malformed"));
167 /* Add a new hash entry. */
168 apr_hash_set(hash
, keybuf
, keylen
,
169 svn_string_ncreate(valbuf
, vallen
, pool
));
172 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
,
173 _("Serialized hash malformed"));
175 else if (incremental
&& (buf
->len
>= 3)
176 && (buf
->data
[0] == 'D') && (buf
->data
[1] == ' '))
178 /* Get the length of the key */
179 err
= svn_cstring_strtoui64(&ui64
, buf
->data
+ 2,
180 0, APR_SIZE_MAX
, 10);
182 return svn_error_create(SVN_ERR_MALFORMED_FILE
, err
,
183 _("Serialized hash malformed"));
184 keylen
= (apr_size_t
)ui64
;
186 /* Now read that much into a buffer. */
187 keybuf
= apr_palloc(iterpool
, keylen
+ 1);
188 SVN_ERR(svn_stream_read(stream
, keybuf
, &keylen
));
189 keybuf
[keylen
] = '\0';
191 /* Suck up extra newline after key data */
193 SVN_ERR(svn_stream_read(stream
, &c
, &len
));
195 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
,
196 _("Serialized hash malformed"));
198 /* Remove this hash entry. */
199 apr_hash_set(hash
, keybuf
, keylen
, NULL
);
203 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
,
204 _("Serialized hash malformed"));
208 svn_pool_destroy(iterpool
);
213 /* Implements svn_hash_write2 and svn_hash_write_incremental. */
215 hash_write(apr_hash_t
*hash
, apr_hash_t
*oldhash
, svn_stream_t
*stream
,
216 const char *terminator
, apr_pool_t
*pool
)
220 apr_array_header_t
*list
;
223 subpool
= svn_pool_create(pool
);
225 list
= svn_sort__hash(hash
, svn_sort_compare_items_lexically
, pool
);
226 for (i
= 0; i
< list
->nelts
; i
++)
228 svn_sort__item_t
*item
= &APR_ARRAY_IDX(list
, i
, svn_sort__item_t
);
229 svn_string_t
*valstr
= item
->value
;
231 svn_pool_clear(subpool
);
233 /* Don't output entries equal to the ones in oldhash, if present. */
236 svn_string_t
*oldstr
= apr_hash_get(oldhash
, item
->key
, item
->klen
);
238 if (oldstr
&& svn_string_compare(valstr
, oldstr
))
243 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
,
244 _("Cannot serialize negative length"));
247 SVN_ERR(svn_stream_printf(stream
, subpool
,
248 "K %" APR_SIZE_T_FMT
"\n%s\n"
249 "V %" APR_SIZE_T_FMT
"\n",
250 (apr_size_t
) item
->klen
,
251 (const char *) item
->key
,
254 SVN_ERR(svn_stream_write(stream
, valstr
->data
, &len
));
255 SVN_ERR(svn_stream_puts(stream
, "\n"));
260 /* Output a deletion entry for each property in oldhash but not hash. */
261 list
= svn_sort__hash(oldhash
, svn_sort_compare_items_lexically
,
263 for (i
= 0; i
< list
->nelts
; i
++)
265 svn_sort__item_t
*item
= &APR_ARRAY_IDX(list
, i
, svn_sort__item_t
);
267 svn_pool_clear(subpool
);
269 /* If it's not present in the new hash, write out a D entry. */
270 if (! apr_hash_get(hash
, item
->key
, item
->klen
))
271 SVN_ERR(svn_stream_printf(stream
, subpool
,
272 "D %" APR_SSIZE_T_FMT
"\n%s\n",
273 item
->klen
, (const char *) item
->key
));
278 SVN_ERR(svn_stream_printf(stream
, subpool
, "%s\n", terminator
));
280 svn_pool_destroy(subpool
);
285 svn_error_t
*svn_hash_read2(apr_hash_t
*hash
, svn_stream_t
*stream
,
286 const char *terminator
, apr_pool_t
*pool
)
288 return hash_read(hash
, stream
, terminator
, FALSE
, pool
);
292 svn_error_t
*svn_hash_read_incremental(apr_hash_t
*hash
,
293 svn_stream_t
*stream
,
294 const char *terminator
,
297 return hash_read(hash
, stream
, terminator
, TRUE
, pool
);
302 svn_hash_write2(apr_hash_t
*hash
, svn_stream_t
*stream
,
303 const char *terminator
, apr_pool_t
*pool
)
305 return hash_write(hash
, NULL
, stream
, terminator
, pool
);
310 svn_hash_write_incremental(apr_hash_t
*hash
, apr_hash_t
*oldhash
,
311 svn_stream_t
*stream
, const char *terminator
,
314 SVN_ERR_ASSERT(oldhash
!= NULL
);
315 return hash_write(hash
, oldhash
, stream
, terminator
, pool
);
320 svn_hash_write(apr_hash_t
*hash
, apr_file_t
*destfile
, apr_pool_t
*pool
)
322 return hash_write(hash
, NULL
, svn_stream_from_aprfile2(destfile
, TRUE
, pool
),
323 SVN_HASH_TERMINATOR
, pool
);
327 /* There are enough quirks in the deprecated svn_hash_read that we
328 should just preserve its implementation. */
330 svn_hash_read(apr_hash_t
*hash
,
335 char buf
[SVN_KEYLINE_MAXLEN
];
343 /* Read a key length line. Might be END, though. */
344 apr_size_t len
= sizeof(buf
);
346 err
= svn_io_read_length_line(srcfile
, buf
, &len
, pool
);
347 if (err
&& APR_STATUS_IS_EOF(err
->apr_err
) && first_time
)
349 /* We got an EOF on our very first attempt to read, which
350 means it's a zero-byte file. No problem, just go home. */
351 svn_error_clear(err
);
355 /* Any other circumstance is a genuine error. */
360 if (((len
== 3) && (buf
[0] == 'E') && (buf
[1] == 'N') && (buf
[2] == 'D'))
363 && (buf
[1] == 'R') /* We formerly used just "END" to */
364 && (buf
[2] == 'O') /* end a property hash, but later */
365 && (buf
[3] == 'P') /* we added "PROPS-END", so that */
366 && (buf
[4] == 'S') /* the fs dump format would be */
367 && (buf
[5] == '-') /* more human-readable. That's */
368 && (buf
[6] == 'E') /* why we accept either way here. */
372 /* We've reached the end of the dumped hash table, so leave. */
375 else if ((buf
[0] == 'K') && (buf
[1] == ' '))
381 /* Get the length of the key */
382 SVN_ERR(svn_cstring_atoi(&parsed_len
, buf
+ 2));
385 /* Now read that much into a buffer, + 1 byte for null terminator */
386 keybuf
= apr_palloc(pool
, keylen
+ 1);
387 SVN_ERR(svn_io_file_read_full2(srcfile
,
389 &num_read
, NULL
, pool
));
390 ((char *) keybuf
)[keylen
] = '\0';
392 /* Suck up extra newline after key data */
393 SVN_ERR(svn_io_file_getc(&c
, srcfile
, pool
));
395 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
, NULL
);
397 /* Read a val length line */
399 SVN_ERR(svn_io_read_length_line(srcfile
, buf
, &len
, pool
));
401 if ((buf
[0] == 'V') && (buf
[1] == ' '))
403 svn_string_t
*value
= apr_palloc(pool
, sizeof(*value
));
407 /* Get the length of the value */
408 SVN_ERR(svn_cstring_atoi(&parsed_len
, buf
+ 2));
411 /* Again, 1 extra byte for the null termination. */
412 valbuf
= apr_palloc(pool
, vallen
+ 1);
413 SVN_ERR(svn_io_file_read_full2(srcfile
,
415 &num_read
, NULL
, pool
));
416 ((char *) valbuf
)[vallen
] = '\0';
418 /* Suck up extra newline after val data */
419 SVN_ERR(svn_io_file_getc(&c
, srcfile
, pool
));
421 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
, NULL
);
423 value
->data
= valbuf
;
426 /* The Grand Moment: add a new hash entry! */
427 apr_hash_set(hash
, keybuf
, keylen
, value
);
431 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
, NULL
);
436 return svn_error_create(SVN_ERR_MALFORMED_FILE
, NULL
, NULL
);
443 /*** Diffing hashes ***/
446 svn_hash_diff(apr_hash_t
*hash_a
,
448 svn_hash_diff_func_t diff_func
,
449 void *diff_func_baton
,
452 apr_hash_index_t
*hi
;
455 for (hi
= apr_hash_first(pool
, hash_a
); hi
; hi
= apr_hash_next(hi
))
460 apr_hash_this(hi
, &key
, &klen
, NULL
);
462 if (hash_b
&& (apr_hash_get(hash_b
, key
, klen
)))
463 SVN_ERR((*diff_func
)(key
, klen
, svn_hash_diff_key_both
,
466 SVN_ERR((*diff_func
)(key
, klen
, svn_hash_diff_key_a
,
471 for (hi
= apr_hash_first(pool
, hash_b
); hi
; hi
= apr_hash_next(hi
))
476 apr_hash_this(hi
, &key
, &klen
, NULL
);
478 if (! (hash_a
&& apr_hash_get(hash_a
, key
, klen
)))
479 SVN_ERR((*diff_func
)(key
, klen
, svn_hash_diff_key_b
,
487 /*** Misc. hash APIs ***/
490 svn_hash_keys(apr_array_header_t
**array
,
494 apr_hash_index_t
*hi
;
496 *array
= apr_array_make(pool
, apr_hash_count(hash
), sizeof(const char *));
498 for (hi
= apr_hash_first(pool
, hash
); hi
; hi
= apr_hash_next(hi
))
500 APR_ARRAY_PUSH(*array
, const char *) = svn__apr_hash_index_key(hi
);
508 svn_hash_from_cstring_keys(apr_hash_t
**hash_p
,
509 const apr_array_header_t
*keys
,
513 apr_hash_t
*hash
= svn_hash__make(pool
);
514 for (i
= 0; i
< keys
->nelts
; i
++)
517 apr_pstrdup(pool
, APR_ARRAY_IDX(keys
, i
, const char *));
518 svn_hash_sets(hash
, key
, key
);
525 #if !APR_VERSION_AT_LEAST(1, 3, 0)
527 svn_hash__clear(apr_hash_t
*hash
)
529 apr_hash_index_t
*hi
;
533 for (hi
= apr_hash_first(NULL
, hash
); hi
; hi
= apr_hash_next(hi
))
535 apr_hash_this(hi
, &key
, &klen
, NULL
);
536 apr_hash_set(hash
, key
, klen
, NULL
);
543 /*** Specialized getter APIs ***/
546 svn_hash__get_cstring(apr_hash_t
*hash
,
548 const char *default_value
)
552 const char *value
= svn_hash_gets(hash
, key
);
553 return value
? value
: default_value
;
556 return default_value
;
561 svn_hash__get_bool(apr_hash_t
*hash
, const char *key
,
562 svn_boolean_t default_value
)
564 const char *tmp_value
= svn_hash__get_cstring(hash
, key
, NULL
);
565 svn_tristate_t value
= svn_tristate__from_word(tmp_value
);
567 if (value
== svn_tristate_true
)
569 else if (value
== svn_tristate_false
)
572 return default_value
;
577 /*** Optimized hash function ***/
579 /* Optimized version of apr_hashfunc_default in APR 1.4.5 and earlier.
580 * It assumes that the CPU has 32-bit multiplications with high throughput
581 * of at least 1 operation every 3 cycles. Latency is not an issue. Another
582 * optimization is a mildly unrolled main loop and breaking the dependency
583 * chain within the loop.
585 * Note that most CPUs including Intel Atom, VIA Nano, ARM feature the
586 * assumed pipelined multiplication circuitry. They can do one MUL every
587 * or every other cycle.
589 * The performance is ultimately limited by the fact that most CPUs can
590 * do only one LOAD and only one BRANCH operation per cycle. The best we
591 * can do is to process one character per cycle - provided the processor
592 * is wide enough to do 1 LOAD, COMPARE, BRANCH, MUL and ADD per cycle.
595 hashfunc_compatible(const char *char_key
, apr_ssize_t
*klen
)
597 unsigned int hash
= 0;
598 const unsigned char *key
= (const unsigned char *)char_key
;
599 const unsigned char *p
;
602 if (*klen
== APR_HASH_KEY_STRING
)
604 for (p
= key
; ; p
+=4)
606 unsigned int new_hash
= hash
* 33 * 33 * 33 * 33;
608 new_hash
+= p
[0] * 33 * 33 * 33;
610 new_hash
+= p
[1] * 33 * 33;
612 new_hash
+= p
[2] * 33;
614 hash
= new_hash
+ p
[3];
617 hash
= hash
* 33 + *p
;
623 for (p
= key
, i
= *klen
; i
>= 4; i
-=4, p
+=4)
625 hash
= hash
* 33 * 33 * 33 * 33
626 + p
[0] * 33 * 33 * 33
632 hash
= hash
* 33 + *p
;
639 svn_hash__make(apr_pool_t
*pool
)
641 return apr_hash_make_custom(pool
, hashfunc_compatible
);