Change default sync hash table sizing to 66% full
[pacman-ng.git] / lib / libalpm / be_sync.c
blob4ad045c29aa380a687e88032392cb99dc517325b
1 /*
2 * be_sync.c
4 * Copyright (c) 2006-2011 Pacman Development Team <pacman-dev@archlinux.org>
5 * Copyright (c) 2002-2006 by Judd Vinet <jvinet@zeroflux.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "config.h"
23 #include <errno.h>
24 #include <limits.h>
26 /* libarchive */
27 #include <archive.h>
28 #include <archive_entry.h>
30 /* libalpm */
31 #include "util.h"
32 #include "log.h"
33 #include "alpm.h"
34 #include "alpm_list.h"
35 #include "package.h"
36 #include "handle.h"
37 #include "delta.h"
38 #include "deps.h"
39 #include "dload.h"
41 /** Update a package database
43 * An update of the package database \a db will be attempted. Unless
44 * \a force is true, the update will only be performed if the remote
45 * database was modified since the last update.
47 * A transaction is necessary for this operation, in order to obtain a
48 * database lock. During this transaction the front-end will be informed
49 * of the download progress of the database via the download callback.
51 * Example:
52 * @code
53 * alpm_list_t *syncs = alpm_option_get_syncdbs();
54 * if(alpm_trans_init(0, NULL, NULL, NULL) == 0) {
55 * for(i = syncs; i; i = alpm_list_next(i)) {
56 * pmdb_t *db = alpm_list_getdata(i);
57 * result = alpm_db_update(0, db);
58 * alpm_trans_release();
60 * if(result < 0) {
61 * printf("Unable to update database: %s\n", alpm_strerrorlast());
62 * } else if(result == 1) {
63 * printf("Database already up to date\n");
64 * } else {
65 * printf("Database updated\n");
66 * }
67 * }
68 * }
69 * @endcode
71 * @ingroup alpm_databases
72 * @note After a successful update, the \link alpm_db_get_pkgcache()
73 * package cache \endlink will be invalidated
74 * @param force if true, then forces the update, otherwise update only in case
75 * the database isn't up to date
76 * @param db pointer to the package database to update
77 * @return 0 on success, -1 on error (pm_errno is set accordingly), 1 if up
78 * to date
80 int SYMEXPORT alpm_db_update(int force, pmdb_t *db)
82 char *dbfile, *syncpath;
83 const char *dbpath;
84 struct stat buf;
85 size_t len;
86 int ret;
88 ALPM_LOG_FUNC;
90 /* Sanity checks */
91 ASSERT(handle != NULL, RET_ERR(PM_ERR_HANDLE_NULL, -1));
92 ASSERT(db != NULL && db != handle->db_local, RET_ERR(PM_ERR_WRONG_ARGS, -1));
94 if(!alpm_list_find_ptr(handle->dbs_sync, db)) {
95 RET_ERR(PM_ERR_DB_NOT_FOUND, -1);
98 len = strlen(db->treename) + 4;
99 MALLOC(dbfile, len, RET_ERR(PM_ERR_MEMORY, -1));
100 sprintf(dbfile, "%s.db", db->treename);
102 dbpath = alpm_option_get_dbpath();
103 len = strlen(dbpath) + 6;
104 MALLOC(syncpath, len, RET_ERR(PM_ERR_MEMORY, -1));
105 sprintf(syncpath, "%s%s", dbpath, "sync/");
107 if(stat(syncpath, &buf) != 0) {
108 _alpm_log(PM_LOG_DEBUG, "database dir '%s' does not exist, creating it\n",
109 syncpath);
110 if(_alpm_makepath(syncpath) != 0) {
111 free(dbfile);
112 free(syncpath);
113 RET_ERR(PM_ERR_SYSTEM, -1);
115 } else if(!S_ISDIR(buf.st_mode)) {
116 _alpm_log(PM_LOG_WARNING, _("removing invalid file: %s\n"), syncpath);
117 if(unlink(syncpath) != 0 || _alpm_makepath(syncpath) != 0) {
118 free(dbfile);
119 free(syncpath);
120 RET_ERR(PM_ERR_SYSTEM, -1);
124 ret = _alpm_download_single_file(dbfile, db->servers, syncpath, force);
125 free(dbfile);
126 free(syncpath);
128 if(ret == 1) {
129 /* files match, do nothing */
130 pm_errno = 0;
131 return(1);
132 } else if(ret == -1) {
133 /* pm_errno was set by the download code */
134 _alpm_log(PM_LOG_DEBUG, "failed to sync db: %s\n", alpm_strerrorlast());
135 return(-1);
138 /* Cache needs to be rebuilt */
139 _alpm_db_free_pkgcache(db);
141 return(0);
144 /* Forward decl so I don't reorganize the whole file right now */
/* sync_db_populate() below calls sync_db_read() before its definition
 * appears in this file, hence the prototype here. */
145 static int sync_db_read(pmdb_t *db, struct archive *archive,
146 struct archive_entry *entry, pmpkg_t *likely_pkg);
149 * This is the data table used to generate the estimating function below.
150 * "Weighted Avg" means averaging the bottom table values; thus each repo, big
151 * or small, will have equal influence. "Unweighted Avg" means averaging the
152 * sums of the top table columns, thus each package has equal influence. The
153 * final values are calculated by (surprise) averaging the averages, because
154 * why the hell not.
156 * Database Pkgs tar bz2 gz xz
157 * community 2096 5294080 256391 421227 301296
158 * core 180 460800 25257 36850 29356
159 * extra 2606 6635520 294647 470818 339392
160 * multilib 126 327680 16120 23261 18732
161 * testing 76 204800 10902 14348 12100
163 * Bytes Per Package
164 * community 2096 2525.80 122.32 200.97 143.75
165 * core 180 2560.00 140.32 204.72 163.09
166 * extra 2606 2546.25 113.06 180.67 130.23
167 * multilib 126 2600.63 127.94 184.61 148.67
168 * testing 76 2694.74 143.45 188.79 159.21
170 * Weighted Avg 2585.48 129.42 191.95 148.99
171 * Unweighted Avg 2543.39 118.74 190.16 137.93
172 * Average of Avgs 2564.44 124.08 191.06 143.46
174 static int estimate_package_count(struct stat *st, struct archive *archive)
176 unsigned int per_package;
178 switch(archive_compression(archive)) {
179 case ARCHIVE_COMPRESSION_NONE:
180 per_package = 2564;
181 break;
182 case ARCHIVE_COMPRESSION_GZIP:
183 per_package = 191;
184 break;
185 case ARCHIVE_COMPRESSION_BZIP2:
186 per_package = 124;
187 break;
188 case ARCHIVE_COMPRESSION_COMPRESS:
189 per_package = 193;
190 break;
191 case ARCHIVE_COMPRESSION_LZMA:
192 case ARCHIVE_COMPRESSION_XZ:
193 per_package = 143;
194 break;
195 case ARCHIVE_COMPRESSION_UU:
196 per_package = 3543;
197 break;
198 default:
199 /* assume it is at least somewhat compressed */
200 per_package = 200;
202 return((int)(st->st_size / per_package) + 1);
205 static int sync_db_populate(pmdb_t *db)
207 int est_count, count = 0;
208 struct stat buf;
209 struct archive *archive;
210 struct archive_entry *entry;
211 pmpkg_t *pkg = NULL;
213 ALPM_LOG_FUNC;
215 ASSERT(db != NULL, RET_ERR(PM_ERR_DB_NULL, -1));
217 if((archive = archive_read_new()) == NULL)
218 RET_ERR(PM_ERR_LIBARCHIVE, 1);
220 archive_read_support_compression_all(archive);
221 archive_read_support_format_all(archive);
223 if(archive_read_open_filename(archive, _alpm_db_path(db),
224 ARCHIVE_DEFAULT_BYTES_PER_BLOCK) != ARCHIVE_OK) {
225 _alpm_log(PM_LOG_ERROR, _("could not open %s: %s\n"), _alpm_db_path(db),
226 archive_error_string(archive));
227 archive_read_finish(archive);
228 RET_ERR(PM_ERR_DB_OPEN, 1);
230 if(lstat(_alpm_db_path(db), &buf) != 0) {
231 RET_ERR(PM_ERR_DB_OPEN, 1);
233 est_count = estimate_package_count(&buf, archive);
235 /* initialize hash at 66% full */
236 db->pkgcache = _alpm_pkghash_create(est_count * 3 / 2);
238 while(archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
239 const struct stat *st;
241 st = archive_entry_stat(entry);
243 if(S_ISDIR(st->st_mode)) {
244 const char *name;
246 pkg = _alpm_pkg_new();
247 if(pkg == NULL) {
248 archive_read_finish(archive);
249 RET_ERR(PM_ERR_MEMORY, -1);
252 name = archive_entry_pathname(entry);
254 if(_alpm_splitname(name, pkg) != 0) {
255 _alpm_log(PM_LOG_ERROR, _("invalid name for database entry '%s'\n"),
256 name);
257 _alpm_pkg_free(pkg);
258 continue;
261 /* duplicated database entries are not allowed */
262 if(_alpm_pkghash_find(db->pkgcache, pkg->name)) {
263 _alpm_log(PM_LOG_ERROR, _("duplicated database entry '%s'\n"), pkg->name);
264 _alpm_pkg_free(pkg);
265 continue;
268 pkg->origin = PKG_FROM_SYNCDB;
269 pkg->ops = &default_pkg_ops;
270 pkg->origin_data.db = db;
272 /* add to the collection */
273 _alpm_log(PM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n",
274 pkg->name, db->treename);
275 db->pkgcache = _alpm_pkghash_add(db->pkgcache, pkg);
276 count++;
277 } else {
278 /* we have desc, depends or deltas - parse it */
279 sync_db_read(db, archive, entry, pkg);
283 if(count > 0) {
284 db->pkgcache->list = alpm_list_msort(db->pkgcache->list, (size_t)count, _alpm_pkg_cmp);
286 archive_read_finish(archive);
288 return(count);
/* Helper macros used inside sync_db_read(). They rely on names from the
 * expanding scope: 'archive', 'buf', 'line' and an 'error' label. */
/* Fetch the next line via _alpm_archive_fgets(), pass it through
 * _alpm_strtrim() and assign the result to s; jumps to 'error' when the
 * read does not return ARCHIVE_OK. */
291 #define READ_NEXT(s) do { \
292 if(_alpm_archive_fgets(archive, &buf) != ARCHIVE_OK) goto error; \
293 s = _alpm_strtrim(buf.line); \
294 } while(0)
/* Read the next line and store a duplicate of it in f; jumps to 'error'
 * when the read or the duplication fails. */
296 #define READ_AND_STORE(f) do { \
297 READ_NEXT(line); \
298 STRDUP(f, line, goto error); \
299 } while(0)
/* Keep reading lines, appending a duplicate of each to the list f, until an
 * empty line is read; the 'break' on the empty line is what ends the loop. */
301 #define READ_AND_STORE_ALL(f) do { \
302 char *linedup; \
303 READ_NEXT(line); \
304 if(strlen(line) == 0) break; \
305 STRDUP(linedup, line, goto error); \
306 f = alpm_list_add(f, linedup); \
307 } while(1) /* note the while(1) and not (0) */
309 static int sync_db_read(pmdb_t *db, struct archive *archive,
310 struct archive_entry *entry, pmpkg_t *likely_pkg)
312 const char *entryname = NULL, *filename;
313 char *pkgname, *p, *q;
314 pmpkg_t *pkg;
315 struct archive_read_buffer buf;
317 ALPM_LOG_FUNC;
319 if(db == NULL) {
320 RET_ERR(PM_ERR_DB_NULL, -1);
323 if(entry != NULL) {
324 entryname = archive_entry_pathname(entry);
326 if(entryname == NULL) {
327 _alpm_log(PM_LOG_DEBUG, "invalid archive entry provided to _alpm_sync_db_read, skipping\n");
328 return(-1);
331 _alpm_log(PM_LOG_FUNCTION, "loading package data from archive entry %s\n",
332 entryname);
334 memset(&buf, 0, sizeof(buf));
335 /* 512K for a line length seems reasonable */
336 buf.max_line_size = 512 * 1024;
338 /* get package and db file names */
339 STRDUP(pkgname, entryname, RET_ERR(PM_ERR_MEMORY, -1));
340 p = pkgname + strlen(pkgname);
341 for(q = --p; *q && *q != '/'; q--);
342 filename = q + 1;
343 for(p = --q; *p && *p != '-'; p--);
344 for(q = --p; *q && *q != '-'; q--);
345 *q = '\0';
347 /* package is already in db due to parsing of directory name */
348 if(likely_pkg && strcmp(likely_pkg->name, pkgname) == 0) {
349 pkg = likely_pkg;
350 } else {
351 pkg = _alpm_pkghash_find(db->pkgcache, pkgname);
353 if(pkg == NULL) {
354 _alpm_log(PM_LOG_DEBUG, "package %s not found in %s sync database",
355 pkgname, db->treename);
356 return(-1);
359 if(strcmp(filename, "desc") == 0 || strcmp(filename, "depends") == 0
360 || strcmp(filename, "deltas") == 0) {
361 while(_alpm_archive_fgets(archive, &buf) == ARCHIVE_OK) {
362 char *line = _alpm_strtrim(buf.line);
364 if(strcmp(line, "%NAME%") == 0) {
365 READ_NEXT(line);
366 if(strcmp(line, pkg->name) != 0) {
367 _alpm_log(PM_LOG_ERROR, _("%s database is inconsistent: name "
368 "mismatch on package %s\n"), db->treename, pkg->name);
370 } else if(strcmp(line, "%VERSION%") == 0) {
371 READ_NEXT(line);
372 if(strcmp(line, pkg->version) != 0) {
373 _alpm_log(PM_LOG_ERROR, _("%s database is inconsistent: version "
374 "mismatch on package %s\n"), db->treename, pkg->name);
376 } else if(strcmp(line, "%FILENAME%") == 0) {
377 READ_AND_STORE(pkg->filename);
378 } else if(strcmp(line, "%DESC%") == 0) {
379 READ_AND_STORE(pkg->desc);
380 } else if(strcmp(line, "%GROUPS%") == 0) {
381 READ_AND_STORE_ALL(pkg->groups);
382 } else if(strcmp(line, "%URL%") == 0) {
383 READ_AND_STORE(pkg->url);
384 } else if(strcmp(line, "%LICENSE%") == 0) {
385 READ_AND_STORE_ALL(pkg->licenses);
386 } else if(strcmp(line, "%ARCH%") == 0) {
387 READ_AND_STORE(pkg->arch);
388 } else if(strcmp(line, "%BUILDDATE%") == 0) {
389 READ_NEXT(line);
390 pkg->builddate = _alpm_parsedate(line);
391 } else if(strcmp(line, "%PACKAGER%") == 0) {
392 READ_AND_STORE(pkg->packager);
393 } else if(strcmp(line, "%CSIZE%") == 0) {
394 /* Note: the CSIZE and SIZE fields both share the "size" field in the
395 * pkginfo_t struct. This can be done b/c CSIZE is currently only used
396 * in sync databases, and SIZE is only used in local databases.
398 READ_NEXT(line);
399 pkg->size = atol(line);
400 /* also store this value to isize if isize is unset */
401 if(pkg->isize == 0) {
402 pkg->isize = pkg->size;
404 } else if(strcmp(line, "%ISIZE%") == 0) {
405 READ_NEXT(line);
406 pkg->isize = atol(line);
407 } else if(strcmp(line, "%MD5SUM%") == 0) {
408 READ_AND_STORE(pkg->md5sum);
409 } else if(strcmp(line, "%REPLACES%") == 0) {
410 READ_AND_STORE_ALL(pkg->replaces);
411 } else if(strcmp(line, "%DEPENDS%") == 0) {
412 /* Different than the rest because of the _alpm_splitdep call. */
413 while(1) {
414 READ_NEXT(line);
415 if(strlen(line) == 0) break;
416 pkg->depends = alpm_list_add(pkg->depends, _alpm_splitdep(line));
418 } else if(strcmp(line, "%OPTDEPENDS%") == 0) {
419 READ_AND_STORE_ALL(pkg->optdepends);
420 } else if(strcmp(line, "%CONFLICTS%") == 0) {
421 READ_AND_STORE_ALL(pkg->conflicts);
422 } else if(strcmp(line, "%PROVIDES%") == 0) {
423 READ_AND_STORE_ALL(pkg->provides);
424 } else if(strcmp(line, "%DELTAS%") == 0) {
425 READ_AND_STORE_ALL(pkg->deltas);
428 } else {
429 /* unknown database file */
430 _alpm_log(PM_LOG_DEBUG, "unknown database file: %s", filename);
433 error:
434 FREE(pkgname);
435 /* TODO: return 0 always? */
436 return(0);
/* Operations table for sync databases; _alpm_db_register_sync() stores its
 * address in the 'ops' member of each database it creates. */
439 struct db_operations sync_db_ops = {
440 .populate = sync_db_populate,
441 .unregister = _alpm_db_unregister,
444 pmdb_t *_alpm_db_register_sync(const char *treename)
446 pmdb_t *db;
447 alpm_list_t *i;
449 ALPM_LOG_FUNC;
451 for(i = handle->dbs_sync; i; i = i->next) {
452 pmdb_t *sdb = i->data;
453 if(strcmp(treename, sdb->treename) == 0) {
454 _alpm_log(PM_LOG_DEBUG, "attempt to re-register the '%s' database, using existing\n", sdb->treename);
455 return sdb;
459 _alpm_log(PM_LOG_DEBUG, "registering sync database '%s'\n", treename);
461 db = _alpm_db_new(treename, 0);
462 db->ops = &sync_db_ops;
463 if(db == NULL) {
464 RET_ERR(PM_ERR_DB_CREATE, NULL);
467 handle->dbs_sync = alpm_list_add(handle->dbs_sync, db);
468 return(db);
472 /* vim: set ts=2 sw=2 noet: */