translit: now from koi8-u
[k8muffin.git] / src / tagload.c
blob4d0bcf6544e1ef86b5423731ebddbea891818946
1 ////////////////////////////////////////////////////////////////////////////////
// Loader options.
// Non-zero: str_tokoi() converts tag text from utf-8 to koi8-u and
// normalize_tag_str() uses the koi8-aware lowercasing path.
static int opt_tagload_koi8 = 1;
// Non-zero: load_tagfile() stat()s every file name and skips entries that
// are missing or are not regular files.
static int opt_tagload_dostat = 0;
// Build the 'transliterated' form of a tag value.
//
// Pure 7-bit input is used as is; anything else is converted from utf-8 to
// cp866 with iconv and then passed through translitstr().  The result is
// lowercased, every non-alphanumeric character becomes '_', runs of '_' are
// collapsed to one, and leading/trailing '_' are removed.
//
// Returns a heap string the caller must free() ("" for NULL/empty input),
// or NULL on iconv or allocation failure.
//
// NOTE(review): translitstr() is defined elsewhere; it is assumed to return a
// freshly allocated string (the original code freed its argument right after
// the call and later returned/freed the result) -- confirm.
static char *str_transliterate (const char *str) {
  char *outs;

  if (str == NULL) return strdup("");

  int asis = 1;
  for (const unsigned char *u = (const unsigned char *)str; *u; ++u) {
    if (*u >= 128) { asis = 0; break; }
  }

  if (asis) {
    outs = strdup(str);
  } else {
    //cd = iconv_open("ascii//translit//ignore", "utf-8");
    iconv_t cd = iconv_open("cp866//translit//ignore", "utf-8");
    if (cd == (iconv_t)-1) return NULL;

    size_t il = strlen(str);
    const size_t ool = il*4;
    size_t ol = ool;
    // zero-filled and one byte larger than the space offered to iconv,
    // so the converted text is always NUL-terminated
    char *conv = calloc(1, ool+1);
    if (conv == NULL) {
      iconv_close(cd);
      return NULL;
    }

    char *ibuf = (char *)str; // iconv() wants a non-const pointer; input is not modified
    char *obuf = conv;
    const size_t rc = iconv(cd, &ibuf, &il, &obuf, &ol);
    iconv_close(cd);
    if (rc == (size_t)-1) {
      free(conv);
      return NULL;
    }
    conv[ool-ol] = 0;

    outs = translitstr(conv);
    free(conv);
  }
  if (outs == NULL) return NULL;

  // single in-place pass: lowercase, map non-alnum to '_', and emit '_' only
  // between two kept characters (this drops leading '_' and collapses runs)
  char *w = outs;
  for (const char *r = outs; *r; ++r) {
    // ctype functions require an unsigned char value (or EOF)
    const int c = tolower((unsigned char)*r);
    if (isalnum(c)) {
      *w++ = (char)c;
    } else if (w > outs && w[-1] != '_') {
      *w++ = '_';
    }
  }
  // at most one trailing '_' can be left
  if (w > outs && w[-1] == '_') --w;
  *w = 0;

  return outs;
}
60 ////////////////////////////////////////////////////////////////////////////////
61 static char *str_tokoi (const char *str) {
62 if (opt_tagload_koi8) {
63 iconv_t cd;
64 size_t il, ol, ool;
65 char *outs, *ibuf, *obuf, *res;
66 int asis = 1;
67 if (str == NULL) return strdup("");
68 if (str == NULL || !str[0]) { res = strdup(str); goto done; }
69 for (const unsigned char *u = (const unsigned char *)str; *u; ++u) if (*u >= 128) { asis = 0; break; }
70 if (asis) { res = strdup(str); goto done; }
71 cd = iconv_open("koi8-u//translit//ignore", "utf-8");
72 if (cd == (iconv_t)-1) return NULL;
73 outs = calloc(1, strlen(str)*6+4);
74 if (outs == NULL) {
75 iconv_close(cd);
76 return NULL;
78 ibuf = (char *)str;
79 obuf = outs;
80 il = strlen(str);
81 ool = ol = il*4;
82 il = iconv(cd, &ibuf, &il, &obuf, &ol);
83 iconv_close(cd);
84 if (il == (size_t)-1) {
85 free(outs);
86 return NULL;
88 res = calloc(ool-ol+1, 1);
89 if (ool-ol > 0) memcpy(res, outs, ool-ol);
90 free(outs);
91 done:
92 return res;
94 if (str == NULL) return strdup("");
95 return strdup(str);
99 ////////////////////////////////////////////////////////////////////////////////
// One record of the tag file, as filled in by tagfile_item_read().
// All strings are heap-allocated and released by tagfile_item_clear().
typedef struct {
  char *filename;
  // 8-byte values copied verbatim from the record
  uint64_t inode;
  uint64_t size;
  uint64_t mtime;
  // textual tags
  char *artist;   // can't be NULL
  char *album;    // can't be NULL
  char *title;    // can't be NULL
  char *genre;    // can be NULL
  // numeric tags
  uint16_t year;  // can be 0
  uint8_t track;  // can be 0
} tagfile_item_t;
114 ////////////////////////////////////////////////////////////////////////////////
// Duplicate the first `size` bytes at `ptr` into a new NUL-terminated string.
// A negative `size` is treated as 0 (`ptr` is not read in that case).
// Returns a heap string the caller must free(), or NULL when out of memory.
static inline char *dupstrn (const void *ptr, int size) {
  if (size < 0) size = 0;
  char *res = malloc((size_t)size+1);
  if (res == NULL) return NULL;
  if (size > 0) memcpy(res, ptr, (size_t)size);
  res[size] = 0;
  return res;
}
// Trim leading and trailing whitespace from `s` in place, then replace every
// '/' that remains with '_'.  NULL is ignored.
static inline void trimstr (char *s) {
  if (s == NULL) return;

  // ctype functions require an unsigned char value (or EOF)
  const char *first = s;
  while (*first && isspace((unsigned char)*first)) ++first;

  // work with a length so no pointer before the start of the buffer is
  // ever formed, even for an empty or all-whitespace string
  size_t len = strlen(first);
  while (len > 0 && isspace((unsigned char)first[len-1])) --len;

  if (first != s) memmove(s, first, len);
  s[len] = 0;

  for (; *s; ++s) if (*s == '/') *s = '_';
}
// String field reader for tagfile_item_read().
// If the current field name `nm` equals `tagname`, replace fi->fldname with a
// copy of the `sz` value bytes at `p`.  Every field except "filename" is
// passed through trimstr(); a value that ends up empty is stored as NULL.
// Jumps to `goon` when the field was consumed and to `error` when the copy
// could not be allocated.  Expects `nm`, `p`, `sz`, `fi` and both labels to
// exist in the expanding function.
#define GETFIELDS(tagname,fldname) do { \
  if (strcmp(nm, tagname) == 0) { \
    free(fi->fldname); \
    fi->fldname = dupstrn(p, sz); \
    if (fi->fldname == NULL) goto error; \
    if (strcmp(tagname, "filename") != 0) trimstr(fi->fldname); \
    if (!fi->fldname[0]) { free(fi->fldname); fi->fldname = NULL; } \
    goto goon; \
  } \
} while (0)
// Fixed-size field reader for tagfile_item_read().
// If the current field name `nm` equals `tagname`, the value must be exactly
// `nsz` bytes long (otherwise jump to `error`); its bytes are copied verbatim
// into fi->fldname and control jumps to `goon`.  A non-matching name falls
// through to the code after the macro.
#define GETFIELDN(tagname,fldname,nsz) do { \
  if (strcmp(nm, tagname) != 0) break; \
  if (sz != nsz) goto error; \
  memcpy(&fi->fldname, p, nsz); \
  goto goon; \
} while (0)
154 // and skip to next one
155 static int tagfile_item_read (FILE *fl, tagfile_item_t *fi) {
156 uint16_t size;
157 static char buf[65536], nm[65536];
158 char *p = buf;
159 memset(fi, 0, sizeof(*fi));
160 if (fread(&size, 2, 1, fl) != 1 || size < 1) return -1;
161 if (fread(buf, size, 1, fl) != 1) return -1;
162 while (size >= 2) {
163 uint16_t sz;
164 // name
165 memcpy(&sz, p, 2);
166 p += 2;
167 size -= 2;
168 if (sz > size) goto error;
169 size -= sz;
170 if (sz > 0) {
171 memcpy(nm, p, sz);
172 p += sz;
174 nm[sz] = 0;
175 // value
176 if (size < 2) goto error;
177 memcpy(&sz, p, 2);
178 p += 2;
179 size -= 2;
180 if (sz > size) goto error;
181 size -= sz;
183 GETFIELDS("filename", filename);
184 GETFIELDS("artist", artist);
185 GETFIELDS("album", album);
186 GETFIELDS("title", title);
187 GETFIELDS("genre", genre);
188 GETFIELDN("inode", inode, 8);
189 GETFIELDN("size", size, 8);
190 GETFIELDN("mtime", mtime, 8);
191 GETFIELDN("year", year, 2);
192 GETFIELDN("track", track, 1);
193 goon:
194 p += sz;
196 if (fi->filename == NULL) goto error;
197 if (size == 0) {
198 if (fi->title == NULL) fi->title = strdup("unknown title");
199 if (fi->album == NULL) fi->album = strdup("unknown album");
200 if (fi->artist == NULL) fi->artist = strdup("unknown artist");
201 if (fi->genre == NULL) fi->genre = strdup("unknown genre");
202 return 0;
204 error:
205 if (fi->genre != NULL) free(fi->genre);
206 if (fi->title != NULL) free(fi->title);
207 if (fi->album != NULL) free(fi->album);
208 if (fi->artist != NULL) free(fi->artist);
209 if (fi->filename != NULL) free(fi->filename);
210 memset(fi, 0, sizeof(*fi));
211 return -1;
215 static void tagfile_item_clear (tagfile_item_t *fi) {
216 if (fi != NULL) {
217 if (fi->genre != NULL) free(fi->genre);
218 if (fi->title != NULL) free(fi->title);
219 if (fi->album != NULL) free(fi->album);
220 if (fi->artist != NULL) free(fi->artist);
221 if (fi->filename != NULL) free(fi->filename);
222 memset(fi, 0, sizeof(*fi));
227 ////////////////////////////////////////////////////////////////////////////////
228 typedef struct file_info_t file_info_t;
231 typedef struct {
232 char *s;
233 // file ids for this tag
234 int idcount;
235 int idallocated;
236 file_info_t **ids;
237 UT_hash_handle hh;
238 } tagvalue_t;
241 typedef struct {
242 tagvalue_t *tv;
243 char *s; // lowercased, must be free()d
244 UT_hash_handle hh;
245 } tagalias_t;
248 // 'transliterated' tags
249 static tagvalue_t *tagv_hash_t = NULL;
250 static tagvalue_t *tagv_artist_hash_t = NULL;
251 static tagvalue_t *tagv_album_hash_t = NULL;
252 static tagvalue_t *tagv_title_hash_t = NULL;
253 static tagvalue_t *tagv_genre_hash_t = NULL;
254 // 'original' tags
255 static tagvalue_t *tagv_hash_o = NULL;
256 static tagvalue_t *tagv_artist_hash_o = NULL;
257 static tagvalue_t *tagv_album_hash_o = NULL;
258 static tagvalue_t *tagv_title_hash_o = NULL;
259 static tagvalue_t *tagv_genre_hash_o = NULL;
260 // year is the same for both
261 static tagvalue_t *tagv_year_hash = NULL;
263 static tagalias_t *tagv_norm_hash = NULL; // 'normalized aliases' hash
266 ////////////////////////////////////////////////////////////////////////////////
267 // note that two years here is just for convience, they are the same
268 struct file_info_t {
269 char *fullname; // full name
270 char *shortname; // should be unique
271 // 'original'
272 tagvalue_t *year_o; // can be NULL
273 tagvalue_t *artist_o; // can't be NULL
274 tagvalue_t *album_o; // can't be NULL
275 tagvalue_t *title_o; // can't be NULL
276 tagvalue_t *genre_o; // can' be NULL
277 // 'normalized'
278 tagvalue_t *year_t; // can be NULL
279 tagvalue_t *artist_t; // can't be NULL
280 tagvalue_t *album_t; // can't be NULL
281 tagvalue_t *title_t; // can't be NULL
282 tagvalue_t *genre_t; // can' be NULL
284 UT_hash_handle hh;
288 typedef struct {
289 file_info_t *fi;
290 char *shortname; // points to fi->shortname
291 UT_hash_handle hh;
292 } file_sname_t;
295 ////////////////////////////////////////////////////////////////////////////////
296 static int file_info_count = 0;
297 static file_info_t *file_name_hash = NULL;
298 static file_sname_t *file_sname_hash = NULL;
301 ////////////////////////////////////////////////////////////////////////////////
302 static void tagvalue_add_id (tagvalue_t *tv, file_info_t *fi) {
303 //if (bsearch(fi, tv->ids, tv->idcount, sizeof(tv->ids[0]), cmp_ptr) != NULL) return;
304 for (int f = 0; f < tv->idcount; ++f) if (tv->ids[f] == fi) return;
305 if (tv->idcount+1 > tv->idallocated) {
306 int newsz = ((tv->idcount+1)|0x1f)+1;
307 file_info_t **n = realloc(tv->ids, sizeof(tv->ids[0])*newsz);
308 if (n == NULL) {
309 fprintf(stderr, "FATAL: out of memory in tagvalue_add_id!\n");
310 abort();
312 tv->idallocated = newsz;
313 tv->ids = n;
315 tv->ids[tv->idcount++] = fi;
316 //qsort(tv->ids, tv->idcount, sizeof(tv->ids[0]), cmp_ptr);
320 static inline void normalize_tag_str (char *d, const char *s) {
321 for (; *s; ++s) {
322 unsigned char ch = (unsigned char)(*s);
323 if (isspace(*s)) continue;
324 if (ch == 127) continue;
325 if (opt_tagload_koi8) {
326 *d = le2lower(*s);
327 if (ch >= 128) {
328 if (*d == '£') *d = 'Å';
329 if (*d == 'Ý') *d = 'Û';
330 if (*d == 'Ó') *d = 'c'; // russian to latin
331 ++d;
332 } else if (isdigit(*d) || (*d >= 'a' && *d <= 'z')) {
333 ++d;
335 } else {
336 *d = *s;
337 if (ch >= 128) {
338 ++d;
339 } else if (isdigit(*d) || (*d >= 'a' && *d <= 'z')) {
340 ++d;
344 *d = 0;
348 static void tagfile_item_regtag (file_info_t *fi, tagvalue_t **tv_o, tagvalue_t **tv_t, const char *value) {
349 tagvalue_t *tv = NULL;
350 char *ss, *s1, *s2;
351 int freess = 1, frees1 = 1;
353 ss = str_tokoi(value);
354 s1 = str_transliterate(value);
355 if (ss == NULL || s1 == NULL) {
356 fprintf(stderr, "FATAL: tagfile_item_regtag iconv error!\n");
357 abort();
359 s2 = alloca(strlen(ss)+strlen(s1)+2);
360 // 'original' tag
361 *tv_o = *tv_t = NULL;
362 if (ss[0]) {
363 HASH_FIND_STR(tagv_hash_o, ss, tv);
364 if (tv == NULL) {
365 tagalias_t *ta;
366 // add this tag
367 normalize_tag_str(s2, ss);
368 HASH_FIND_STR(tagv_norm_hash, s2, ta);
369 if (ta == NULL) {
370 freess = 0;
371 tv = calloc(1, sizeof(*tv));
372 tv->s = ss;
373 HASH_ADD_KEYPTR(hh, tagv_hash_o, tv->s, strlen(tv->s), tv);
375 ta = calloc(1, sizeof(*ta));
376 ta->tv = tv;
377 ta->s = strdup(s2);
378 HASH_ADD_KEYPTR(hh, tagv_norm_hash, ta->s, strlen(ta->s), ta);
379 } else {
380 tv = ta->tv;
383 tagvalue_add_id(tv, fi);
384 *tv_o = tv;
386 // 'transliterated' tag
387 if (s1[0]) {
388 HASH_FIND_STR(tagv_hash_t, s1, tv);
389 if (tv == NULL) {
390 // add this tag
391 frees1 = 0;
392 tv = calloc(1, sizeof(*tv));
393 tv->s = s1;
394 HASH_ADD_KEYPTR(hh, tagv_hash_t, tv->s, strlen(tv->s), tv);
396 tagvalue_add_id(tv, fi);
397 *tv_t = tv;
400 if (frees1) free(s1);
401 if (freess) free(ss);
405 static void tagfile_item_regtag_ex (file_info_t *fi, tagvalue_t *tv_x, tagvalue_t **tagh) {
406 if (tv_x != NULL) {
407 tagvalue_t *tv;
408 HASH_FIND_STR(*tagh, tv_x->s, tv);
409 if (tv == NULL) {
410 // add this tag
411 tv = calloc(1, sizeof(*tv));
412 tv->s = tv_x->s;
413 HASH_ADD_KEYPTR(hh, *tagh, tv->s, strlen(tv->s), tv);
415 tagvalue_add_id(tv, fi);
420 static void tagfile_item_register (const tagfile_item_t *tfi) {
421 file_info_t *fi, *of;
422 char buf[64];
424 HASH_FIND_STR(file_name_hash, tfi->filename, of);
425 if (of != NULL) {
426 dlogf("duplicate file: [%s]\n", tfi->filename);
427 return; // nothing to do, this file already registered
430 fi = calloc(1, sizeof(*fi));
431 fi->fullname = strdup(tfi->filename);
433 HASH_ADD_KEYPTR(hh, file_name_hash, fi->fullname, strlen(fi->fullname), fi);
435 tagfile_item_regtag(fi, &fi->artist_o, &fi->artist_t, tfi->artist);
436 tagfile_item_regtag(fi, &fi->album_o, &fi->album_t, tfi->album);
437 tagfile_item_regtag(fi, &fi->title_o, &fi->title_t, tfi->title);
438 tagfile_item_regtag(fi, &fi->genre_o, &fi->genre_t, tfi->genre);
440 tagfile_item_regtag_ex(fi, fi->artist_o, &tagv_artist_hash_o);
441 tagfile_item_regtag_ex(fi, fi->album_o, &tagv_album_hash_o);
442 tagfile_item_regtag_ex(fi, fi->title_o, &tagv_title_hash_o);
443 tagfile_item_regtag_ex(fi, fi->genre_o, &tagv_genre_hash_o);
445 tagfile_item_regtag_ex(fi, fi->artist_t, &tagv_artist_hash_t);
446 tagfile_item_regtag_ex(fi, fi->album_t, &tagv_album_hash_t);
447 tagfile_item_regtag_ex(fi, fi->title_t, &tagv_title_hash_t);
448 tagfile_item_regtag_ex(fi, fi->genre_t, &tagv_genre_hash_t);
450 fi->year_o = fi->year_t = NULL;
451 if (tfi->year != 0) {
452 sprintf(buf, "%u", tfi->year);
453 tagfile_item_regtag(fi, &fi->year_o, &fi->year_t, buf);
454 tagfile_item_regtag_ex(fi, fi->year_o, &tagv_year_hash);
456 // short name
458 file_sname_t *of;
459 char *t = strrchr(fi->fullname, '/')+1;
460 HASH_FIND_STR(file_sname_hash, t, of);
461 if (of != NULL) {
462 // build unique name
463 //dlogf("000: [%s]", t);
464 char *newn = alloca(strlen(t)+32);
465 char *ext = strrchr(t, '.');
466 if (ext == NULL) {
467 for (int n = 1; n < 999999; ++n) {
468 sprintf(newn, "%s_%d", t, n);
469 HASH_FIND_STR(file_sname_hash, newn, of);
470 if (of == NULL) break;
472 } else {
473 char och = *ext;
474 *ext = 0;
475 for (int n = 1; n < 999999; ++n) {
476 sprintf(newn, "%s_%d.%s", t, n, ext+1);
477 HASH_FIND_STR(file_sname_hash, newn, of);
478 if (of == NULL) break;
480 *ext = och;
482 fi->shortname = strdup(newn);
483 //dlogf("DUPNAME: [%s] -> [%s]\n", t, newn);
484 } else {
485 fi->shortname = strdup(t);
488 of = calloc(1, sizeof(*of));
489 of->fi = fi;
490 of->shortname = fi->shortname;
491 //dlogf("002: [%s]", of->shortname);
492 HASH_ADD_KEYPTR(hh, file_sname_hash, of->shortname, strlen(of->shortname), of);
494 //dlogf("006");
498 ////////////////////////////////////////////////////////////////////////////////
499 static int load_tagfile (const char *fname) {
501 int tyh_sort (const tagvalue_t *t0, const tagvalue_t *t1) {
502 return strcmp(t0->s, t1->s);
505 int sname_sort (const file_sname_t *t0, const file_sname_t *t1) {
506 return strcmp(t0->shortname, t1->shortname);
509 int name_sort (const file_info_t *t0, const file_info_t *t1) {
510 return strcmp(t0->fullname, t1->fullname);
513 tagalias_t *ta, *tatmp;
514 FILE *fl;
515 char sign[4];
516 uint32_t fc;
517 fl = fopen(fname, "r");
518 if (fl == NULL) return -1; // no tags
519 if (fread(sign, 4, 1, fl) != 1) goto error;
520 if (memcmp(sign, "TFI0", 4) != 0) goto error;
521 if (fread(&fc, 4, 1, fl) != 1) goto error;
522 printf("loading %u files...\n", (unsigned int)fc);
523 while (fc-- > 0) {
524 tagfile_item_t tfi;
525 struct stat st;
526 if (tagfile_item_read(fl, &tfi) != 0) goto error;
527 if (opt_tagload_dostat) {
528 if (stat(tfi.filename, &st) != 0) goto fileerror;
529 if (!S_ISREG(st.st_mode)) goto fileerror;
531 //if (st.st_ino != tfi.inode || st.st_mtime != tfi.mtime || st.st_size != tfi.size) goto fileerror; // file was changed, drop and rescan
532 tagfile_item_register(&tfi);
533 fileerror:
534 tagfile_item_clear(&tfi);
535 //if ((fc&0xff) == 0) { fprintf(stdout, "\r%u left (years: %d)\x1b[K", fc, HASH_COUNT(tagv_year_hash)); fflush(stdout); }
537 fclose(fl);
538 file_info_count = HASH_COUNT(file_name_hash);
539 printf("%d files loaded, %d tags registered, %d normal tags registered.\n", file_info_count, HASH_COUNT(tagv_hash_o), HASH_COUNT(tagv_hash_t));
542 HASH_SORT(tagv_year_hash, tyh_sort);
543 HASH_SORT(file_name_hash, name_sort);
544 HASH_SORT(file_sname_hash, sname_sort);
547 for (const tagvalue_t *tv = tagv_year_hash; tv != NULL; tv = tv->hh.next) dlogf("year: [%s] (%d)\n", tv->s, tv->idcount);
549 // clear 'normalized' hash, we don't need it anymore
550 HASH_ITER(hh, tagv_norm_hash, ta, tatmp) {
551 HASH_DEL(tagv_norm_hash, ta);
552 free(ta->s);
553 free(ta);
555 return 0;
556 error:
557 // invalid tag file
558 fclose(fl);
559 return -1;
563 ////////////////////////////////////////////////////////////////////////////////
// Number of elements in a true array (must not be used on a pointer).
#define ARRAYLEN(arr) (sizeof(arr)/sizeof(*(arr)))
// Names of the special directories: the tag kind prefixed with ':'.
// The names without the ':' are the ones special_hash() and special_ofs()
// understand; the "o..." variants select the 'original' tags.
static const char *specdirs[] = {
  ":artist", ":album", ":title", ":genre", ":year",
  ":oartist", ":oalbum", ":otitle", ":ogenre", ":oyear",
};
580 ////////////////////////////////////////////////////////////////////////////////
581 static inline tagvalue_t **special_hash (const char *name) {
582 if (strcmp(name, "artist") == 0) return &tagv_artist_hash_t;
583 if (strcmp(name, "album") == 0) return &tagv_album_hash_t;
584 if (strcmp(name, "title") == 0) return &tagv_title_hash_t;
585 if (strcmp(name, "genre") == 0) return &tagv_genre_hash_t;
586 if (strcmp(name, "year") == 0) return &tagv_year_hash;
587 if (strcmp(name, "oartist") == 0) return &tagv_artist_hash_o;
588 if (strcmp(name, "oalbum") == 0) return &tagv_album_hash_o;
589 if (strcmp(name, "otitle") == 0) return &tagv_title_hash_o;
590 if (strcmp(name, "ogenre") == 0) return &tagv_genre_hash_o;
591 if (strcmp(name, "oyear") == 0) return &tagv_year_hash;
592 return NULL;
596 // offset in file_info_t
597 static inline int special_ofs (const char *name) {
598 if (strcmp(name, "artist") == 0) return __builtin_offsetof(file_info_t, artist_t);
599 if (strcmp(name, "album") == 0) return __builtin_offsetof(file_info_t, album_t);
600 if (strcmp(name, "title") == 0) return __builtin_offsetof(file_info_t, title_t);
601 if (strcmp(name, "genre") == 0) return __builtin_offsetof(file_info_t, genre_t);
602 if (strcmp(name, "year") == 0) return __builtin_offsetof(file_info_t, year_t);
603 if (strcmp(name, "oartist") == 0) return __builtin_offsetof(file_info_t, artist_o);
604 if (strcmp(name, "oalbum") == 0) return __builtin_offsetof(file_info_t, album_o);
605 if (strcmp(name, "otitle") == 0) return __builtin_offsetof(file_info_t, title_o);
606 if (strcmp(name, "ogenre") == 0) return __builtin_offsetof(file_info_t, genre_o);
607 if (strcmp(name, "oyear") == 0) return __builtin_offsetof(file_info_t, year_o);
608 return -1;