1 /* logjam - a GTK client for LiveJournal.
2 * Copyright (C) 2000-2005 Evan Martin <evan@livejournal.com>
4 * vim: tabstop=4 shiftwidth=4 noexpandtab :
11 #include <sys/types.h>
18 #include <netinet/in.h>
23 #include <libxml/parser.h>
25 #include "journalstore.h"
/* On-disk format versions.
 * JOURNAL_STORE_INDEX_VERSION is stored in slot 0 of the index array and is
 * compared against the loaded value in index_load().
 * JOURNAL_STORE_XML_VERSION is written as the "version" property of a month
 * document's root node (make_new_doc) and checked in switch_xml_file(). */
31 #define JOURNAL_STORE_INDEX_VERSION 3
32 #define JOURNAL_STORE_XML_VERSION 2
/* In-memory state of one offline journal store.
 * Members visible in this listing: xml_doc (the cached month document),
 * xml_year / xml_mon (which month that document is for) and xml_dirty,
 * which is set when the cached document is modified and tested before the
 * document is written back in switch_xml_file().
 * NOTE(review): this listing omits several original lines. The members the
 * rest of the file uses as js->path and js->index are not shown here, and
 * the last comment inside the struct is cut off before its closing
 * delimiter. */
34 struct _JournalStore
{
38 /* the index is a flat array of itemid->time_t.
39 * it's small enough to scan through when
40 * we need to do a reverse lookup. */
43 /* has the file format changed in such a way that we need to resync? */
46 /* we cache the "current" xml doc around in memory,
47 * because we usually want to grab multiple entries from
49 xmlDocPtr xml_doc
; int xml_year
, xml_mon
; gboolean xml_dirty
;
/* Accessor that takes the store handle.
 * NOTE(review): the return type and the body are not present in this
 * listing; presumably it returns the account the store belongs to --
 * confirm against the full file. */
53 journal_store_get_account(JournalStore
*js
) {
/* Index slot accessors.
 *   index_at(idx, i)       raw time_t element i of the GArray idx (lvalue).
 *   index_get(idx, i)      element i passed through ntohl().
 *   index_set(idx, i, val) stores htonl(val) into element i.
 * NOTE(review): ntohl/htonl convert 32-bit quantities while the slots are
 * declared as time_t -- confirm this is intended where time_t is wider.
 * index_set expands to a bare assignment without parentheses, so it should
 * only be used as a complete statement. */
57 #define index_at(idx, i) g_array_index(idx, time_t, i)
58 #define index_get(idx, i) ntohl(index_at(idx, i))
59 #define index_set(idx, i, val) index_at(idx, i) = htonl(val)
/* Load the index file "<js->path>/index" into js->index.
 *
 * The file is read as a flat array of sizeof(time_t)-sized slots; the slot
 * count comes from the file size reported by stat(). Slot 0 holds the
 * format version. If the version read back has any of its top eight bits
 * set, every slot is byte-swapped in place and the version is read again.
 * An index older than JOURNAL_STORE_INDEX_VERSION is thrown away: the array
 * is shrunk to one slot holding the current version.
 *
 * NOTE(review): this listing omits the local declarations, the return
 * statements and the bodies of the error branches, so the return value and
 * the handling of a missing file / failed fopen / short fread cannot be
 * confirmed from here. */
62 index_load(JournalStore
*js
, GError
**err
) {
70 path
= g_build_filename(js
->path
, "index", NULL
);
/* Test for a missing index file (branch body not shown in this listing). */
72 if (stat(path
, &statbuf
) < 0 && errno
== ENOENT
) {
77 f
= fopen(path
, "rb");
80 g_print("XXX index fopen: %s\n", g_strerror(errno
));
/* Size the array from the file length, then read the slots in one call. */
84 itemcount
= statbuf
.st_size
/ sizeof(time_t);
85 g_array_set_size(js
->index
, itemcount
);
86 ret
= (int)fread(js
->index
->data
, sizeof(time_t), itemcount
, f
);
87 if (ret
< itemcount
) {
88 g_print("XXX index fread read too little\n");
/* Slot 0 is the version; a set top byte is treated as wrong byte order. */
92 ver
= index_get(js
->index
, 0);
93 if ((ver
& 0xFF000000) != 0) {
94 /* byte order is messed up in older versions. fix it here. yuck. */
96 for (i
= 0; i
< itemcount
; i
++) {
97 guint32 v
= index_at(js
->index
, i
);
98 index_at(js
->index
, i
) =
99 ((v
& 0x000000FF) << 24) |
100 ((v
& 0x0000FF00) << 8) |
101 ((v
& 0x00FF0000) >> 8) |
102 ((v
& 0xFF000000) >> 24);
104 ver
= index_get(js
->index
, 0);
107 if (ver
< JOURNAL_STORE_INDEX_VERSION
) {
108 /* file format somehow changed. clear the index. */
109 g_array_set_size(js
->index
, 1);
110 index_set(js
->index
, 0, JOURNAL_STORE_INDEX_VERSION
);
/* Write the index array idx to "<storepath>/index".
 *
 * The file is opened with mode "wb" and all idx->len slots are written
 * with a single fwrite of sizeof(time_t)-sized items. A failed open and a
 * short write are each reported through err with a translated message.
 *
 * NOTE(review): the return statements, the cleanup of path / f and the
 * trailing arguments of both g_set_error calls are not present in this
 * listing. */
118 index_write(const char *storepath
, GArray
*idx
, GError
**err
) {
123 path
= g_build_filename(storepath
, "index", NULL
);
125 f
= fopen(path
, "wb");
128 g_set_error(err
, 0, 0, _("Error opening index: %s"),
133 wrote
= fwrite(idx
->data
, sizeof(time_t), idx
->len
, f
);
134 if (wrote
< idx
->len
) {
135 g_set_error(err
, 0, 0, _("Error writing index: %s"),
/* Save this store's index: forwards js->path, js->index and err to
 * index_write() and returns its result unchanged. */
145 index_save(JournalStore
*js
, GError
**err
) {
146 return index_write(js
->path
, js
->index
, err
);
/* Break an entry time into the year / month / day used to locate its month
 * document. The time is converted with gmtime(), and the year output is
 * tm_year + 1900. A second path sets all three outputs to 0.
 *
 * NOTE(review): the condition choosing between the two paths and the
 * assignments of *mon and *day are not present in this listing.
 * journal_store_put_group() reaches this function with a NULL entrytime
 * (via switch_xml_file_from_time), so the all-zero path is presumably the
 * NULL case -- confirm. */
150 time_to_docidx(const time_t *entrytime
, int *year
, int *mon
, int *day
) {
153 lt
= gmtime(entrytime
);
154 *year
= lt
->tm_year
+1900;
158 *year
= *mon
= *day
= 0;
/* Build the path of a month document: "<base>/<year>/<mon>.xml", with the
 * month zero-padded to two digits. The string comes from g_strdup_printf(),
 * so it is newly allocated; callers in this file release it with g_free(). */
162 docidx_to_str(char *base
, int year
, int mon
) {
163 return g_strdup_printf("%s/%d/%02d.xml", base
, year
, mon
);
/* Recursive helper for delete_unused_whitespace(): walks the children of
 * node, tests each one with xmlIsBlankNode() and recurses into children.
 * The loop advances through a separately saved `next` pointer rather than
 * node->next.
 * NOTE(review): this listing omits the lines that act on a blank node and
 * the assignment of `next`, and the explanatory comment inside the function
 * is cut off before its closing delimiter; presumably blank nodes are
 * unlinked and freed -- confirm against the full file. */
167 delete_unused_whitespace_r(xmlNodePtr node
) {
170 /* whitespace is significant within many nodes, like the event,
171 * but we only get isolated pure-whitespace nodes when whitespace
172 * is used alongside nodes. all journal content is escaped, so
173 * all nodes that contain both whitespace and nodes should be
176 for (node
= node
->xmlChildrenNode
; node
; node
= next
) {
178 if (xmlIsBlankNode(node
)) {
182 delete_unused_whitespace_r(node
);
/* Entry point for whitespace cleanup: runs delete_unused_whitespace_r()
 * starting from the root element of doc. */
188 delete_unused_whitespace(xmlDocPtr doc
) {
189 delete_unused_whitespace_r(xmlDocGetRootElement(doc
));
/* Create an empty month document: a new doc whose root element is
 * "entrymonth", with integer properties "version"
 * (JOURNAL_STORE_XML_VERSION), "year" and "month".
 * NOTE(review): the declarations and the return statement are not present
 * in this listing; callers assign the result to an xmlDocPtr. */
193 make_new_doc(int year
, int mon
) {
196 jam_xmlNewDoc(&doc
, &node
, "entrymonth");
197 jam_xmlSetIntProp(node
, "version", JOURNAL_STORE_XML_VERSION
);
198 jam_xmlSetIntProp(node
, "year", year
);
199 jam_xmlSetIntProp(node
, "month", mon
);
/* Make (year, mon) the month document cached in js.
 *
 * Steps visible in this listing:
 *  1. If the requested month is already the cached one, take the early
 *     path (its body is not shown).
 *  2. If a month is cached (js->xml_year non-zero) and it is dirty, write
 *     it out: the target path is checked with verify_path(), the document
 *     is saved to "<path>.tmp" with xmlSaveFormatFile() and then renamed
 *     over the real file. A failed save or rename is reported through err
 *     and both path strings are freed.
 *  3. Load the requested month: when stat() fails with ENOENT a fresh
 *     document is made with make_new_doc(); otherwise the file is parsed,
 *     the root's "version" property is read, a document older than
 *     JOURNAL_STORE_XML_VERSION is replaced by a fresh one, and
 *     delete_unused_whitespace() is applied. A parse failure is reported
 *     through err.
 *  4. js->xml_dirty is cleared.
 *
 * journal_store_put_group() calls this through switch_xml_file_from_time()
 * with a NULL time as its way of flushing the cached document.
 *
 * NOTE(review): this listing omits the return statements, some branch
 * conditions (including the one guarding step 3), the release of the old
 * document and the assignments to js->xml_doc / xml_year / xml_mon. */
204 switch_xml_file(JournalStore
*js
, int year
, int mon
, GError
**err
) {
207 xmlDocPtr doc
= NULL
;
209 /* are we already there? */
210 if (year
== js
->xml_year
&& mon
== js
->xml_mon
) {
214 /* otherwise, switch to this file.
215 * XXX protective locking would be good. */
216 /* first write out the old file, if we have one. */
217 if (js
->xml_year
&& js
->xml_dirty
) {
219 path
= docidx_to_str(js
->path
, js
->xml_year
, js
->xml_mon
);
220 if (!verify_path(path
, FALSE
, err
)) {
224 /* write to a temp file and then rename,
225 * to avoid losing data if we die mid-write. */
226 tmppath
= g_strconcat(path
, ".tmp", NULL
);
227 if (xmlSaveFormatFile(tmppath
, js
->xml_doc
, TRUE
) < 0) {
228 g_set_error(err
, 0, 0, _("Error writing journal xml file to %s: %s"),
229 tmppath
, g_strerror(errno
));
230 g_free(tmppath
); g_free(path
);
233 if (rename(tmppath
, path
) < 0) {
234 g_set_error(err
, 0, 0, _("Error renaming journal xml file %s to %s: %s"),
235 tmppath
, path
, g_strerror(errno
));
236 g_free(tmppath
); g_free(path
);
243 /* then switch to the new file, if we have one. */
245 path
= docidx_to_str(js
->path
, year
, mon
);
246 if (stat(path
, &statbuf
) < 0 && errno
== ENOENT
) {
247 doc
= make_new_doc(year
, mon
);
250 doc
= xmlParseFile(path
);
252 if (!jam_xmlGetIntProp(xmlDocGetRootElement(doc
), "version", &ver
))
255 if (ver
< JOURNAL_STORE_XML_VERSION
) {
256 /* out of date document. */
258 doc
= make_new_doc(year
, mon
);
260 /* if there is any whitespace in nodes where we don't
261 * care about whitespace, libxml thinks that the whitespace
262 * was important and won't reformat it correctly.
263 * so we need to delete all the whitespace on load. */
264 delete_unused_whitespace(doc
);
267 g_set_error(err
, 0, 0,
268 _("Error parsing journal XML file %s"), path
);
275 js
->xml_dirty
= FALSE
;
/* Convenience wrapper: converts entrytime to year / month with
 * time_to_docidx() and returns the result of switch_xml_file() for that
 * month. The day computed by time_to_docidx() is not used here. */
285 switch_xml_file_from_time(JournalStore
*js
, time_t *entrytime
, GError
**err
) {
287 time_to_docidx(entrytime
, &year
, &mon
, &day
);
288 return switch_xml_file(js
, year
, mon
, err
);
/* Return the time stored in the index for itemid (decoded by index_get).
 * An itemid at or beyond the end of the index takes a separate path.
 * NOTE(review): the statement executed for an out-of-range itemid is not
 * present in this listing (callers treat a zero result as "not in the
 * index"). The visible check has no lower bound, so a negative itemid
 * would reach index_get -- confirm callers never pass one. */
292 journal_store_lookup_entry_time(JournalStore
*js
, int itemid
) {
293 if (itemid
>= (int)js
->index
->len
)
295 return index_get(js
->index
, itemid
);
/* Create a detached "day" element in doc and set its integer property
 * "number" to day. The node is not linked into the tree here; find_day()
 * inserts it.
 * NOTE(review): the declaration and return statement are not present in
 * this listing. */
299 make_day_node(xmlDocPtr doc
, int day
) {
301 newnode
= xmlNewDocNode(doc
, NULL
, BAD_CAST
"day", NULL
);
302 jam_xmlSetIntProp(newnode
, "number", day
);
/* Find the day node of doc whose "number" property equals day.
 *
 * The root's children are scanned in document order. A matching node is
 * returned. When a node with a larger number is met first, a new day node
 * is inserted before it; when the scan runs off the end, a new day node is
 * appended to the root. Together these keep day nodes in ascending order.
 *
 * NOTE(review): this listing omits the comparison that selects the "found
 * it" branch, the handling of the `create` flag (callers pass FALSE for
 * lookups and TRUE when storing), the release of the property string and
 * the remaining return statements. */
307 find_day(xmlDocPtr doc
, int day
, gboolean create
) {
308 xmlNodePtr root
, node
, newnode
;
312 root
= xmlDocGetRootElement(doc
);
314 for (node
= root
->xmlChildrenNode
; node
; node
= node
->next
) {
315 if ((eday
= xmlGetProp(node
, BAD_CAST
"number")) != NULL
) {
316 fday
= atoi((char*)eday
);
319 return node
; /* found it. */
320 } else if (fday
> day
) {
321 /* we didn't find it, but we know where to insert it. */
323 newnode
= make_day_node(doc
, day
);
324 xmlAddPrevSibling(node
, newnode
);
332 /* we're either the first day inserted or last day for the month. */
333 newnode
= make_day_node(doc
, day
);
334 xmlAddChild(root
, newnode
);
/* Search the children of a day node for the entry whose "itemid" property
 * parses (atoi) to itemid.
 * NOTE(review): the statements run on a match, the release of the property
 * string and the not-found return are not present in this listing. */
341 find_entry(xmlNodePtr day
, int itemid
) {
344 for (node
= day
->xmlChildrenNode
; node
; node
= node
->next
) {
345 if ((eitemid
= xmlGetProp(node
, BAD_CAST
"itemid")) != NULL
) {
346 if (atoi((char*)eitemid
) == itemid
) {
/* Remove a previously stored copy of entry itemid, if there is one.
 *
 * The entry's time is looked up in the index; an entry that is not in the
 * index yields TRUE straight away. If the entry's month is the cached
 * document, the removal is done on that in-memory document; otherwise the
 * month's file is located with docidx_to_str() and parsed. The day node and
 * then the entry node are looked up, the entry is removed, and the day is
 * deleted as well when it has no children left. A document other than the
 * cached one is saved immediately with xmlSaveFormatFile(); otherwise
 * js->xml_dirty is set.
 *
 * NOTE(review): this listing omits the declarations, several branch
 * conditions, the unlink/free calls and the return statements.
 * NOTE(review): the file-existence test below is written
 * `!stat(...) && errno == ENOENT`, while index_load() and switch_xml_file()
 * use `stat(...) < 0 && errno == ENOENT`. As written it is true only when
 * stat() succeeds, so the "no document" case looks unreachable -- confirm
 * and align with the other two call sites.
 * NOTE(review): the direct xmlSaveFormatFile() call ignores its result and
 * does not use the temp-file-plus-rename scheme of switch_xml_file(). */
357 remove_old(JournalStore
*js
, int itemid
, GError
**err
) {
362 xmlDocPtr doc
= NULL
;
363 xmlNodePtr nday
, node
;
365 entrytime
= journal_store_lookup_entry_time(js
, itemid
);
367 return TRUE
; /* this entry isn't in the index. */
369 /* are we already there? */
370 time_to_docidx(&entrytime
, &year
, &mon
, &day
);
371 if (year
== js
->xml_year
&& mon
== js
->xml_mon
) {
372 /* remove this from the in-memory doc. */
375 path
= docidx_to_str(js
->path
, year
, mon
);
376 if (!stat(path
, &statbuf
) && errno
== ENOENT
) {
377 /* no document means there's nothing to delete. */
381 doc
= xmlParseFile(path
);
384 nday
= find_day(doc
, day
, FALSE
);
386 /* find the entry node and remove it. */
387 node
= find_entry(nday
, itemid
);
393 /* and delete day if it's empty. */
394 if (nday
->xmlChildrenNode
== NULL
) {
400 /* if we deleted from somewhere other than the current file,
401 * we want to save it out immediately. */
403 xmlSaveFormatFile(path
, doc
, TRUE
);
407 js
->xml_dirty
= TRUE
;
/* Store one entry in the journal store.
 *
 * The entry's time is converted with lj_timegm() and its month document is
 * made current. Any older copy is removed with remove_old(). The entry is
 * then serialized with lj_entry_to_xml_node() and appended to the day node
 * for entry->time.tm_mday (created if needed), and the cached document is
 * marked dirty. Finally the index is grown to cover entry->itemid if
 * necessary and the entry's time is recorded in that slot.
 *
 * Neither the month document nor the index is written to disk here; see
 * journal_store_put_group().
 * NOTE(review): the statements taken when switch_xml_file_from_time() or
 * remove_old() fail, and the final return, are not present in this
 * listing. */
414 journal_store_put(JournalStore
*js
, LJEntry
*entry
, GError
**err
) {
416 xmlNodePtr node
, newnode
;
418 entrytime
= lj_timegm(&entry
->time
);
420 if (!switch_xml_file_from_time(js
, &entrytime
, err
))
423 if (!remove_old(js
, entry
->itemid
, err
))
426 /* write main xml. */
427 node
= find_day(js
->xml_doc
, entry
->time
.tm_mday
, TRUE
);
428 newnode
= lj_entry_to_xml_node(entry
, js
->xml_doc
);
430 xmlAddChild(node
, newnode
);
431 js
->xml_dirty
= TRUE
;
/* Grow the index so slot itemid exists, then record the entry time. */
434 if (entry
->itemid
+1 > (int)js
->index
->len
)
435 g_array_set_size(js
->index
, entry
->itemid
+1);
436 index_set(js
->index
, entry
->itemid
, entrytime
);
/* Store c entries and then persist the result.
 * Each entries[i] goes through journal_store_put(). Afterwards
 * switch_xml_file_from_time() is called with a NULL time, which switches
 * away from the cached month document (writing it out if dirty), and the
 * index is saved with index_save().
 * NOTE(review): the statements taken on each failure and the final return
 * are not present in this listing. */
442 journal_store_put_group(JournalStore
*js
, LJEntry
**entries
, int c
, GError
**err
) {
445 for (i
= 0; i
< c
; i
++) {
446 if (!journal_store_put(js
, entries
[i
], err
))
450 if (!switch_xml_file_from_time(js
, NULL
, err
))
452 if (!index_save(js
, err
))
/* Tear down a store: saves the index (errors are discarded, since NULL is
 * passed for the GError) and frees the index array together with its data.
 * NOTE(review): this listing shows no release of the cached XML document,
 * the path string or js itself -- confirm against the full file. */
459 journal_store_free(JournalStore
*js
) {
461 index_save(js
, NULL
);
462 g_array_free(js
->index
, TRUE
);
/* Report which days of (year, mon) have entries.
 * The month document is made current (a failure is logged with g_warning),
 * then each child of the root is visited and its "number" property is read
 * as the day.
 * NOTE(review): what is done with each day number, the cleanup of err and
 * the return value are not present in this listing. */
469 journal_store_get_month_entries(JournalStore
*js
, int year
, int mon
) {
476 if (!switch_xml_file(js
, year
, mon
, &err
)) {
477 g_warning("journalstore couldn't switch files: %s\n", err
->message
);
481 nday
= xmlDocGetRootElement(js
->xml_doc
)->xmlChildrenNode
;
482 for (; nday
; nday
= nday
->next
) {
483 if (jam_xmlGetIntProp(nday
, "number", &day
))
/* Extract the summary fields of one entry node and hand them to cb_func.
 *
 * The entry's children are scanned by element name:
 *   "event"    -> text content kept as the event body
 *   "time"     -> parsed into a struct tm with lj_ljdate_to_tm()
 *   "subject"  -> text content kept as the subject
 *   "security" -> "type" / "mask" properties converted with
 *                 lj_security_from_strings(); both strings are freed
 * The entry's own "itemid" property is parsed with atoi(). A summary is
 * built from subject and event with lj_get_summary(), and the callback gets
 * (itemid, entry time via lj_timegm, summary, &sec, cb_data).
 *
 * NOTE(review): the remaining declarations and the release of event,
 * subject and the itemid string are not present in this listing. */
490 call_summarycb(JournalStore
*js
, xmlNodePtr nentry
,
491 JournalStoreSummaryCallback cb_func
, gpointer cb_data
) {
494 xmlChar
*event
= NULL
;
495 xmlChar
*subject
= NULL
;
499 LJSecurity sec
= {0};
501 for (nchild
= nentry
->xmlChildrenNode
; nchild
; nchild
= nchild
->next
) {
502 if (xmlStrcmp(nchild
->name
, BAD_CAST
"event") == 0) {
503 event
= xmlNodeListGetString(js
->xml_doc
,
504 nchild
->xmlChildrenNode
, TRUE
);
505 sitemid
= xmlGetProp(nentry
, BAD_CAST
"itemid");
508 itemid
= atoi((char*)sitemid
);
511 } else if (xmlStrcmp(nchild
->name
, BAD_CAST
"time") == 0) {
512 lj_ljdate_to_tm((const char*)XML_GET_CONTENT(nchild
->xmlChildrenNode
), &etm
);
513 } else if (xmlStrcmp(nchild
->name
, BAD_CAST
"subject") == 0) {
514 subject
= xmlNodeListGetString(js
->xml_doc
,
515 nchild
->xmlChildrenNode
, TRUE
);
516 } else if (xmlStrcmp(nchild
->name
, BAD_CAST
"security") == 0) {
517 xmlChar
*type
= NULL
, *mask
= NULL
;
518 type
= xmlGetProp(nchild
, BAD_CAST
"type");
519 mask
= xmlGetProp(nchild
, BAD_CAST
"mask");
520 lj_security_from_strings(&sec
, (char*)type
, (char*)mask
);
521 if (type
) xmlFree(type
);
522 if (mask
) xmlFree(mask
);
/* Build the summary and deliver everything to the caller's callback. */
526 summary
= lj_get_summary((char*)subject
, (char*)event
);
527 cb_func(itemid
, lj_timegm(&etm
), summary
, &sec
, cb_data
);
/* Invoke cb_func (through call_summarycb) for every entry stored on the
 * given day. The month document is made current, the day node is looked up
 * without creating it, and a missing day means there are no entries.
 * NOTE(review): the result of switch_xml_file() is not used in the visible
 * call and NULL is passed for the error, so a failed switch would go
 * unnoticed before js->xml_doc is read -- confirm. The return statements
 * are not present in this listing. */
536 journal_store_get_day_entries(JournalStore
*js
, int year
, int mon
, int day
,
537 JournalStoreSummaryCallback cb_func
, gpointer cb_data
) {
538 xmlNodePtr nday
, nentry
;
540 switch_xml_file(js
, year
, mon
, NULL
);
541 nday
= find_day(js
->xml_doc
, day
, FALSE
);
543 if (!nday
) /* no entries today. */
546 for (nentry
= nday
->xmlChildrenNode
; nentry
; nentry
= nentry
->next
)
547 call_summarycb(js
, nentry
, cb_func
, cb_data
);
/* Members of the Scan search context passed to match_node(), match_entry()
 * and scan_month(): scan_cb is the text predicate, summary_cb and
 * summary_data are the callback (and its argument) invoked for each
 * matching entry.
 * NOTE(review): the enclosing type declaration and the members used
 * elsewhere as scan_data and matchcount are not present in this listing. */
553 JournalStoreScanCallback scan_cb
;
555 JournalStoreSummaryCallback summary_cb
;
556 gpointer summary_data
;
/* Test one node's text against the scan predicate: the node's text content
 * is fetched with xmlNodeListGetString() and passed to scan->scan_cb
 * together with scan->scan_data; the callback's result is kept in `found`.
 * NOTE(review): the declarations, the release of the content string and
 * the return statement are not present in this listing. */
561 match_node(JournalStore
*js
, xmlNodePtr node
, const Scan
*scan
) {
565 content
= xmlNodeListGetString(js
->xml_doc
, node
->xmlChildrenNode
, TRUE
);
566 found
= scan
->scan_cb((const char*)content
, scan
->scan_data
);
/* Decide whether an entry matches the scan: its "event" and "subject"
 * children are each tested with match_node(). Because of the
 * `matched || ...` form, no further nodes are tested once one has matched.
 * NOTE(review): the declaration / initial value of `matched` and the
 * return statement are not present in this listing. */
572 match_entry(JournalStore
*js
, xmlNodePtr nentry
, const Scan
*scan
) {
576 nchild
= nentry
->xmlChildrenNode
;
578 for ( ; nchild
; nchild
= nchild
->next
) {
579 if (xmlStrcmp(nchild
->name
, BAD_CAST
"event") == 0)
580 matched
= matched
|| match_node(js
, nchild
, scan
);
581 else if (xmlStrcmp(nchild
->name
, BAD_CAST
"subject") == 0)
582 matched
= matched
|| match_node(js
, nchild
, scan
);
/* Scan every entry of one month. The month document is made current; for
 * each entry of each day that match_entry() accepts, the summary callback
 * is invoked through call_summarycb() and scan->matchcount is incremented,
 * with a check for reaching MAX_MATCHES.
 * NOTE(review): the statements taken when the switch fails or when
 * MAX_MATCHES is reached, and the return value, are not present in this
 * listing. */
588 scan_month(JournalStore
*js
, int year
, int month
, Scan
*scan
) {
589 xmlNodePtr nday
, nentry
;
590 if (!switch_xml_file(js
, year
, month
, NULL
))
593 nday
= xmlDocGetRootElement(js
->xml_doc
)->xmlChildrenNode
;
594 for (; nday
; nday
= nday
->next
) {
595 for (nentry
= nday
->xmlChildrenNode
; nentry
; nentry
= nentry
->next
) {
596 if (match_entry(js
, nentry
, scan
)) {
597 call_summarycb(js
, nentry
,
598 scan
->summary_cb
, scan
->summary_data
);
599 if (++scan
->matchcount
== MAX_MATCHES
)
/* Search the whole store. The store directory is opened and its entries
 * are read one by one as year names for as long as fewer than MAX_MATCHES
 * matches have been found. For each one the year directory is opened, the
 * name is parsed with atoi(), and scan_month() is run for months 1 to 12
 * before the directory is closed.
 * NOTE(review): the set-up of the local `scan` context, the handling of
 * g_dir_open() failures, the reaction to scan_month() returning false, the
 * release of yearpath and the return value are not present in this
 * listing. */
608 journal_store_scan(JournalStore
*js
,
609 JournalStoreScanCallback scan_cb
, gpointer scan_data
,
610 JournalStoreSummaryCallback cb_func
, gpointer cb_data
) {
611 GDir
*journaldir
, *yeardir
;
612 const char *yearname
;
621 journaldir
= g_dir_open(js
->path
, 0, NULL
);
625 yearname
= g_dir_read_name(journaldir
);
626 while (yearname
&& scan
.matchcount
< MAX_MATCHES
) {
627 yearpath
= g_build_filename(js
->path
, yearname
, NULL
);
628 yeardir
= g_dir_open(yearpath
, 0, NULL
);
632 year
= atoi(yearname
);
633 for (month
= 1; month
<= 12; month
++) {
634 if (!scan_month(js
, year
, month
, &scan
))
637 g_dir_close(yeardir
);
639 yearname
= g_dir_read_name(journaldir
);
641 g_dir_close(journaldir
);
/* Find the entry closest in time to `when` in the direction given by dir.
 *
 * Both searches scan index slots 1..len-1 linearly and skip any slot whose
 * time equals `when`. The first loop keeps the latest candidate that is
 * earlier than `when`; the dir > 0 loop keeps the earliest candidate that
 * is later than `when`. ftime holds the best candidate time so far.
 *
 * NOTE(review): the condition guarding the first loop (presumably dir < 0),
 * the updates of ftime / *ritemid and the return value are not present in
 * this listing.
 * NOTE(review): err is declared `GError *` here while every other function
 * in this file takes `GError **` -- confirm whether that is intended. */
648 journal_store_find_relative_by_time(JournalStore
*js
, time_t when
,
649 int *ritemid
, int dir
, GError
*err
) {
656 /* XXX need to handle items with same date. */
658 for (i
= js
->index
->len
-1; i
>= 1; i
--) {
659 if (index_get(js
->index
, i
) == when
)
660 continue; /* skip self */
661 candidate
= journal_store_lookup_entry_time(js
, i
);
662 if (candidate
< when
&& candidate
> ftime
) {
667 } else if (dir
> 0) {
668 for (i
= 1; i
< (int)js
->index
->len
; i
++) {
669 if (index_get(js
->index
, i
) == when
)
670 continue; /* skip self */
671 candidate
= journal_store_lookup_entry_time(js
, i
);
672 if (candidate
> when
) {
673 if (!ftime
|| (ftime
&& candidate
< ftime
)) {
/* Load one entry by item id. The entry's time is taken from the index, its
 * month document is made current (a failure is logged with g_warning), the
 * day node for the entry's UTC day-of-month and then the entry node are
 * looked up, and the result of lj_entry_new_from_xml_node() is returned.
 * NOTE(review): any checks for an item id missing from the index or for
 * find_day() / find_entry() returning nothing, and the failure returns,
 * are not present in this listing -- confirm they exist in the full file. */
689 journal_store_get_entry(JournalStore
*js
, int itemid
) {
691 xmlNodePtr nday
, nentry
;
695 entrytime
= journal_store_lookup_entry_time(js
, itemid
);
696 if (!switch_xml_file_from_time(js
, &entrytime
, &err
)) {
697 g_warning("journalstore couldn't switch files: %s\n", err
->message
);
702 etm
= gmtime(&entrytime
);
703 nday
= find_day(js
->xml_doc
, etm
->tm_mday
, FALSE
);
705 nentry
= find_entry(nday
, itemid
);
707 return lj_entry_new_from_xml_node(js
->xml_doc
, nentry
);
/* Find the highest item id that has a non-zero time in the index. The scan
 * runs from the last slot downwards and stops before slot 0 (the version
 * slot); 0 is returned when no slot qualifies.
 * NOTE(review): the statement executed when a slot qualifies is not present
 * in this listing (presumably `return itemid;`). */
713 journal_store_get_latest_id(JournalStore
*js
) {
715 for (itemid
= js
->index
->len
-1; itemid
; itemid
--)
716 if (journal_store_lookup_entry_time(js
, itemid
))
718 return 0; /* no non-deleted messages in store */
/* Count the index slots whose raw value is non-zero.
 * NOTE(review): the statement executed for a non-zero slot and the return
 * are not present in this listing (presumably `count++` / `return count`).
 * The loop starts at slot 0, which holds the format version rather than an
 * entry time, so the version slot would be included -- confirm whether
 * callers expect that. */
722 journal_store_get_count(JournalStore
*js
) {
723 int itemid
, count
= 0;
724 for (itemid
= 0; itemid
< (int)js
->index
->len
; itemid
++)
725 if (index_at(js
->index
, itemid
) != 0)
/* Accessor that takes the store handle.
 * NOTE(review): the return type and the body are not present in this
 * listing; presumably it reports the "need to resync" flag described in
 * the struct's comment -- confirm against the full file. */
731 journal_store_get_invalid(JournalStore
*js
) {
/* Return the store location for an account: the result of
 * conf_make_account_path() for the "journal" sub-path. */
736 journal_store_make_path(JamAccount
*acc
) {
737 return conf_make_account_path(acc
, "journal");
/* Open (and optionally create) the offline store for an account.
 *
 * A zeroed JournalStore is allocated, its path is computed and an index
 * array of time_t is created (new elements are zero-filled). When `create`
 * is false and the store path does not exist, err is set to "No offline
 * copy of this journal.". Otherwise the index gets a single slot holding
 * the current version, the store path is checked with verify_path(), and
 * the on-disk index is loaded with index_load().
 *
 * NOTE(review): the control flow linking these steps -- including where
 * journal_store_free(js) is reached (presumably a shared error exit) -- and
 * the return statements are not present in this listing. */
741 journal_store_open(JamAccount
*acc
, gboolean create
, GError
**err
) {
745 js
= g_new0(JournalStore
, 1);
747 js
->path
= journal_store_make_path(acc
);
748 js
->index
= g_array_new(FALSE
, TRUE
, sizeof(time_t));
750 if (!create
&& !g_file_test(js
->path
, G_FILE_TEST_EXISTS
)) {
751 g_set_error(err
, 0, 0, _("No offline copy of this journal."));
755 /* need at least one slot for the version.
756 * if the on-disk index is an older version,
757 * loading the index will overwrite this version anyway. */
758 g_array_set_size(js
->index
, 1);
759 index_set(js
->index
, 0, JOURNAL_STORE_INDEX_VERSION
);
761 if (!verify_path(js
->path
, TRUE
, err
))
764 if (!index_load(js
, err
))
770 journal_store_free(js
);
/* Rebuild the index slots for one month document. The month file is
 * parsed; for every entry of every day the "itemid" property is read with
 * atoi() and the entry's "time" child is converted with lj_ljdate_to_tm().
 * idx is grown to cover the item id when needed and the entry time (via
 * lj_timegm) is stored in that slot.
 * NOTE(review): the handling of a failed parse, the release of the
 * property string / document / path and the return value are not present
 * in this listing. */
775 reindex_month(char *storepath
, int year
, int mon
, GArray
*idx
) {
778 xmlNodePtr nday
, nentry
, nchild
;
781 xmlpath
= docidx_to_str(storepath
, year
, mon
);
782 doc
= xmlParseFile(xmlpath
);
786 nday
= xmlDocGetRootElement(doc
)->xmlChildrenNode
;
787 for (; nday
; nday
= nday
->next
) {
788 for (nentry
= nday
->xmlChildrenNode
; nentry
; nentry
= nentry
->next
) {
792 if ((sitemid
= xmlGetProp(nentry
, BAD_CAST
"itemid")) != NULL
) {
793 itemid
= atoi((char*)sitemid
);
796 for (nchild
= nentry
->xmlChildrenNode
; nchild
; nchild
= nchild
->next
) {
797 if (xmlStrcmp(nchild
->name
, BAD_CAST
"time") == 0) {
798 lj_ljdate_to_tm((const char*)XML_GET_CONTENT(nchild
->xmlChildrenNode
),
801 if (itemid
+1 > (int)idx
->len
)
802 g_array_set_size(idx
, itemid
+1);
803 index_set(idx
, itemid
, lj_timegm(&etm
));
/* Rebuild an account's index file from its month documents.
 *
 * A fresh index array is created with the current version in slot 0. The
 * store directory is walked: each name is opened as a year directory and
 * parsed with atoi(), and each name inside it is parsed with atoi() as the
 * month and passed to reindex_month(). The rebuilt index is written with
 * index_write() and the array is freed.
 *
 * NOTE(review): the loop headers, the handling of g_dir_open() failures,
 * the release of storepath / yearpath and the return statement are not
 * present in this listing. Month names are presumably of the form "NN.xml"
 * as produced by docidx_to_str(), with atoi() reading the leading digits --
 * confirm. */
814 journal_store_reindex(JamAccount
*acc
, GError
**err
) {
816 GDir
*journaldir
, *yeardir
;
817 const char *yearname
, *monthname
;
823 storepath
= journal_store_make_path(acc
);
825 /* ick, duplication of the store code. */
826 journaldir
= g_dir_open(storepath
, 0, NULL
);
830 index
= g_array_new(FALSE
, TRUE
, sizeof(time_t));
831 g_array_set_size(index
, 1);
832 index_set(index
, 0, JOURNAL_STORE_INDEX_VERSION
);
834 yearname
= g_dir_read_name(journaldir
);
836 yearpath
= g_build_filename(storepath
, yearname
, NULL
);
837 yeardir
= g_dir_open(yearpath
, 0, NULL
);
841 year
= atoi(yearname
);
843 monthname
= g_dir_read_name(yeardir
);
845 month
= atoi(monthname
);
846 reindex_month(storepath
, year
, month
, index
);
847 monthname
= g_dir_read_name(yeardir
);
849 g_dir_close(yeardir
);
851 yearname
= g_dir_read_name(journaldir
);
853 g_dir_close(journaldir
);
855 ret
= index_write(storepath
, index
, err
);
857 g_array_free(index
, TRUE
);