1 /* logjam - a GTK client for LiveJournal.
2 * Copyright (C) 2000-2005 Evan Martin <evan@livejournal.com>
11 #include <netinet/in.h>
12 #include <sys/types.h>
15 #include <libxml/parser.h>
19 #include "journalstore.h"
22 #define JOURNAL_STORE_INDEX_VERSION (3)
23 #define JOURNAL_STORE_XML_VERSION (2)
26 struct _JournalStore
{
29 /* the index is a flat array of itemid->time_t.
30 * it's small enough to scan through when
31 * we need to do a reverse lookup. */
33 /* has the file format changed in such a way that we need to resync? */
35 /* we cache the "current" xml doc around in memory,
36 * because we usually want to grab multiple entries from
39 int xml_year
, xml_mon
;
44 JamAccount
*journal_store_get_account (JournalStore
*js
) {
49 #define index_at(idx, i) g_array_index(idx, time_t, i)
50 #define index_get(idx, i) ntohl(index_at(idx, i))
51 #define index_set(idx, i, val) index_at(idx, i) = htonl(val)
54 static gboolean
index_load (JournalStore
*js
, GError
**err
) {
62 path
= g_build_filename(js
->path
, "index", NULL
);
64 if (stat(path
, &statbuf
) < 0 && errno
== ENOENT
) {
69 f
= fopen(path
, "rb");
72 g_print("XXX index fopen: %s\n", g_strerror(errno
));
76 itemcount
= statbuf
.st_size
/ sizeof(time_t);
77 g_array_set_size(js
->index
, itemcount
);
78 ret
= (int)fread(js
->index
->data
, sizeof(time_t), itemcount
, f
);
79 if (ret
< itemcount
) {
80 g_print("XXX index fread read too little\n");
84 ver
= index_get(js
->index
, 0);
85 if ((ver
& 0xFF000000) != 0) {
86 /* byte order is messed up in older versions. fix it here. yuck. */
88 for (i
= 0; i
< itemcount
; i
++) {
89 guint32 v
= index_at(js
->index
, i
);
90 index_at(js
->index
, i
) = ((v
& 0x000000FF) << 24) | ((v
& 0x0000FF00) << 8) | ((v
& 0x00FF0000) >> 8) | ((v
& 0xFF000000) >> 24);
92 ver
= index_get(js
->index
, 0);
95 if (ver
< JOURNAL_STORE_INDEX_VERSION
) {
96 /* file format somehow changed. clear the index. */
97 g_array_set_size(js
->index
, 1);
98 index_set(js
->index
, 0, JOURNAL_STORE_INDEX_VERSION
);
106 static gboolean
index_write (const char *storepath
, GArray
*idx
, GError
**err
) {
111 path
= g_build_filename(storepath
, "index", NULL
);
113 f
= fopen(path
, "wb");
116 g_set_error(err
, 0, 0, _("Error opening index: %s"), g_strerror(errno
));
120 wrote
= fwrite(idx
->data
, sizeof(time_t), idx
->len
, f
);
121 if (wrote
< idx
->len
) {
122 g_set_error(err
, 0, 0, _("Error writing index: %s"), g_strerror(errno
));
132 static gboolean
index_save (JournalStore
*js
, GError
**err
) {
133 return index_write(js
->path
, js
->index
, err
);
/* Split an entry time into the year/month/day used to locate it on disk.
 *
 * entrytime: UTC timestamp to split, or NULL for "no document".
 * year:      receives the four-digit year.
 * mon:       receives the month, 1-12 (matches the "%02d.xml" file names).
 * day:       receives the day of the month.
 *
 * When entrytime is NULL, or gmtime() cannot represent the value, all
 * three outputs are set to 0. */
static void time_to_docidx(const time_t *entrytime, int *year, int *mon, int *day) {
	struct tm *lt = NULL;

	if (entrytime)
		lt = gmtime(entrytime);

	if (!lt) {
		/* no time given, or gmtime() failed: report "no document". */
		*year = *mon = *day = 0;
		return;
	}

	*year = lt->tm_year + 1900;
	*mon = lt->tm_mon + 1;
	*day = lt->tm_mday;
}
/* Build the file name of the XML document holding one month of entries:
 * "<base>/<year>/<two-digit month>.xml".
 *
 * The result is allocated by g_strdup_printf; the caller releases it
 * with g_free. */
static char *docidx_to_str(char *base, int year, int mon) {
	char *xmlpath;

	xmlpath = g_strdup_printf("%s/%d/%02d.xml", base, year, mon);
	return xmlpath;
}
155 static void delete_unused_whitespace_r (xmlNodePtr node
) {
157 /* whitespace is significant within many nodes, like the event,
158 * but we only get isolated pure-whitespace nodes when whitespace
159 * is used alongside nodes. all journal content is escaped, so
160 * all nodes that contain both whitespace and nodes should be
162 for (node
= node
->xmlChildrenNode
; node
; node
= next
) {
164 if (xmlIsBlankNode(node
)) {
168 delete_unused_whitespace_r(node
);
174 static void delete_unused_whitespace (xmlDocPtr doc
) {
175 delete_unused_whitespace_r(xmlDocGetRootElement(doc
));
179 static xmlDocPtr
make_new_doc (int year
, int mon
) {
182 jam_xmlNewDoc(&doc
, &node
, "entrymonth");
183 jam_xmlSetIntProp(node
, "version", JOURNAL_STORE_XML_VERSION
);
184 jam_xmlSetIntProp(node
, "year", year
);
185 jam_xmlSetIntProp(node
, "month", mon
);
190 static gboolean
switch_xml_file (JournalStore
*js
, int year
, int mon
, GError
**err
) {
193 xmlDocPtr doc
= NULL
;
195 /* are we already there? */
196 if (year
== js
->xml_year
&& mon
== js
->xml_mon
) {
200 /* otherwise, switch to this file.
201 * XXX protective locking would be good. */
202 /* first write out the old file, if we have one. */
203 if (js
->xml_year
&& js
->xml_dirty
) {
205 path
= docidx_to_str(js
->path
, js
->xml_year
, js
->xml_mon
);
206 if (!verify_path(path
, FALSE
, err
)) {
210 /* write to a temp file and then rename,
211 * to avoid losing data if we die mid-write. */
212 tmppath
= g_strconcat(path
, ".tmp", NULL
);
213 if (xmlSaveFormatFile(tmppath
, js
->xml_doc
, TRUE
) < 0) {
214 g_set_error(err
, 0, 0, _("Error writing journal xml file to %s: %s"), tmppath
, g_strerror(errno
));
219 if (rename(tmppath
, path
) < 0) {
220 g_set_error(err
, 0, 0, _("Error renaming journal xml file %s to %s: %s"), tmppath
, path
, g_strerror(errno
));
229 /* then switch to the new file, if we have one. */
231 path
= docidx_to_str(js
->path
, year
, mon
);
232 if (stat(path
, &statbuf
) < 0 && errno
== ENOENT
) {
233 doc
= make_new_doc(year
, mon
);
236 doc
= xmlParseFile(path
);
238 if (!jam_xmlGetIntProp(xmlDocGetRootElement(doc
), "version", &ver
))
241 if (ver
< JOURNAL_STORE_XML_VERSION
) {
242 /* out of date document. */
244 doc
= make_new_doc(year
, mon
);
246 /* if there is any whitespace in nodes where we don't
247 * care about whitespace, libxml thinks that the whitespace
248 * was important and won't reformat it correctly.
249 * so we need to delete all the whitespace on load. */
250 delete_unused_whitespace(doc
);
253 g_set_error(err
, 0, 0, _("Error parsing journal XML file %s"), path
);
260 js
->xml_dirty
= FALSE
;
270 static gboolean
switch_xml_file_from_time (JournalStore
*js
, time_t *entrytime
, GError
**err
) {
272 time_to_docidx(entrytime
, &year
, &mon
, &day
);
273 return switch_xml_file(js
, year
, mon
, err
);
277 time_t journal_store_lookup_entry_time (JournalStore
*js
, int itemid
) {
278 if (itemid
>= (int)js
->index
->len
) return 0;
279 return index_get(js
->index
, itemid
);
283 static xmlNodePtr
make_day_node (xmlDocPtr doc
, int day
) {
285 newnode
= xmlNewDocNode(doc
, NULL
, BAD_CAST
"day", NULL
);
286 jam_xmlSetIntProp(newnode
, "number", day
);
291 static xmlNodePtr
find_day (xmlDocPtr doc
, int day
, gboolean create
) {
292 xmlNodePtr root
, node
, newnode
;
295 root
= xmlDocGetRootElement(doc
);
296 for (node
= root
->xmlChildrenNode
; node
; node
= node
->next
) {
297 if ((eday
= xmlGetProp(node
, BAD_CAST
"number")) != NULL
) {
298 fday
= atoi((char *)eday
);
301 return node
; /* found it. */
302 } else if (fday
> day
) {
303 /* we didn't find it, but we know where to insert it. */
305 newnode
= make_day_node(doc
, day
);
306 xmlAddPrevSibling(node
, newnode
);
314 /* we're either the first day inserted or last day for the month. */
315 newnode
= make_day_node(doc
, day
);
316 xmlAddChild(root
, newnode
);
323 static xmlNodePtr
find_entry (xmlNodePtr day
, int itemid
) {
326 for (node
= day
->xmlChildrenNode
; node
; node
= node
->next
) {
327 if ((eitemid
= xmlGetProp(node
, BAD_CAST
"itemid")) != NULL
) {
328 if (atoi((char *)eitemid
) == itemid
) {
339 static gboolean
remove_old (JournalStore
*js
, int itemid
, GError
**err
) {
344 xmlDocPtr doc
= NULL
;
345 xmlNodePtr nday
, node
;
347 entrytime
= journal_store_lookup_entry_time(js
, itemid
);
348 if (entrytime
== 0) return TRUE
; /* this entry isn't in the index. */
350 /* are we already there? */
351 time_to_docidx(&entrytime
, &year
, &mon
, &day
);
352 if (year
== js
->xml_year
&& mon
== js
->xml_mon
) {
353 /* remove this from the in-memory doc. */
356 path
= docidx_to_str(js
->path
, year
, mon
);
357 if (!stat(path
, &statbuf
) && errno
== ENOENT
) {
358 /* no document means there's nothing to delete. */
362 doc
= xmlParseFile(path
);
365 nday
= find_day(doc
, day
, FALSE
);
367 /* find the entry node and remove it. */
368 node
= find_entry(nday
, itemid
);
374 /* and delete day if it's empty. */
375 if (nday
->xmlChildrenNode
== NULL
) {
381 /* if we deleted from somewhere other than the current file,
382 * we want to save it out immediately. */
384 xmlSaveFormatFile(path
, doc
, TRUE
);
388 js
->xml_dirty
= TRUE
;
395 gboolean
journal_store_put (JournalStore
*js
, LJEntry
*entry
, GError
**err
) {
397 xmlNodePtr node
, newnode
;
399 entrytime
= lj_timegm(&entry
->time
);
401 if (!switch_xml_file_from_time(js
, &entrytime
, err
)) return FALSE
;
402 if (!remove_old(js
, entry
->itemid
, err
)) return FALSE
;
404 /* write main xml. */
405 node
= find_day(js
->xml_doc
, entry
->time
.tm_mday
, TRUE
);
406 newnode
= lj_entry_to_xml_node(entry
, js
->xml_doc
);
408 xmlAddChild(node
, newnode
);
409 js
->xml_dirty
= TRUE
;
412 if (entry
->itemid
+ 1 > (int)js
->index
->len
)
413 g_array_set_size(js
->index
, entry
->itemid
+ 1);
414 index_set(js
->index
, entry
->itemid
, entrytime
);
420 gboolean
journal_store_put_group (JournalStore
*js
, LJEntry
**entries
, int c
, GError
**err
) {
421 for (int i
= 0; i
< c
; ++i
) if (!journal_store_put(js
, entries
[i
], err
)) return FALSE
;
422 if (!switch_xml_file_from_time(js
, NULL
, err
)) return FALSE
;
423 if (!index_save(js
, err
)) return FALSE
;
428 void journal_store_free(JournalStore
*js
) {
430 index_save(js
, NULL
);
431 g_array_free(js
->index
, TRUE
);
438 guint32
journal_store_get_month_entries (JournalStore
*js
, int year
, int mon
) {
445 if (!switch_xml_file(js
, year
, mon
, &err
)) {
446 g_warning("journalstore couldn't switch files: %s\n", err
->message
);
450 nday
= xmlDocGetRootElement(js
->xml_doc
)->xmlChildrenNode
;
451 for (; nday
; nday
= nday
->next
) if (jam_xmlGetIntProp(nday
, "number", &day
)) days
|= 1<<day
;
456 static void call_summarycb (JournalStore
*js
, xmlNodePtr nentry
, JournalStoreSummaryCallback cb_func
, gpointer cb_data
) {
459 xmlChar
*event
= NULL
;
460 xmlChar
*subject
= NULL
;
464 LJSecurity sec
= { 0 };
466 for (nchild
= nentry
->xmlChildrenNode
; nchild
; nchild
= nchild
->next
) {
467 if (xmlStrcmp(nchild
->name
, BAD_CAST
"event") == 0) {
468 event
= xmlNodeListGetString(js
->xml_doc
, nchild
->xmlChildrenNode
, TRUE
);
469 sitemid
= xmlGetProp(nentry
, BAD_CAST
"itemid");
472 itemid
= atoi((char *)sitemid
);
475 } else if (xmlStrcmp(nchild
->name
, BAD_CAST
"time") == 0) {
476 lj_ljdate_to_tm((const char *)XML_GET_CONTENT(nchild
->xmlChildrenNode
), &etm
);
477 } else if (xmlStrcmp(nchild
->name
, BAD_CAST
"subject") == 0) {
478 subject
= xmlNodeListGetString(js
->xml_doc
, nchild
->xmlChildrenNode
, TRUE
);
479 } else if (xmlStrcmp(nchild
->name
, BAD_CAST
"security") == 0) {
480 xmlChar
*type
= NULL
, *mask
= NULL
;
481 type
= xmlGetProp(nchild
, BAD_CAST
"type");
482 mask
= xmlGetProp(nchild
, BAD_CAST
"mask");
483 lj_security_from_strings(&sec
, (char *)type
, (char *)mask
);
491 summary
= lj_get_summary((char *)subject
, (char *)event
);
492 cb_func(itemid
, lj_timegm(&etm
), summary
, &sec
, cb_data
);
501 gboolean
journal_store_get_day_entries (JournalStore
*js
, int year
, int mon
, int day
, JournalStoreSummaryCallback cb_func
, gpointer cb_data
) {
502 xmlNodePtr nday
, nentry
;
503 switch_xml_file(js
, year
, mon
, NULL
);
504 nday
= find_day(js
->xml_doc
, day
, FALSE
);
505 if (!nday
) return TRUE
; /* no entries today. */
506 for (nentry
= nday
->xmlChildrenNode
; nentry
; nentry
= nentry
->next
) call_summarycb(js
, nentry
, cb_func
, cb_data
);
512 JournalStoreScanCallback scan_cb
;
514 JournalStoreSummaryCallback summary_cb
;
515 gpointer summary_data
;
520 static gboolean
match_node (JournalStore
*js
, xmlNodePtr node
, const Scan
*scan
) {
523 content
= xmlNodeListGetString(js
->xml_doc
, node
->xmlChildrenNode
, TRUE
);
524 found
= scan
->scan_cb((const char *)content
, scan
->scan_data
);
530 static gboolean
match_entry (JournalStore
*js
, xmlNodePtr nentry
, const Scan
*scan
) {
533 nchild
= nentry
->xmlChildrenNode
;
535 for (; nchild
; nchild
= nchild
->next
) {
536 if (xmlStrcmp(nchild
->name
, BAD_CAST
"event") == 0) matched
= (matched
|| match_node(js
, nchild
, scan
));
537 else if (xmlStrcmp(nchild
->name
, BAD_CAST
"subject") == 0) matched
= (matched
|| match_node(js
, nchild
, scan
));
543 static gboolean
scan_month (JournalStore
*js
, int year
, int month
, Scan
*scan
) {
544 xmlNodePtr nday
, nentry
;
545 if (!switch_xml_file(js
, year
, month
, NULL
)) return FALSE
;
546 nday
= xmlDocGetRootElement(js
->xml_doc
)->xmlChildrenNode
;
547 for (; nday
; nday
= nday
->next
) {
548 for (nentry
= nday
->xmlChildrenNode
; nentry
; nentry
= nentry
->next
) {
549 if (match_entry(js
, nentry
, scan
)) {
550 call_summarycb(js
, nentry
, scan
->summary_cb
, scan
->summary_data
);
551 if (++scan
->matchcount
== MAX_MATCHES
)
560 gboolean
journal_store_scan (JournalStore
*js
,
561 JournalStoreScanCallback scan_cb
, gpointer scan_data
, JournalStoreSummaryCallback cb_func
, gpointer cb_data
)
563 GDir
*journaldir
, *yeardir
;
564 const char *yearname
;
573 journaldir
= g_dir_open(js
->path
, 0, NULL
);
574 if (!journaldir
) return FALSE
;
576 yearname
= g_dir_read_name(journaldir
);
577 while (yearname
&& scan
.matchcount
< MAX_MATCHES
) {
578 yearpath
= g_build_filename(js
->path
, yearname
, NULL
);
579 yeardir
= g_dir_open(yearpath
, 0, NULL
);
582 year
= atoi(yearname
);
583 for (month
= 1; month
<= 12; ++month
) if (!scan_month(js
, year
, month
, &scan
)) break;
584 g_dir_close(yeardir
);
586 yearname
= g_dir_read_name(journaldir
);
588 g_dir_close(journaldir
);
594 gboolean
journal_store_find_relative_by_time (JournalStore
*js
, time_t when
, int *ritemid
, int dir
, GError
*err
) {
601 /* XXX need to handle items with same date. */
603 for (i
= js
->index
->len
- 1; i
>= 1; --i
) {
604 if (index_get(js
->index
, i
) == when
) continue; /* skip self */
605 candidate
= journal_store_lookup_entry_time(js
, i
);
606 if (candidate
< when
&& candidate
> ftime
) {
611 } else if (dir
> 0) {
612 for (i
= 1; i
< (int)js
->index
->len
; ++i
) {
613 if (index_get(js
->index
, i
) == when
) continue; /* skip self */
614 candidate
= journal_store_lookup_entry_time(js
, i
);
615 if (candidate
> when
) {
616 if (!ftime
|| (ftime
&& candidate
< ftime
)) {
633 LJEntry
*journal_store_get_entry (JournalStore
*js
, int itemid
) {
635 xmlNodePtr nday
, nentry
;
639 entrytime
= journal_store_lookup_entry_time(js
, itemid
);
640 if (!switch_xml_file_from_time(js
, &entrytime
, &err
)) {
641 g_warning("journalstore couldn't switch files: %s\n", err
->message
);
646 etm
= gmtime(&entrytime
);
647 nday
= find_day(js
->xml_doc
, etm
->tm_mday
, FALSE
);
649 nentry
= find_entry(nday
, itemid
);
650 if (nentry
) return lj_entry_new_from_xml_node(js
->xml_doc
, nentry
);
657 int journal_store_get_latest_id (JournalStore
*js
) {
658 for (int itemid
= js
->index
->len
-1; itemid
; --itemid
) if (journal_store_lookup_entry_time(js
, itemid
)) return itemid
;
659 return 0; /* no non-deleted messages in store */
663 int journal_store_get_count (JournalStore
*js
) {
664 int itemid
, count
= 0;
665 for (itemid
= 0; itemid
< (int)js
->index
->len
; ++itemid
) if (index_at(js
->index
, itemid
) != 0) ++count
;
670 gboolean
journal_store_get_invalid (JournalStore
*js
) {
675 static char *journal_store_make_path (JamAccount
*acc
) {
676 return conf_make_account_path(acc
, "journal");
680 JournalStore
*journal_store_open (JamAccount
*acc
, gboolean create
, GError
**err
) {
683 js
= g_new0(JournalStore
, 1);
685 js
->path
= journal_store_make_path(acc
);
686 js
->index
= g_array_new(FALSE
, TRUE
, sizeof(time_t));
688 if (!create
&& !g_file_test(js
->path
, G_FILE_TEST_EXISTS
)) {
689 g_set_error(err
, 0, 0, "%s", _("No offline copy of this journal."));
693 /* need at least one slot for the version.
694 * if the on-disk index is an older version,
695 * loading the index will overwrite this version anyway. */
696 g_array_set_size(js
->index
, 1);
697 index_set(js
->index
, 0, JOURNAL_STORE_INDEX_VERSION
);
699 if (!verify_path(js
->path
, TRUE
, err
)) goto err
;
700 if (!index_load(js
, err
)) goto err
;
705 journal_store_free(js
);
710 static void reindex_month (char *storepath
, int year
, int mon
, GArray
*idx
) {
713 xmlNodePtr nday
, nentry
, nchild
;
716 xmlpath
= docidx_to_str(storepath
, year
, mon
);
717 doc
= xmlParseFile(xmlpath
);
719 nday
= xmlDocGetRootElement(doc
)->xmlChildrenNode
;
720 for (; nday
; nday
= nday
->next
) {
721 for (nentry
= nday
->xmlChildrenNode
; nentry
; nentry
= nentry
->next
) {
724 if ((sitemid
= xmlGetProp(nentry
, BAD_CAST
"itemid")) != NULL
) {
725 itemid
= atoi((char *)sitemid
);
728 for (nchild
= nentry
->xmlChildrenNode
; nchild
; nchild
= nchild
->next
) {
729 if (xmlStrcmp(nchild
->name
, BAD_CAST
"time") == 0) {
730 lj_ljdate_to_tm((const char *)XML_GET_CONTENT(nchild
->xmlChildrenNode
), &etm
);
732 if (itemid
+ 1 > (int)idx
->len
)
733 g_array_set_size(idx
, itemid
+ 1);
734 index_set(idx
, itemid
, lj_timegm(&etm
));
745 gboolean
journal_store_reindex (JamAccount
*acc
, GError
**err
) {
747 GDir
*journaldir
, *yeardir
;
748 const char *yearname
, *monthname
;
754 storepath
= journal_store_make_path(acc
);
755 /* ick, duplication of the store code. */
756 journaldir
= g_dir_open(storepath
, 0, NULL
);
757 if (!journaldir
) return FALSE
;
758 index
= g_array_new(FALSE
, TRUE
, sizeof(time_t));
759 g_array_set_size(index
, 1);
760 index_set(index
, 0, JOURNAL_STORE_INDEX_VERSION
);
761 yearname
= g_dir_read_name(journaldir
);
763 yearpath
= g_build_filename(storepath
, yearname
, NULL
);
764 yeardir
= g_dir_open(yearpath
, 0, NULL
);
767 year
= atoi(yearname
);
768 monthname
= g_dir_read_name(yeardir
);
770 month
= atoi(monthname
);
771 reindex_month(storepath
, year
, month
, index
);
772 monthname
= g_dir_read_name(yeardir
);
774 g_dir_close(yeardir
);
776 yearname
= g_dir_read_name(journaldir
);
778 g_dir_close(journaldir
);
779 ret
= index_write(storepath
, index
, err
);
781 g_array_free(index
, TRUE
);