Avoid warning when splitting tags.
[gruta.git] / Gruta / Source / Mbox.pm
blobc40bef9ab9c643cb615a52901088bc263ef52c87
1 package Gruta::Source::Mbox;
3 # WARNING: This backend is DEPRECATED. Use it at your own risk.
5 use base 'Gruta::Source::BASE';
7 use strict;
8 use warnings;
10 use Gruta::Data;
12 sub _rfc822_to_gruta {
13 # converts an RFC822-style Date to Gruta
14 my $self = shift;
15 my $date = shift;
17 $date =~ s/^\w{3},\s+//;
18 my ($d, $m, $y, $H, $M, $S) =
19 ($date =~ /(\d+)\s+(\w+)\s+(\d+)\s(\d+):(\d+):(\d+)/);
21 return sprintf("%04d%02d%02d%02d%02d%02d",
22 $y, $self->{_month_hash}->{$m}, $d, $H, $M, $S);
26 package Gruta::Data::Mbox::BASE;
28 sub dummy {
31 package Gruta::Data::Mbox::Story;
33 use Carp;
35 use base 'Gruta::Data::Story';
36 use base 'Gruta::Data::Mbox::BASE';
38 sub load {
39 my $self = shift;
40 my $driver = shift;
42 $driver = $self->source( $driver );
44 if (my $s = $driver->{stories_h}->{$self->get('id')}) {
46 # read the content
47 open F, $driver->{file} or
48 croak "Can't open '$driver->{file}'";
50 seek F, $s->{offset}, 0;
51 my $c = '';
53 while (<F>) {
54 last if /^From /;
55 $c .= $_;
58 close F;
60 $self->set('title', $s->{title});
61 $self->set('date', $s->{date});
62 $self->set('format', $s->{format} || 'grutatxt');
63 $self->set('hits', 0);
64 $self->set('ctime', 0);
65 $self->set('userid', '');
66 $self->set('content', $c);
69 return $self;
72 sub tags {
73 my $self = shift;
74 my @ret = ();
76 unless (scalar(@_)) {
77 # get tags from the index
78 my $s = $self->source->{stories_h}->{$self->get('id')};
80 @ret = split(/\s*,\s*/, $s->{tags});
83 return @ret;
86 package Gruta::Data::Mbox::Topic;
88 use base 'Gruta::Data::Topic';
89 use base 'Gruta::Data::Mbox::BASE';
91 package Gruta::Source::Mbox;
93 use Carp;
95 sub _assert {
96 my $self = shift;
98 $self->{file} or croak "Mandatory file";
100 return $self;
103 sub _build_index {
104 my $self = shift;
106 open M, $self->{file} or
107 croak "Can't open '$self->{file}'";
109 my @s = ();
110 my %h = ();
111 my $r = undef;
113 while (<M>) {
114 chomp;
116 if (/^From / .. /^$/) {
117 if (not $r) {
118 $r = {};
121 # in header
122 if (/^Message-ID:\s*(.+)$/i) {
123 use Digest::MD5;
125 my $md5 = Digest::MD5->new();
126 $md5->add($1);
128 $r->{id} = $md5->hexdigest();
130 elsif (/^Subject:\s*(.+)$/i) {
131 $r->{title} = $1;
133 elsif (/^Date:\s*(.+)$/i) {
134 $r->{date} = $self->_rfc822_to_gruta($1);
136 elsif (/^X-Format:\s*(.+)$/i) {
137 $r->{format} = $1;
139 elsif (/^Content-Type:\s*.*text\/html/i and not $r->{format}) {
140 $r->{format} = 'filtered_html';
142 elsif (/^X-Tags:\s*(.+)$/i || /^Keywords:\s*(.+)$/i) {
143 $r->{tags} = $1;
145 elsif (/^$/) {
146 $r->{offset} = tell(M);
147 push(@s, $r);
148 $h{$r->{id}} = $r;
149 $r = undef;
154 close M;
156 # store stories in reverse date order
157 $self->{stories_l} = [ sort { $b->{date} <=> $a->{date} } @s ];
158 $self->{stories_h} = { %h };
160 return $self;
164 sub _save_index {
165 my $self = shift;
167 open O, '>' . $self->{index_file} or
168 croak "Can't write '$self->{index_file}'";
169 flock O, 2;
171 foreach my $s (@{ $self->{stories_l} }) {
172 print O join('|', $s->{id}, $s->{title},
173 $s->{date}, $s->{offset},
174 $s->{format} || 'grutatxt', $s->{tags} || ''),
175 "\n";
178 close O;
180 return $self;
184 sub _load_index {
185 my $self = shift;
187 open I, $self->{index_file} or
188 croak "Can't open '$self->{index_file}'";
189 flock I, 1;
191 my @s = ();
192 my %h = ();
194 while (<I>) {
195 chomp;
197 my $r = {};
198 ($r->{id}, $r->{title}, $r->{date},
199 $r->{offset}, $r->{format}, $r->{tags}) =
200 split(/\|/, $_);
201 push(@s, $r);
202 $h{$r->{id}} = $r;
205 $self->{stories_l} = [ @s ];
206 $self->{stories_h} = { %h };
208 close I;
210 return $self;
214 sub _index {
215 my $self = shift;
217 if (not -f $self->{index_file} or
218 -M $self->{index_file} > -M $self->{file}) {
219 $self->_build_index->_save_index();
221 else {
222 $self->_load_index();
225 return $self;
229 sub topic {
230 my $self = shift;
231 my $id = shift;
233 my $topic = undef;
235 if ($self->{topic_id} eq $id) {
236 $topic = Gruta::Data::Mbox::Topic->new(
237 id => $id,
238 name => $self->{topic_name},
239 editors => '',
240 internal => 0,
241 max_stories => 0
245 return $topic;
248 sub topics {
249 return ($_[0]->{topic_id}) ;
252 sub story {
253 my $self = shift;
254 my $topic_id = shift;
255 my $id = shift;
257 my $story = undef;
259 if ($self->{topic_id} eq $topic_id) {
261 $story = Gruta::Data::Mbox::Story->new (
262 id => $id, topic_id => $topic_id )->load($self);
265 return $story;
268 sub stories {
269 my $self = shift;
270 my $topic_id = shift;
272 my @r = ();
274 if ($self->{topic_id} eq $topic_id) {
275 @r = map { $_->{id} } @{ $self->{stories_l} };
278 return @r;
282 sub stories_by_date {
283 my $self = shift;
284 my $topics = shift;
285 my %args = @_;
287 my $topic_id;
289 if (!$topics) {
290 $topic_id = $self->{topic_id};
292 else {
293 $topic_id = $topics->[0];
296 $args{offset} += 0;
297 $args{offset} = 0 if $args{offset} < 0;
299 my @r = ();
300 my $o = 0;
302 if ($self->{topic_id} eq $topic_id) {
303 foreach my $s (@{ $self->{stories_l} }) {
304 my $date = $s->{date};
306 # skip future stories
307 next if not $args{future} and
308 $args{today} and
309 $date > $args{today};
311 # skip if date is above the threshold
312 next if $args{'to'} and $date > $args{'to'};
314 # exit if date is below the threshold
315 last if $args{'from'} and $date < $args{'from'};
317 # skip offset stories
318 next if $args{'offset'} and ++$o <= $args{'offset'};
320 push(@r, [ $topic_id, $s->{id}, $date ]);
322 # exit if we have all we need
323 last if $args{'num'} and $args{'num'} == scalar(@r);
327 return @r;
330 sub search_stories {
331 my $self = shift;
332 my $topic_id = shift;
333 my $query = shift;
334 my $future = shift;
336 # not this topic? return
337 if ($self->{topic_id} ne $topic_id) {
338 return ();
341 my @ret = ();
342 my @q = split(/\s+/,$query);
344 foreach my $e (@{$self->{stories_l}}) {
346 my $story = $self->story($topic_id, $e->{id});
347 my $content = $story->get('content');
348 my $found = 0;
350 # try complete query first
351 if ($content =~ /\b$query\b/i) {
352 $found = scalar(@q);
354 else {
355 # try separate words
356 foreach my $q (@q) {
357 if(length($q) > 1 and $content =~ /\b$q\b/i) {
358 $found++;
363 push(@ret, $e->{id}) if $found == scalar(@q);
366 return @ret;
369 sub stories_by_tag {
370 my $self = shift;
371 my $topics = shift;
372 my $tag = shift;
373 my $future = shift;
375 my $topic_id;
377 if (!$topics) {
378 $topic_id = $self->{topic_id};
380 else {
381 $topic_id = $topics->[0];
384 # not this topic? return
385 if ($self->{topic_id} ne $topic_id) {
386 return ();
389 my @tags = map { lc($_) } split(/\s*,\s*/, $tag);
390 my @ret = ();
392 foreach my $e (@{$self->{stories_l}}) {
393 my @ts = split(/\s*,\s*/, $e->{tags});
395 # skip stories with less tags than the wanted ones
396 if (scalar(@ts) < scalar(@tags)) {
397 next;
400 # count matches
401 my $c = 0;
403 foreach my $t (@ts) {
404 if (grep(/^\Q$t\E$/, @tags)) {
405 $c++;
409 if ($c >= scalar(@tags)) {
411 # if no future stories are wanted, discard them
412 if (!$future) {
413 if ($e->{date} > Gruta::Data::today()) {
414 next;
418 push(@ret, [ $topic_id, $e->{id}, $e->{date} ]);
422 return @ret;
426 sub tags {
427 my $self = shift;
429 my @ret = ();
430 my %h = ();
432 foreach my $e (@{$self->{stories_l}}) {
433 my $tags = $e->{tags};
435 foreach my $t (split(/\s*,\s*/, $tags)) {
436 $h{$t}++;
440 foreach my $k (keys(%h)) {
441 push(@ret, [ $k, $h{$k} ]);
444 return @ret;
448 sub new {
449 my $class = shift;
451 my $s = bless( { @_ }, $class);
453 $s->_assert();
455 if (!$s->{topic_id}) {
456 my ($topic_id) = ($s->{file} =~ /^(\w+)\.?.*$/);
457 $s->{topic_id} = $topic_id;
460 if (!$s->{topic_name}) {
461 $s->{topic_name} = $s->{topic_id};
464 if (!$s->{index_file}) {
465 $s->{index_file} = '/tmp/' . $s->{topic_id} . '.idx';
468 my $n = 0;
469 my %m = map { $_ => ++$n }
470 qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
472 $s->{_month_hash} = { %m };
474 $s->_index();
476 return $s;