Make sure EOF is defined
[xapian.git] / xapian-applications / omega / htdig2omega
blobb551a08e0c183e165db93d4e359eaf2412c8d2c5
1 #!/usr/bin/perl -w
2 # htdig2omega - dump an htdig database into a form suitable for indexing
3 # into a Xapian database using scriptindex.
5 # Copyright 2002,2003,2004 Olly Betts
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 use strict;
# Main flow: dump the htdig document database for HTDIGDIR, then convert each
# line of the resulting db.docs file into a "name=value" record on stdout,
# one record per input line, records separated by a blank line.

# Exactly one command-line argument (the htdig directory) is required.
@ARGV == 1 or die "Syntax: $0 HTDIGDIR\nTypical usage: $0 HTDIGDIR|scriptindex XAPIANDB htdig2omega.script\n";

my $dir = shift @ARGV;

# dump the document database (-w suppresses dumping the word database)
# The list form of system() runs htdump directly, without a shell, so $dir is
# passed through verbatim.  A failure is reported but not fatal: the open()
# below still dies if there is no db.docs to read.
my $status = system "htdump", "-w", $dir;
if ($status == -1) {
    warn "$0: could not run htdump: $!\n";
}
elsif ($status != 0) {
    warn "$0: htdump failed (wait status $status)\n";
}

my $docs_file = "$dir/db.docs";
open my $docs, '<', $docs_file
    or die "$0: cannot open $docs_file: $!\n";

while (my $line = <$docs>) {
    # Strip the line terminator so it cannot end up inside the last field's
    # value (which would inject a stray blank line into the output record).
    chomp $line;

    # Columns are tab-separated; the first column is not used here, the rest
    # are expected to look like "<letter>:<value>".
    my (undef, @fields) = split /\t/, $line;

    my %f;
    for my $field (@fields) {
        if ($field =~ /^([a-zA-Z]):(.*)\z/s) {
            # A later field with the same letter overrides an earlier one.
            $f{$1} = $2;
        }
        else {
            print STDERR "Bad field: $field\n";
        }
    }

    # Fields missing from this record are emitted as empty values rather
    # than triggering "uninitialized value" warnings under -w.
    my ($url, $caption, $lastmod, $size, $text, $metadesc) =
        map { defined $f{$_} ? $f{$_} : '' } qw(u t m s H h);

    print "url=$url\ncaption=$caption\nlastmod=$lastmod\nsize=$size\ntext=$text\nmetadesc=$metadesc\n\n";
}

close $docs;