[honey] Fix portability to systems without pread()
[xapian.git] / xapian-bindings / lua / docs / examples / simpleindex.lua
blob1d0f2a82e9e7060d394e5cd016c11103d0ddc672
1 #!/usr/bin/env lua
2 --
3 -- Index each paragraph of a text file as a Xapian document.
4 --
5 -- Copyright (C) 2011 Xiaona Han
6 --
7 -- This program is free software; you can redistribute it and/or
8 -- modify it under the terms of the GNU General Public License as
9 -- published by the Free Software Foundation; either version 2 of the
10 -- License, or (at your option) any later version.
12 -- This program is distributed in the hope that it will be useful,
13 -- but WITHOUT ANY WARRANTY; without even the implied warranty of
14 -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 -- GNU General Public License for more details.
17 -- You should have received a copy of the GNU General Public License
18 -- along with this program; if not, write to the Free Software
19 -- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 -- USA
22 require("xapian")
-- Strip whitespace from both ends of a string.
-- Returns exactly one value: the text between the leading and trailing
-- whitespace runs (an empty string if s is empty or all whitespace).
function trim(s)
    local stripped = s:match("^%s*(.-)%s*$")
    return stripped
end
-- Require exactly one command line argument: the database directory.
if #arg ~= 1 then
    io.stderr:write("Usage: " .. arg[0] .. " PATH_TO_DATABASE\n")
    -- Exit with a non-zero status so the caller can detect the usage error;
    -- a bare os.exit() would report success.
    os.exit(1)
end
-- Open the database for update, creating a new database if necessary.
-- Declared local so the script does not leak names into the global table;
-- the rest of this chunk can still see them.
local database = xapian.WritableDatabase(arg[1], xapian.DB_CREATE_OR_OPEN)

-- Create the term generator used to index each paragraph and attach an
-- English stemmer to it.
local indexer = xapian.TermGenerator()
local stemmer = xapian.Stem("english")
indexer:set_stemmer(stemmer)
-- Read document data from standard input, one line at a time.
-- A blank line (after trimming) ends the current paragraph; each non-empty
-- paragraph is stored and indexed as one document.

-- Store `text` as the data of a new document, index it with the term
-- generator, and add the document to the database.
local function index_paragraph(text)
    local doc = xapian.Document()
    doc:set_data(text)

    indexer:set_document(doc)
    indexer:index_text(text)

    -- Add the document to the database.
    database:add_document(doc)
end

local para = ''
-- The generic for loop stops by itself once io.lines() returns nil, so no
-- explicit end-of-input check is needed inside the body.
for line in io.lines() do
    line = trim(line)
    if line == '' then
        if para ~= '' then
            -- We've reached the end of a paragraph, so index it.
            index_paragraph(para)
            para = ''
        end
    else
        -- Join the lines of a paragraph with single spaces.
        if para ~= '' then
            para = para .. ' '
        end
        para = para .. line
    end
end

-- Input may end without a trailing blank line; index the paragraph that is
-- still pending so the last document is not silently dropped.
if para ~= '' then
    index_paragraph(para)
end