1 //************************************************
2 //* Copyright (c) 2007 Newspiritcompany.com. All Rights Reserved
4 //* Created On: 11/6/2007
6 //* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
7 //* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
8 //* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
9 //* A PARTICULAR PURPOSE ARE DISCLAIMED.
11 //* (see /LICENSE for more details)
12 //************************************************
14 // Author: Berlin Brown
15 // Description: Utility for indexing (source code in scala) developer help
16 // documents with Lucene.
19 // * Index simple text-based help documents, loaded from an input directory
20 // * Developer should be able to query documents through command-line interface
21 // * Shall be able to load text help document in the command-line interface
22 // based on a search query term.
24 import org
.apache
.lucene
.analysis
.Analyzer
;
25 import org
.apache
.lucene
.analysis
.standard
.StandardAnalyzer
;
26 import org
.apache
.lucene
.document
.Document
;
27 import org
.apache
.lucene
.index
.FilterIndexReader
;
28 import org
.apache
.lucene
.index
.IndexReader
;
29 import org
.apache
.lucene
.queryParser
.MultiFieldQueryParser
;
30 import org
.apache
.lucene
.search
.Hits
;
31 import org
.apache
.lucene
.search
.IndexSearcher
;
32 import org
.apache
.lucene
.search
.Query
;
33 import org
.apache
.lucene
.search
.Searcher
;
35 import org
.apache
.lucene
.search
.Sort
;
36 import org
.apache
.lucene
.search
.SortField
;
38 import java
.io
.BufferedReader
;
39 import java
.io
.FileReader
;
40 import java
.io
.IOException
;
41 import java
.io
.InputStreamReader
;
42 import java
.util
.Date
;
44 /** Simple command-line based search demo. */
45 public class SearchHelpDocs
{
47 public static final int MAX_LINES_DISPLAY_CONTENT
= 12;
48 public static final int MAX_COLS_DISPLAY_CONTENT
= 60;
50 public static final int HITS_PER_PAGE
= 5;
51 public static final String CMDLINE_PREFIX
= "Query>>> ";
53 private final static String LUC_KEY_FULL_PATH
= "full_path";
54 private final static String LUC_KEY_FILE_NAME
= "file_name";
55 private final static String LUC_KEY_CONTENT
= "content";
56 private final static String LUC_KEY_IDENTITY
= "id";
// Reader wrapper built on FilterIndexReader. main() wraps its IndexReader in this
// class only when its local normsField is non-null.
// NOTE(review): the constructor body, the declaration of the `field` member and the
// closing braces are not visible in this listing -- confirm against the full file.
58 private static class OneNormsReader
extends FilterIndexReader
{
// Takes the reader to wrap and a field name; what is done with them is not visible here.
61 public OneNormsReader(IndexReader in
, String field
) {
// Returns in.norms(this.field): the lookup always uses the instance's own field,
// so the `field` argument supplied by the caller is ignored.
// `in` is not declared in the visible lines -- presumably the wrapped reader
// inherited from FilterIndexReader; TODO confirm.
66 public byte[] norms(String field
) throws IOException
{
67 return in
.norms(this.field
);
/** Private no-op constructor: code outside this class cannot instantiate it. */
private SearchHelpDocs() {
    // intentionally empty
}
73 private static void printHelpInformation() {
74 System
.out
.println(CMDLINE_PREFIX
+ " Search Help System (Botlist Help Documents)");
75 System
.out
.println(CMDLINE_PREFIX
+ " v0.1 [Nov14.2007]");
76 System
.out
.println(CMDLINE_PREFIX
+ " At the prompt, enter search help term");
77 System
.out
.println(CMDLINE_PREFIX
+ " Use :quit to exit command loop.");
78 System
.out
.println(CMDLINE_PREFIX
+ " ===================");
// Builds the sort criteria for a search; main() passes the result to searcher.search(query, ...).
// Visible code: a new Sort plus a two-element SortField array -- SortField.FIELD_SCORE first,
// then a STRING-typed sort on the "yyyymmdd" field.
// NOTE(review): the lines that close the array, apply it to `sort` and return the result
// are not visible in this listing -- confirm against the full file.
83 * Default search, sort by score and date
85 private static Sort
createSort() throws Exception
{
86 Sort sort
= new Sort();
87 SortField fields
[] = {
88 SortField
.FIELD_SCORE
,
// Third argument `true` is presumably Lucene's "reverse" flag -- TODO confirm.
89 new SortField("yyyymmdd", SortField
.STRING
, true)
96 * Pretty print content; because of the size of our content in our help documentation,
97 * Only print N (E.g 12) number of lines and based on Y (E.g. 60) number of colummns.
99 private static String
prettyPrintContent(final String content
) {
100 // Split by newlines, shorten, and then append back together.
101 StringBuffer buf
= new StringBuffer();
102 String lines
[] = content
.split("\n");
103 final int maxLines
= (lines
.length
> MAX_LINES_DISPLAY_CONTENT
) ? MAX_LINES_DISPLAY_CONTENT
: lines
.length
;
104 for (int i
= 0; i
< maxLines
; i
++) {
105 final String line
= lines
[i
];
106 final int maxColLen
= (line
.length() > MAX_COLS_DISPLAY_CONTENT
) ? MAX_COLS_DISPLAY_CONTENT
: line
.length();
107 final String shortline
= line
.substring(0, maxColLen
) + "\n";
108 buf
.append(shortline
);
110 return buf
.toString();
113 /** Simple command-line based search demo. */
114 public static void main(String
[] args
) throws Exception
{
116 String usage
= "Usage: java SearchFiles index-dir";
117 if (args
.length
!= 1) {
118 System
.out
.println(usage
);
121 String index
= args
[0];
122 String field
= LUC_KEY_CONTENT
;
123 String queries
= null;
126 String normsField
= null;
128 System
.out
.println("INFO: index-directory=" + index
);
129 IndexReader reader
= IndexReader
.open(index
);
130 if (normsField
!= null)
131 reader
= new OneNormsReader(reader
, normsField
);
133 Searcher searcher
= new IndexSearcher(reader
);
134 Analyzer analyzer
= new StandardAnalyzer();
136 BufferedReader in
= null;
137 in
= new BufferedReader(new InputStreamReader(System
.in
, "UTF-8"));
139 String
[] fields
= { LUC_KEY_CONTENT
, LUC_KEY_FULL_PATH
, LUC_KEY_FILE_NAME
};
140 MultiFieldQueryParser parser
= new MultiFieldQueryParser( fields
, analyzer
);
142 printHelpInformation();
145 System
.out
.print(CMDLINE_PREFIX
); System
.out
.flush();
146 String line
= in
.readLine();
147 if (line
== null || line
.length() < 0)
149 if (line
.trim().length() == 0) {
153 if (line
.trim().equalsIgnoreCase(":quit")) {
154 System
.out
.println("INFO: quit successful");
158 // Modify for fuzzy query (E.g. ~0.58), also use wildcard postfix (*)
160 Object obj
= parser
.parse(line
);
161 Query query
= parser
.parse(line
);
162 System
.out
.println(CMDLINE_PREFIX
+ "Searching for: [" + line
+ "] query=" + query
.toString(field
));
164 // Search and also add the sort element
165 Hits hits
= searcher
.search(query
, createSort());
167 Date start
= new Date();
168 for (int i
= 0; i
< repeat
; i
++) {
169 hits
= searcher
.search(query
);
171 Date end
= new Date();
172 System
.out
.println(CMDLINE_PREFIX
+ "Time: "+(end
.getTime()-start
.getTime())+"ms");
174 System
.out
.println(hits
.length() + " total matching documents");
175 for (int start
= 0; start
< hits
.length(); start
+= HITS_PER_PAGE
) {
176 int end
= Math
.min(hits
.length(), start
+ HITS_PER_PAGE
);
177 for (int i
= start
; i
< end
; i
++) {
179 System
.out
.println(CMDLINE_PREFIX
+ "doc=" + hits
.id(i
) + " score="+hits
.score(i
));
181 // Ignore scores based on a certain threshold
182 if (hits
.score(i
) < 0.09) continue;
184 Document doc
= hits
.doc(i
);
185 String path
= doc
.get(LUC_KEY_CONTENT
);
187 // Attempt to pretty print help document information
188 System
.out
.println("\n == Help Document Found; docid=" + hits
.id(i
));
189 System
.out
.println("*************************");
190 String fullpath
= doc
.get(LUC_KEY_FULL_PATH
);
191 String filename
= doc
.get(LUC_KEY_FILE_NAME
);
192 String content
= doc
.get(LUC_KEY_CONTENT
);
193 String id
= doc
.get(LUC_KEY_IDENTITY
);
194 if (filename
!= null) {
195 System
.out
.println(" +Filename: " + doc
.get(filename
));
197 if (fullpath
!= null) {
198 System
.out
.println(" +Path: " + doc
.get(fullpath
));
200 System
.out
.println(" id: " + id
);
201 System
.out
.println(" == Content:");
202 System
.out
.println(prettyPrintContent(content
));
203 System
.out
.println("-------------------------");
205 System
.out
.println();
207 System
.out
.println((i
+1) + ". " + "No content for this document");
210 if (queries
!= null) // non-interactive
212 if (hits
.length() > end
) {
213 System
.out
.print("more (y/n) ? ");
214 line
= in
.readLine();
215 if (line
.length() == 0 || line
.charAt(0) == 'n')