adding all of botlist, initial add
[botlist.git] / botlistprojects / botlistdocs / src / java / SearchHelpDocs.java
blob64c469d041cfda19357666f59d458c16314eab32
1 //************************************************
2 //* Copyright (c) 2007 Newspiritcompany.com. All Rights Reserved
3 //*
4 //* Created On: 11/6/2007
5 //*
6 //* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
7 //* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
8 //* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
9 //* A PARTICULAR PURPOSE ARE DISCLAIMED.
10 //*
11 //* (see /LICENSE for more details)
12 //************************************************
14 // Author: Berlin Brown
15 // Description: Utility for indexing (source code in scala) developer help
16 // documents with Lucene.
18 // Specification:
19 // * Index simple text-based help documents, loaded from an input directory
20 // * Developer should be able to query documents through command-line interface
21 // * Shall be able to load text help document in the command-line interface
22 // based on a search query term.
24 import org.apache.lucene.analysis.Analyzer;
25 import org.apache.lucene.analysis.standard.StandardAnalyzer;
26 import org.apache.lucene.document.Document;
27 import org.apache.lucene.index.FilterIndexReader;
28 import org.apache.lucene.index.IndexReader;
29 import org.apache.lucene.queryParser.MultiFieldQueryParser;
30 import org.apache.lucene.search.Hits;
31 import org.apache.lucene.search.IndexSearcher;
32 import org.apache.lucene.search.Query;
33 import org.apache.lucene.search.Searcher;
35 import org.apache.lucene.search.Sort;
36 import org.apache.lucene.search.SortField;
38 import java.io.BufferedReader;
39 import java.io.FileReader;
40 import java.io.IOException;
41 import java.io.InputStreamReader;
42 import java.util.Date;
44 /** Simple command-line based search demo. */
45 public class SearchHelpDocs {
47 public static final int MAX_LINES_DISPLAY_CONTENT = 12;
48 public static final int MAX_COLS_DISPLAY_CONTENT = 60;
50 public static final int HITS_PER_PAGE = 5;
51 public static final String CMDLINE_PREFIX = "Query>>> ";
53 private final static String LUC_KEY_FULL_PATH = "full_path";
54 private final static String LUC_KEY_FILE_NAME = "file_name";
55 private final static String LUC_KEY_CONTENT = "content";
56 private final static String LUC_KEY_IDENTITY = "id";
58 private static class OneNormsReader extends FilterIndexReader {
59 private String field;
61 public OneNormsReader(IndexReader in, String field) {
62 super(in);
63 this.field = field;
66 public byte[] norms(String field) throws IOException {
67 return in.norms(this.field);
/** Private so the class cannot be instantiated from outside. */
private SearchHelpDocs() {
    // nothing to initialise
}
73 private static void printHelpInformation() {
74 System.out.println(CMDLINE_PREFIX + " Search Help System (Botlist Help Documents)");
75 System.out.println(CMDLINE_PREFIX + " v0.1 [Nov14.2007]");
76 System.out.println(CMDLINE_PREFIX + " At the prompt, enter search help term");
77 System.out.println(CMDLINE_PREFIX + " Use :quit to exit command loop.");
78 System.out.println(CMDLINE_PREFIX + " ===================");
79 System.out.flush();
82 /**
83 * Default search, sort by score and date
85 private static Sort createSort() throws Exception {
86 Sort sort = new Sort();
87 SortField fields [] = {
88 SortField.FIELD_SCORE,
89 new SortField("yyyymmdd", SortField.STRING, true)
91 sort.setSort(fields);
92 return sort;
95 /**
96 * Pretty print content; because of the size of our content in our help documentation,
97 * Only print N (E.g 12) number of lines and based on Y (E.g. 60) number of colummns.
99 private static String prettyPrintContent(final String content) {
100 // Split by newlines, shorten, and then append back together.
101 StringBuffer buf = new StringBuffer();
102 String lines [] = content.split("\n");
103 final int maxLines = (lines.length > MAX_LINES_DISPLAY_CONTENT) ? MAX_LINES_DISPLAY_CONTENT : lines.length;
104 for (int i = 0; i < maxLines; i++) {
105 final String line = lines[i];
106 final int maxColLen = (line.length() > MAX_COLS_DISPLAY_CONTENT) ? MAX_COLS_DISPLAY_CONTENT : line.length();
107 final String shortline = line.substring(0, maxColLen) + "\n";
108 buf.append(shortline);
110 return buf.toString();
113 /** Simple command-line based search demo. */
114 public static void main(String[] args) throws Exception {
116 String usage = "Usage: java SearchFiles index-dir";
117 if (args.length != 1) {
118 System.out.println(usage);
119 System.exit(0);
121 String index = args[0];
122 String field = LUC_KEY_CONTENT;
123 String queries = null;
124 int repeat = 0;
125 boolean raw = false;
126 String normsField = null;
128 System.out.println("INFO: index-directory=" + index);
129 IndexReader reader = IndexReader.open(index);
130 if (normsField != null)
131 reader = new OneNormsReader(reader, normsField);
133 Searcher searcher = new IndexSearcher(reader);
134 Analyzer analyzer = new StandardAnalyzer();
136 BufferedReader in = null;
137 in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
139 String [] fields = { LUC_KEY_CONTENT, LUC_KEY_FULL_PATH, LUC_KEY_FILE_NAME };
140 MultiFieldQueryParser parser = new MultiFieldQueryParser( fields, analyzer);
142 printHelpInformation();
143 while (true) {
144 // prompt the user
145 System.out.print(CMDLINE_PREFIX); System.out.flush();
146 String line = in.readLine();
147 if (line == null || line.length() < 0)
148 break;
149 if (line.trim().length() == 0) {
150 continue;
152 // Exit gracefully.
153 if (line.trim().equalsIgnoreCase(":quit")) {
154 System.out.println("INFO: quit successful");
155 break;
158 // Modify for fuzzy query (E.g. ~0.58), also use wildcard postfix (*)
159 line = line + "~";
160 Object obj = parser.parse(line);
161 Query query = parser.parse(line);
162 System.out.println(CMDLINE_PREFIX + "Searching for: [" + line + "] query=" + query.toString(field));
163 System.out.flush();
164 // Search and also add the sort element
165 Hits hits = searcher.search(query, createSort());
166 if (repeat > 0) {
167 Date start = new Date();
168 for (int i = 0; i < repeat; i++) {
169 hits = searcher.search(query);
171 Date end = new Date();
172 System.out.println(CMDLINE_PREFIX + "Time: "+(end.getTime()-start.getTime())+"ms");
174 System.out.println(hits.length() + " total matching documents");
175 for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
176 int end = Math.min(hits.length(), start + HITS_PER_PAGE);
177 for (int i = start; i < end; i++) {
179 System.out.println(CMDLINE_PREFIX + "doc=" + hits.id(i) + " score="+hits.score(i));
181 // Ignore scores based on a certain threshold
182 if (hits.score(i) < 0.09) continue;
184 Document doc = hits.doc(i);
185 String path = doc.get(LUC_KEY_CONTENT);
186 if (path != null) {
187 // Attempt to pretty print help document information
188 System.out.println("\n == Help Document Found; docid=" + hits.id(i));
189 System.out.println("*************************");
190 String fullpath = doc.get(LUC_KEY_FULL_PATH);
191 String filename = doc.get(LUC_KEY_FILE_NAME);
192 String content = doc.get(LUC_KEY_CONTENT);
193 String id = doc.get(LUC_KEY_IDENTITY);
194 if (filename != null) {
195 System.out.println(" +Filename: " + doc.get(filename));
197 if (fullpath != null) {
198 System.out.println(" +Path: " + doc.get(fullpath));
200 System.out.println(" id: " + id);
201 System.out.println(" == Content:");
202 System.out.println(prettyPrintContent(content));
203 System.out.println("-------------------------");
205 System.out.println();
206 } else {
207 System.out.println((i+1) + ". " + "No content for this document");
210 if (queries != null) // non-interactive
211 break;
212 if (hits.length() > end) {
213 System.out.print("more (y/n) ? ");
214 line = in.readLine();
215 if (line.length() == 0 || line.charAt(0) == 'n')
216 break;
220 reader.close();