Make a branch to make krunner Good Enough For Aaron™.
[kdebase/uwolfer.git] / runtime / khelpcenter / htmlsearch / htmlsearch.cpp
blob046a009fed9fb368e18c88efa08621b3c1479eba
1 #include <QRegExp>
2 #include <QDir>
3 #include <assert.h>
5 #include <kapplication.h>
6 #include <kdebug.h>
7 #include <kstandarddirs.h>
8 #include <k3process.h>
9 #include <klocale.h>
10 #include <kconfig.h>
13 #include "progressdialog.h"
14 #include <QTextStream>
15 #include "htmlsearch.moc"
18 HTMLSearch::HTMLSearch()
19 : QObject(), _proc(0)
24 QString HTMLSearch::dataPath(const QString& _lang)
26 return kapp->dirs()->saveLocation("data", QString("khelpcenter/%1").arg(_lang));
30 void HTMLSearch::scanDir(const QString& dir)
32 assert( dir.at( dir.length() - 1 ) == '/' );
34 QStringList::ConstIterator it;
36 if ( KStandardDirs::exists( dir + "index.docbook" ) ) {
37 _files.append(dir + "index.docbook");
38 progress->setFilesScanned(++_filesScanned);
39 } else {
40 QDir d(dir, "*.html", QDir::Name|QDir::IgnoreCase, QDir::Files | QDir::Readable);
41 QStringList const &list = d.entryList();
42 QString adir = d.canonicalPath () + '/';
43 QString file;
44 for (it=list.begin(); it != list.end(); ++it)
46 file = adir + *it;
47 if ( !_files.contains( file ) ) {
48 _files.append(file);
49 progress->setFilesScanned(++_filesScanned);
54 QDir d2(dir, QString(), QDir::Name|QDir::IgnoreCase, QDir::Dirs);
55 QStringList const &dlist = d2.entryList();
56 for (it=dlist.begin(); it != dlist.end(); ++it)
57 if (*it != "." && *it != "..")
59 scanDir(dir + *it + '/');
60 kapp->processEvents();
65 bool HTMLSearch::saveFilesList(const QString& _lang)
67 QStringList dirs;
69 // throw away old files list
70 _files.clear();
72 // open config file
73 KConfig config("khelpcenterrc");
74 KConfigGroup scopeGroup( &config, "Scope" );
76 // add KDE help dirs
77 if (scopeGroup.readEntry("KDE", true))
78 dirs = kapp->dirs()->findDirs("html", _lang + '/');
79 kDebug() << "got " << dirs.count() << " dirs\n";
81 // TODO: Man and Info!!
83 // add local urls
84 QStringList add = scopeGroup.readEntry("Paths", QStringList() );
85 QStringList::Iterator it;
86 for (it = add.begin(); it != add.end(); ++it) {
87 if ( ( *it ).at( ( *it ).length() - 1 ) != '/' )
88 ( *it ) += '/';
89 dirs.append(*it);
92 _filesScanned = 0;
94 for (it = dirs.begin(); it != dirs.end(); ++it)
95 scanDir(*it);
97 return true;
101 bool HTMLSearch::createConfig(const QString& _lang)
103 QString fname = dataPath(_lang) + "/htdig.conf";
105 // locate the common dir
106 QString wrapper = locate("data", QString("khelpcenter/%1/wrapper.html").arg(_lang));
107 if (wrapper.isEmpty())
108 wrapper = locate("data", QString("khelpcenter/en/wrapper.html"));
109 if (wrapper.isEmpty())
110 return false;
111 wrapper = wrapper.left(wrapper.length() - 12);
113 // locate the image dir
114 QString images = locate("data", "khelpcenter/pics/star.png");
115 if (images.isEmpty())
116 return false;
117 images = images.left(images.length() - 8);
119 // This is an example replacement for the default bad_words file
120 // distributed with ht://Dig. It was compiled by Marjolein Katsma
121 // <HSH@taxon.demon.nl>.
122 QString bad_words = i18nc( "List of words to exclude from index",
123 "above:about:according:across:actually:\n"
124 "adj:after:afterwards:again:against:all:\n"
125 "almost:alone:along:already:also:although:\n"
126 "always:among:amongst:and:another:any:\n"
127 "anyhow:anyone:anything:anywhere:are:aren:\n"
128 "arent:around:became:because:become:\n"
129 "becomes:becoming:been:before:beforehand:\n"
130 "begin:beginning:behind:being:below:beside:\n"
131 "besides:between:beyond:billion:both:but:\n"
132 "can:cant:cannot:caption:could:couldnt:\n"
133 "did:didnt:does:doesnt:dont:down:during:\n" //krazy:exclude=spelling
134 "each:eight:eighty:either:else:elsewhere:\n"
135 "end:ending:enough:etc:even:ever:every:\n"
136 "everyone:everything:everywhere:except:few:\n"
137 "fifty:first:five:for:former:formerly:forty:\n"
138 "found:four:from:further:had:has:hasnt:have:\n" //krazy:exclude=spelling
139 "havent:hence:her:here:hereafter:hereby:\n"
140 "herein:heres:hereupon:hers:herself:hes:him:\n"
141 "himself:his:how:however:hundred:\n"
142 "inc:indeed:instead:into:isnt:its:\n"
143 "itself:last:later:latter:latterly:least:\n"
144 "less:let:like:likely:ltd:made:make:makes:\n"
145 "many:may:maybe:meantime:meanwhile:might:\n"
146 "million:miss:more:moreover:most:mostly:\n"
147 "mrs:much:must:myself:namely:neither:\n"
148 "never:nevertheless:next:nine:ninety:\n"
149 "nobody:none:nonetheless:noone:nor:not:\n" //krazy:exclude=spelling
150 "nothing:now:nowhere:off:often:once:\n"
151 "one:only:onto:others:otherwise:our:ours:\n"
152 "ourselves:out:over:overall:own:page:per:\n"
153 "perhaps:rather:recent:recently:same:\n"
154 "seem:seemed:seeming:seems:seven:seventy:\n"
155 "several:she:shes:should:shouldnt:since:six:\n" //krazy:exclude=spelling
156 "sixty:some:somehow:someone:something:\n"
157 "sometime:sometimes:somewhere:still:stop:\n"
158 "such:taking:ten:than:that:the:their:them:\n"
159 "themselves:then:thence:there:thereafter:\n"
160 "thereby:therefore:therein:thereupon:these:\n"
161 "they:thirty:this:those:though:thousand:\n"
162 "three:through:throughout:thru:thus:tips:\n"
163 "together:too:toward:towards:trillion:\n"
164 "twenty:two:under:unless:unlike:unlikely:\n"
165 "until:update:updated:updates:upon:\n"
166 "used:using:very:via:want:wanted:wants:\n"
167 "was:wasnt:way:ways:wed:well:were:\n"
168 "werent:what:whats:whatever:when:whence:\n"
169 "whenever:where:whereafter:whereas:whereby:\n"
170 "wherein:whereupon:wherever:wheres:whether:\n"
171 "which:while:whither:who:whoever:whole:\n"
172 "whom:whomever:whose:why:will:with:within:\n"
173 "without:wont:work:worked:works:working:\n"
174 "would:wouldnt:yes:yet:you:youd:youll:your:\n"
175 "youre:yours:yourself:yourselves:youve" );
177 QFile f;
178 f.setName( dataPath(_lang) + "/bad_words" );
179 if (f.open(QIODevice::WriteOnly))
181 QTextStream ts( &f );
182 QStringList words = bad_words.split( QRegExp ( "[\n:]" ), QString::SkipEmptyParts );
183 for ( QStringList::ConstIterator it = words.begin();
184 it != words.end(); ++it )
185 ts << *it << endl;
186 f.close();
189 f.setName(fname);
190 if (f.open(QIODevice::WriteOnly))
192 kDebug() << "Writing config for " << _lang << " to " << fname;
194 QTextStream ts(&f);
196 ts << "database_dir:\t\t" << dataPath(_lang) << endl;
197 ts << "start_url:\t\t`" << dataPath(_lang) << "/files`" << endl;
198 ts << "local_urls:\t\tfile:/=/" << endl;
199 ts << "local_urls_only:\ttrue" << endl;
200 ts << "maximum_pages:\t\t1" << endl;
201 ts << "image_url_prefix:\t" << images << endl;
202 ts << "star_image:\t\t" << images << "star.png" << endl;
203 ts << "star_blank:\t\t" << images << "star_blank.png" << endl;
204 ts << "compression_level:\t6" << endl;
205 ts << "max_hop_count:\t\t0" << endl;
207 ts << "search_results_wrapper:\t" << wrapper << "wrapper.html" << endl;
208 ts << "nothing_found_file:\t" << wrapper << "nomatch.html" << endl;
209 ts << "syntax_error_file:\t" << wrapper << "syntax.html" << endl;
210 ts << "bad_word_list:\t\t" << dataPath(_lang) << "/bad_words" << endl;
211 ts << "external_parsers:\t" << "text/xml\t" << locate( "data", "khelpcenter/meinproc_wrapper" ) << endl;
212 f.close();
213 return true;
216 return false;
220 #define CHUNK_SIZE 15
222 bool HTMLSearch::generateIndex( const QString & _lang, QWidget *parent)
224 if (_lang == "C")
225 _lang = "en";
227 if (!createConfig(_lang))
228 return false;
230 // create progress dialog
231 progress = new ProgressDialog(parent);
232 progress->show();
233 kapp->processEvents();
235 // create files list ----------------------------------------------
236 if (!saveFilesList(_lang))
237 return false;
239 progress->setState(1);
241 // run htdig ------------------------------------------------------
242 KConfig config("khelpcenterrc", true);
243 KConfigGroup group(&config, "htdig");
244 QString exe = group.readPathEntry("htdig", KGlobal::dirs()->findExe("htdig"));
246 if (exe.isEmpty())
248 return false;
250 bool initial = true;
251 bool done = false;
252 int count = 0;
254 _filesToDig = _files.count();
255 progress->setFilesToDig(_filesToDig);
256 _filesDigged = 0;
258 QDir d; d.mkdir(dataPath(_lang));
260 while (!done)
262 // kill old process
263 delete _proc;
265 // prepare new process
266 _proc = new K3Process();
267 *_proc << exe << "-v" << "-c" << dataPath(_lang)+"/htdig.conf";
268 if (initial)
270 *_proc << "-i";
271 initial = false;
274 kDebug() << "Running htdig";
276 connect(_proc, SIGNAL(receivedStdout(K3Process *,char*,int)),
277 this, SLOT(htdigStdout(K3Process *,char*,int)));
279 connect(_proc, SIGNAL(processExited(K3Process *)),
280 this, SLOT(htdigExited(K3Process *)));
282 _htdigRunning = true;
284 // write out file
285 QFile f(dataPath(_lang)+"/files");
286 if (f.open(QIODevice::WriteOnly))
288 QTextStream ts(&f);
290 for (int i=0; i<CHUNK_SIZE; ++i, ++count)
291 if (count < _filesToDig) {
292 ts << "file://" + _files[count] << endl;
293 } else {
294 done = true;
295 break;
297 f.close();
299 else
301 kDebug() << "Could not open `files` for writing";
302 return false;
306 // execute htdig
307 _proc->start(K3Process::NotifyOnExit, K3Process::Stdout );
309 kapp->enter_loop();
311 if (!_proc->normalExit() || _proc->exitStatus() != 0)
313 delete _proc;
314 delete progress;
315 return false;
318 // _filesDigged += CHUNK_SIZE;
319 progress->setFilesDigged(_filesDigged);
320 kapp->processEvents();
323 progress->setState(2);
325 // run htmerge -----------------------------------------------------
326 exe = group.readPathEntry("htmerge", kapp->dirs()->findExe("htmerge"));
327 if (exe.isEmpty())
329 return false;
331 delete _proc;
332 _proc = new K3Process();
333 *_proc << exe << "-c" << dataPath(_lang)+"/htdig.conf";
335 kDebug() << "Running htmerge";
337 connect(_proc, SIGNAL(processExited(K3Process *)),
338 this, SLOT(htmergeExited(K3Process *)));
340 _htmergeRunning = true;
342 _proc->start(K3Process::NotifyOnExit, K3Process::Stdout);
344 kapp->enter_loop();
346 if (!_proc->normalExit() || _proc->exitStatus() != 0)
348 delete _proc;
349 delete progress;
350 return false;
353 delete _proc;
355 progress->setState(3);
356 kapp->processEvents();
358 delete progress;
360 return true;
365 void HTMLSearch::htdigStdout(K3Process *, char *buffer, int len)
367 QString line = QString(buffer).left(len);
369 int cnt=0, index=-1;
370 while ( (index = line.find("file://", index+1)) > 0)
371 cnt++;
372 _filesDigged += cnt;
374 cnt=0;
375 index=-1;
376 while ( (index = line.find("not changed", index+1)) > 0)
377 cnt++;
378 _filesDigged -= cnt;
380 progress->setFilesDigged(_filesDigged);
384 void HTMLSearch::htdigExited(K3Process *p)
386 kDebug() << "htdig terminated " << p->exitStatus();
387 _htdigRunning = false;
388 kapp->exit_loop();
392 void HTMLSearch::htmergeExited(K3Process *)
394 kDebug() << "htmerge terminated";
395 _htmergeRunning = false;
396 kapp->exit_loop();
400 void HTMLSearch::htsearchStdout(K3Process *, char *buffer, int len)
402 _searchResult += QString::fromLocal8Bit(buffer,len);
406 void HTMLSearch::htsearchExited(K3Process *)
408 kDebug() << "htsearch terminated";
409 _htsearchRunning = false;
410 kapp->exit_loop();
414 QString HTMLSearch::search( const QString & _lang, const QString & words, const QString & method, int matches,
415 const QString & format, const QString & sort)
417 if (_lang == "C")
418 _lang = "en";
420 createConfig(_lang);
422 QString result = dataPath(_lang)+"/result.html";
424 // run htsearch ----------------------------------------------------
425 KConfig *config = new KConfig("khelpcenterrc", true);
426 KConfigGroup group(config, "htdig");
427 QString exe = group.readPathEntry("htsearch", kapp->dirs()->findExe("htsearch"));
428 if (exe.isEmpty())
430 delete config;
431 return QString();
433 _proc = new K3Process();
434 *_proc << exe << "-c" << dataPath(_lang)+"/htdig.conf" <<
435 QString("words=%1;method=%2;matchesperpage=%3;format=%4;sort=%5").arg(words).arg(method).arg(matches).arg(format).arg(sort);
437 kDebug() << "Running htsearch";
439 connect(_proc, SIGNAL(receivedStdout(K3Process *,char*,int)),
440 this, SLOT(htsearchStdout(K3Process *,char*,int)));
441 connect(_proc, SIGNAL(processExited(K3Process *)),
442 this, SLOT(htsearchExited(K3Process *)));
444 _htsearchRunning = true;
445 _searchResult = "";
447 _proc->start(K3Process::NotifyOnExit, K3Process::Stdout);
449 kapp->enter_loop();
451 if (!_proc->normalExit() || _proc->exitStatus() != 0)
453 kDebug() << "Error running htsearch... returning now";
454 delete _proc;
455 delete config;
456 return QString();
459 delete _proc;
461 // modify the search result
462 _searchResult = _searchResult.replace("http://localhost/", "file:/");
463 _searchResult = _searchResult.replace("Content-type: text/html", QString());
465 // dump the search result
466 QFile f(result);
467 if (f.open(QIODevice::WriteOnly))
469 QTextStream ts(&f);
471 ts << _searchResult << endl;
473 f.close();
474 delete config;
475 return result;
477 delete config;
478 return QString();