Add README
[xapian-trec.git] / split.cc
blob1aebd2b9223c0696beeb2157f3a15e5278fb9d32
1 /* split.cc: split a string routine
3 * ----START-LICENCE----
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
18 * USA
19 * -----END-LICENCE-----
22 #include <fstream>
23 #include <xapian.h>
24 #include <algorithm>
25 #include <iostream>
26 #include <string>
27 #include "stopword.h"
28 #include "split.h"
30 using namespace Xapian;
31 using namespace std;
/**
 * Strip quoting characters from a string, in place.
 *
 * Despite the name, this does not touch apostrophes: it removes every
 * double-quote character ('"') and every newline ('\n') from `data`.
 * All other characters are kept in their original order.
 *
 * @param data  String to clean; modified in place.
 */
void clear_apos( std::string & data ) {
    // Erase/remove idiom: std::remove compacts the kept characters to the
    // front and returns the new logical end, erase() then drops the tail.
    data.erase( std::remove( data.begin(), data.end(), '"' ), data.end() );
    data.erase( std::remove( data.begin(), data.end(), '\n' ), data.end() );
} // END clear_apos
/**
 * Lowercase a string in place, one byte at a time, using tolower().
 *
 * @param term  String to convert; modified in place.
 */
static inline void
lowercase( std::string & term )
{
    for( std::string::iterator it = term.begin(); it != term.end(); ++it ) {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain char may be negative, so convert first.
        *it = static_cast<char>( tolower( static_cast<unsigned char>( *it ) ) );
    }
}
57 void split( const string &s, char c, vector<string> &res)
59 res.clear();
60 if(s==""){return;}
61 uint i;
62 uint pos=0;
63 for( i=0; i<=s.size(); i++)
65 if( i == s.size() || s[i] == c )
67 uint j;
68 string tmps;
69 for( j=pos; j<i; j++)
71 tmps += s[j];
73 clear_apos( tmps );
74 lowercase( tmps );
75 res.push_back(tmps);
76 pos=i+1;
77 } // END if
78 } // END for
80 } // END split