Renamed kpengine to jben_kpengine and made its data dir relocatable.
[jben.git] / kanjilist.cpp
blobd18b1a3c795264ff294e033625398fc20407e86a
1 /*
2 Project: J-Ben
3 Author: Paul Goins
4 Website: http://www.vultaire.net/software/jben/
5 License: GNU General Public License (GPL) version 2
6 (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
8 File: kanjilist.cpp
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>
24 #include "kanjilist.h"
25 #include "kdict.h"
26 #include <algorithm>
27 using namespace std;
29 KanjiList::KanjiList(const BoostHM<wchar_t,string>* const kDictHash) {
30 kanjiHash = kDictHash;
33 int KanjiList::AddFromString(const wstring& s) {
34 int kanjiAdded = 0, len = s.length();
35 wchar_t c;
36 BoostHM<wchar_t,string>::const_iterator it;
38 for(int i=0;i<len;i++) {
39 c = s[i];
40 it = kanjiHash->find(c);
41 if(it!=kanjiHash->end()) {
42 if(find(kanjiList.begin(), kanjiList.end(), c)==kanjiList.end()) {
43 kanjiList.push_back(c);
44 kanjiAdded++;
49 return kanjiAdded;
52 /* Convert the kanjilist into a wide char string,
53 with lineWidth kanji per line (0 == no line breaks). */
54 wstring KanjiList::ToString(int lineWidth) {
55 wstring result;
56 int lineWidthCounter=0;
57 int len = kanjiList.size();
58 for(int i=0;i<len;i++) {
59 result.append(1, kanjiList[i]);
60 if(lineWidth>0) {
61 lineWidthCounter++;
62 if(lineWidthCounter>=lineWidth) {
63 result.append(1, L'\n');
64 lineWidthCounter=0;
68 return result;
71 void KanjiList::Clear() {
72 kanjiList.clear();
75 int KanjiList::AddByGrade(int lowGrade, int highGrade) {
76 wstring kanjiStr;
77 int grade;
78 const KDict* kd = KDict::Get();
80 for(BoostHM<wchar_t,string>::const_iterator ki=kanjiHash->begin(); ki!=kanjiHash->end(); ki++) {
81 grade = kd->GetIntField(ki->first, L"G");
82 if(grade>=lowGrade &&
83 (grade<=highGrade || highGrade==0))
84 kanjiStr.append(1, ki->first);
87 return AddFromString(kanjiStr);
90 int KanjiList::AddByFrequency(int lowFreq, int highFreq) {
91 wstring kanjiStr;
92 int freq;
93 const KDict* kd = KDict::Get();
95 for(BoostHM<wchar_t,string>::const_iterator ki=kanjiHash->begin(); ki!=kanjiHash->end(); ki++) {
96 freq = kd->GetIntField(ki->first, L"F");
97 if(freq>=lowFreq && freq<=highFreq)
98 kanjiStr.append(1, ki->first);
101 return AddFromString(kanjiStr);
104 int KanjiList::Size() {return kanjiList.size();}
106 void KanjiList::InplaceMerge(vector<wchar_t>& v, BoostHM<wchar_t,int>& indexer, int start, int middle, int end) {
107 /* Merge is implemented as a bubble sort started at halfway
108 (since we know the first whole half is already sorted) */
109 int i, highIndex;
110 wchar_t temp;
111 i = highIndex = middle;
112 while(i<end) {
113 if(i>0 && (indexer[v[i]] < indexer[v[i-1]])) {
114 temp = v[i-1];
115 v[i-1] = v[i];
116 v[i] = temp;
117 i--;
118 } else {
119 highIndex++;
120 i=highIndex;
126 SortKanjiList sorts the currently loaded kanji list based upon a specified
127 KANJIDIC field, like F (frequency) or G (jouyou grade). Sorting is done
128 via a merged sort. This might be overkill, but I wanted to try doing
129 it, so I did.
131 void KanjiList::Sort(int sortType, bool reverseOrder) {
132 int totalSize = kanjiList.size();
133 if(totalSize<=1) return; /* Size 0 or 1 list is already sorted */
135 myCharIndexer = new BoostHM<wchar_t,int>;
136 myCharIndexer->clear();
137 vector<wchar_t>::iterator vi;
139 wstring fieldMarker;
140 switch(sortType) {
141 case ST_GRADE:
142 fieldMarker=L"G";
143 break;
144 case ST_FREQUENCY:
145 fieldMarker=L"F";
146 break;
147 default:
148 fieldMarker=L"INVALID";
151 /* Create index based on the sort type */
152 int value;
153 const KDict* kd = KDict::Get();
154 for(vi=kanjiList.begin();vi!=kanjiList.end();vi++) {
155 value = kd->GetIntField(*vi, fieldMarker);
156 if(value==-1) value=INT_MAX;
157 myCharIndexer->assign(*vi, value);
160 /* Sort our data based upon the stored key in the hash table */
161 /* This code, a merge sort, was created based upon code at:
162 http://en.wikipedia.org/wiki/Merge_sort#C.2B.2B_implementation
163 These pages were referred to:
164 http://www.cppreference.com/cppalgorithm/merge.html
165 http://www.cppreference.com/cppalgorithm/inplace_merge.html */
166 int rangeSize, rangeStart;
167 for(rangeSize=1; rangeSize<totalSize; rangeSize *= 2) {
168 for(rangeStart=0; rangeStart<totalSize-rangeSize; rangeStart += rangeSize*2) {
169 /* Our range sort function is HERE */
170 InplaceMerge(
171 kanjiList,
172 *myCharIndexer,
173 rangeStart,
174 rangeStart + rangeSize,
175 min(rangeStart + rangeSize*2, totalSize));
179 delete myCharIndexer;
182 wchar_t KanjiList::operator[](unsigned int index) {
183 if(index<kanjiList.size()) return kanjiList[index];
184 return L'\0';
187 int KanjiList::GetIndexByChar(wchar_t c) {
188 int i, len = kanjiList.size();
189 for(i=0;i<len;i++)
190 if(kanjiList[i]==c) return i;
191 return -1;
194 vector<wchar_t>& KanjiList::GetVector() {return kanjiList;}