Added adding/sorting kanji by JLPT grade. A few other minor changes.
[jben.git] / src / kanjilist.cpp
blob41d31212ff665a9418df753b96aa5685eb6c52b2
1 /*
2 Project: J-Ben
3 Author: Paul Goins
4 Website: http://www.vultaire.net/software/jben/
5 License: GNU General Public License (GPL) version 2
6 (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
8 File: kanjilist.cpp
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>
24 #include "kanjilist.h"
25 #include "kdict.h"
26 #include "errorlog.h"
27 #include <sstream>
28 #include <algorithm>
30 using namespace std;
32 KanjiList::KanjiList() {}
34 int KanjiList::AddFromString(const wstring& s) {
35 int kanjiAdded = 0, len = s.length();
36 wchar_t c;
37 unordered_map<wchar_t,KInfo>::const_iterator it;
39 const unordered_map<wchar_t, KInfo>* ht = KDict::Get()->GetHashTable();
41 for(int i=0;i<len;i++) {
42 c = s[i];
43 it = ht->find(c);
44 if(it!=ht->end()) {
45 if(find(kanjiList.begin(), kanjiList.end(), c)==kanjiList.end()) {
46 kanjiList.push_back(c);
47 kanjiAdded++;
52 return kanjiAdded;
55 /* Convert the kanjilist into a wide char string,
56 with lineWidth kanji per line (0 == no line breaks). */
57 wstring KanjiList::ToString(int lineWidth) {
58 wstring result;
59 int lineWidthCounter=0;
60 size_t len = kanjiList.size();
61 if(len==0) return result;
62 for(size_t i=0;i<len;i++) {
63 result.append(1, kanjiList[i]);
64 if(lineWidth>0) {
65 lineWidthCounter++;
66 if(lineWidthCounter>=lineWidth) {
67 result.append(1, L'\n');
68 lineWidthCounter=0;
72 return result;
75 void KanjiList::Clear() {
76 kanjiList.clear();
79 int KanjiList::AddByGrade(int lowGrade, int highGrade) {
80 wstring kanjiStr;
81 int grade;
83 const unordered_map<wchar_t, KInfo>* ht = KDict::Get()->GetHashTable();
85 /* For our comparison, let's scoot ungraded kanji above all other
86 kanji. */
87 if(lowGrade==0) lowGrade = 127;
88 if(highGrade==0) highGrade = 127;
90 for(unordered_map<wchar_t,KInfo>::const_iterator
91 ki=ht->begin(); ki!=ht->end(); ki++) {
92 grade = ki->second.grade;
93 if(grade == 0) grade = 127;
95 if(grade>=lowGrade && grade<=highGrade)
96 kanjiStr.append(1, ki->first);
99 return AddFromString(kanjiStr);
102 int KanjiList::AddByJLPT(int lowLevel, int highLevel) {
103 wstring kanjiStr;
104 int level;
106 const unordered_map<wchar_t, KInfo>* ht = KDict::Get()->GetHashTable();
108 for(unordered_map<wchar_t,KInfo>::const_iterator
109 ki=ht->begin(); ki!=ht->end(); ki++) {
110 level = ki->second.jlpt;
111 if(level<=lowLevel && level>=highLevel)
112 kanjiStr.append(1, ki->first);
115 return AddFromString(kanjiStr);
118 int KanjiList::AddByFrequency(int lowFreq, int highFreq) {
119 wstring kanjiStr;
120 int freq;
122 const unordered_map<wchar_t, KInfo>* ht = KDict::Get()->GetHashTable();
124 for(unordered_map<wchar_t,KInfo>::const_iterator ki=ht->begin(); ki!=ht->end(); ki++) {
125 freq = ki->second.freq;
126 if(freq>=lowFreq && freq<=highFreq)
127 kanjiStr.append(1, ki->first);
130 return AddFromString(kanjiStr);
133 int KanjiList::Size() {return kanjiList.size();}
135 void KanjiList::InplaceMerge(vector<wchar_t>& v, unordered_map<wchar_t,int>& indexer, int start, int middle, int end) {
136 /* Merge is implemented as a bubble sort started at halfway
137 (since we know the first whole half is already sorted) */
138 int i, highIndex;
139 wchar_t temp;
140 i = highIndex = middle;
141 while(i<end) {
142 if(i>0 && (indexer[v[i]] < indexer[v[i-1]])) {
143 temp = v[i-1];
144 v[i-1] = v[i];
145 v[i] = temp;
146 i--;
147 } else {
148 highIndex++;
149 i=highIndex;
155 SortKanjiList sorts the currently loaded kanji list based upon a specified
156 KANJIDIC field, like F (frequency) or G (jouyou grade). Sorting is done
157 via a merged sort. This might be overkill, but I wanted to try doing
158 it, so I did.
160 void KanjiList::Sort(int sortType, bool reverseOrder) {
161 int totalSize = kanjiList.size();
162 if(totalSize<=1) return; /* Size 0 or 1 list is already sorted */
164 myCharIndexer = new unordered_map<wchar_t,int>;
165 myCharIndexer->clear();
166 vector<wchar_t>::iterator vi;
168 /* Create index based on the sort type */
169 int value;
170 const KDict* kd = KDict::Get();
171 const KInfo* ki;
172 for(vi=kanjiList.begin();vi!=kanjiList.end();vi++) {
173 ki = kd->GetEntry(*vi);
174 if(sortType==ST_GRADE)
175 value = ki->grade;
176 else if(sortType==ST_FREQUENCY)
177 value = ki->freq;
178 else if(sortType==ST_JLPT)
179 value = 5 - ki->jlpt; /* Should make value = 1-4 */
180 else {
181 ostringstream oss;
182 oss << "Unknown sort type: " << sortType << endl;
183 el.Push(EL_Error, oss.str());
184 break;
186 if(value==0) value=0x7FFFFFFF;
187 (*myCharIndexer)[*vi] = value;
190 /* Sort our data based upon the stored key in the hash table */
191 /* This merged sort code was created based upon code at:
192 http://en.wikipedia.org/wiki/Merge_sort#C.2B.2B_implementation
193 These pages were also referred to:
194 http://www.cppreference.com/cppalgorithm/merge.html
195 http://www.cppreference.com/cppalgorithm/inplace_merge.html */
196 int rangeSize, rangeStart;
197 for(rangeSize=1; rangeSize<totalSize; rangeSize *= 2) {
198 for(rangeStart=0;
199 rangeStart<totalSize-rangeSize;
200 rangeStart += rangeSize*2) {
201 /* Our range sort function is HERE */
202 InplaceMerge(
203 kanjiList,
204 *myCharIndexer,
205 rangeStart,
206 rangeStart + rangeSize,
207 min(rangeStart + rangeSize*2, totalSize));
211 delete myCharIndexer;
214 wchar_t KanjiList::operator[](unsigned int index) {
215 if(index<kanjiList.size()) return kanjiList[index];
216 return L'\0';
219 int KanjiList::GetIndexByChar(wchar_t c) {
220 int i, len = kanjiList.size();
221 for(i=0;i<len;i++)
222 if(kanjiList[i]==c) return i;
223 return -1;
226 vector<wchar_t>& KanjiList::GetVector() {return kanjiList;}