Cleaned code a bit, and make it compilable in Debian using gcc 4.4.5
[x-diff.git] / XTree.cpp
blobf3cc06ebe2117472ca737e9fe9e534105317bb84
1 /**
2 * Copyright (c) 2001 - 2005
3 * Yuan Wang. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Redistributions in any form must be accompanied by information on
14 * how to obtain complete source code for the X-Diff software and any
15 * accompanying software that uses the X-Diff software. The source code
16 * must either be included in the distribution or be available for no
17 * more than the cost of distribution plus a nominal fee, and must be
18 * freely redistributable under reasonable conditions. For an executable
19 * file, complete source code means the source code for all modules it
20 * contains. It does not include source code for modules or files that
21 * typically accompany the major components of the operating system on
22 * which the executable file runs.
24 * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
25 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
27 * ARE DISCLAIMED. IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
28 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
38 #include "XTree.hpp"
40 using __gnu_cxx::hash;
41 using __gnu_cxx::hash_map;
42 using std::vector;
43 using std::string;
44 using std::cin;
45 using std::cout;
46 using std::cerr;
47 using std::endl;
49 const int XTree::MATCH = 0;
50 const int XTree::NO_MATCH = -1;
51 const int XTree::INSERT = -1;
52 const int XTree::DELETE = -1;
53 const int XTree::CHANGE = 1;
54 const int XTree::NULL_NODE = -1;
55 const int XTree::NO_CONNECTION = 1048576;
57 const int XTree::_TOP_LEVEL_CAPACITY = 16384;
58 const int XTree::_BOT_LEVEL_CAPACITY = 4096;
59 const int XTree::_ROOT = 0;
62 XTree::XTree()
63 : _tagNames(256),
64 _cdataTable(256)
66 _topCap = _TOP_LEVEL_CAPACITY;
67 _botCap = _BOT_LEVEL_CAPACITY;
68 _initialize();
71 XTree::XTree(int topcap, int botcap)
72 : _tagNames(256),
73 _cdataTable(256)
75 _topCap = topcap;
76 _botCap = botcap;
77 _initialize();
80 XTree::~XTree()
82 int size = _elementIndex / _botCap;
83 if (size >= 0)
85 for (int i = 0; i <= size; i++)
87 delete[] _valueIndex[i];
88 delete[] _firstChild[i];
89 delete[] _nextSibling[i];
90 delete[] _childrenCount[i];
91 delete[] _matching[i];
92 delete[] _isAttribute[i];
93 delete[] _hashValue[i];
96 delete[] _firstChild;
97 delete[] _nextSibling;
98 delete[] _childrenCount;
99 delete[] _valueIndex;
100 delete[] _matching;
101 delete[] _isAttribute;
102 delete[] _hashValue;
104 size = _valueCount / _botCap;
105 for (int i = 0; i <= size; i++)
106 delete[] _value[i];
107 delete[] _value;
110 void XTree::_initialize()
112 _firstChild = new int*[_topCap];
113 _nextSibling = new int*[_topCap];
114 _childrenCount = new int*[_topCap];
115 _valueIndex = new int*[_topCap];
116 _matching = new int*[_topCap];
117 _isAttribute = new bool*[_topCap];
118 _hashValue = new unsigned long long*[_topCap];
119 _value = new string*[_topCap];
121 _value[0] = new string[_botCap];
122 _elementIndex = -1;
123 _tagIndex = -1;
124 _valueCount = _botCap - 1;
127 void XTree::_expand(int topid)
129 _firstChild[topid] = new int[_botCap];
130 _nextSibling[topid] = new int[_botCap];
131 _childrenCount[topid] = new int[_botCap];
132 _valueIndex[topid] = new int[_botCap];
133 _matching[topid] = new int[_botCap];
134 _isAttribute[topid] = new bool[_botCap];
135 _hashValue[topid] = new unsigned long long[_botCap];
137 for (int i = 0; i < _botCap; i++)
139 _firstChild[topid][i] = NULL_NODE;
140 _nextSibling[topid][i] = NULL_NODE;
141 _childrenCount[topid][i]= 0;
142 _matching[topid][i] = MATCH;
143 _valueIndex[topid][i] = -1;
144 _isAttribute[topid][i] = false;
148 int XTree::addElement(int pid, int lsid, string tagName)
150 _elementIndex++;
152 int topid = _elementIndex / _botCap;
153 int botid = _elementIndex % _botCap;
154 if (botid == 0)
155 _expand(topid);
157 // Check if we've got the element name
158 hash_map <string, int, HashString>::const_iterator
159 hit = _tagNames.find(tagName);
160 if (hit != _tagNames.end())
162 int id = hit->second;
163 _valueIndex[topid][botid] = id;
165 else
167 _tagIndex++;
168 _value[0][_tagIndex] = tagName;
169 _tagNames[tagName] = _tagIndex;
170 _valueIndex[topid][botid] = _tagIndex;
173 if (pid == NULL_NODE)
174 return _elementIndex;
176 int ptopid = pid / _botCap;
177 int pbotid = pid % _botCap;
178 // parent-child relation or sibling-sibling ralation
179 if (lsid == NULL_NODE)
180 _firstChild[ptopid][pbotid] = _elementIndex;
181 else
182 _nextSibling[lsid/_botCap][lsid%_botCap] = _elementIndex;
184 // update children count
185 _childrenCount[ptopid][pbotid]++;
187 return _elementIndex;
190 int XTree::addText(int eid, int lsid, string text, unsigned long long value)
192 _elementIndex++;
194 int topid = _elementIndex / _botCap;
195 int botid = _elementIndex % _botCap;
196 if (botid == 0)
197 _expand(topid);
199 int etopid = eid / _botCap;
200 int ebotid = eid % _botCap;
201 if (lsid == NULL_NODE)
202 _firstChild[etopid][ebotid] = _elementIndex;
203 else
204 _nextSibling[lsid/_botCap][lsid%_botCap] = _elementIndex;
206 _childrenCount[etopid][ebotid]++;
207 _hashValue[topid][botid] = value;
209 _valueCount++;
210 int vtopid = _valueCount / _botCap;
211 int vbotid = _valueCount % _botCap;
212 if (vbotid == 0)
213 _value[vtopid] = new string[_botCap];
215 _value[vtopid][vbotid] = text;
216 _valueIndex[topid][botid] = _valueCount;
218 return _elementIndex;
221 int XTree::addAttribute(int eid, int lsid, string name, string value,
222 unsigned long long valuehash,
223 unsigned long long attrhash)
225 // attribute name first.
226 int aid = addElement(eid, lsid, name);
228 // attribute value second.
229 addText(aid, NULL_NODE, value, valuehash);
231 // hash value third
232 int atopid = aid / _botCap;
233 int abotid = aid % _botCap;
234 _isAttribute[atopid][abotid] = true;
235 _hashValue[atopid][abotid] = attrhash;
237 return aid;
240 void XTree::addHashValue(int eid, unsigned long long value)
242 _hashValue[eid/_botCap][eid%_botCap] = value;
245 void XTree::addCDATA(int eid, size_t position)
247 hash_map<int, vector<size_t> >::const_iterator
248 hit = _cdataTable.find(eid);
249 if (hit != _cdataTable.end())
251 vector<size_t> poslist = hit->second;
252 poslist.push_back(position);
253 _cdataTable[eid] = poslist;
255 else
257 vector<size_t> poslist;
258 poslist.push_back(position);
259 _cdataTable[eid] = poslist;
263 void XTree::addMatching(int eid, int matchType, int matchNode)
265 if (matchType == NO_MATCH)
266 _matching[eid/_botCap][eid%_botCap] = NO_MATCH;
267 else if (matchType == MATCH)
268 _matching[eid/_botCap][eid%_botCap] = MATCH;
269 else
270 _matching[eid/_botCap][eid%_botCap] = matchNode + 1;
273 void XTree::getMatching(int eid, int &matchType, int &matchNode)
275 int mid = _matching[eid/_botCap][eid%_botCap];
276 if (mid == NO_MATCH)
277 matchType = NO_MATCH;
278 else if (mid == MATCH)
279 matchType = MATCH;
280 else
282 matchType = CHANGE;
283 matchNode = mid - 1;
287 int XTree::getRoot()
289 return _ROOT;
292 int XTree::getFirstChild(int eid)
294 int cid = _firstChild[eid/_botCap][eid%_botCap];
295 while (cid > _ROOT)
297 int ctopid = cid / _botCap;
298 int cbotid = cid % _botCap;
299 if (_isAttribute[ctopid][cbotid])
300 cid = _nextSibling[ctopid][cbotid];
301 else
302 return cid;
305 return NULL_NODE;
308 int XTree::getNextSibling(int eid)
310 return _nextSibling[eid/_botCap][eid%_botCap];
313 int XTree::getFirstAttribute(int eid)
315 int aid = _firstChild[eid/_botCap][eid%_botCap];
316 if ((aid > _ROOT) && (_isAttribute[aid/_botCap][aid%_botCap]))
317 return aid;
318 else
319 return NULL_NODE;
322 int XTree::getNextAttribute(int aid)
324 int aid1 = _nextSibling[aid/_botCap][aid%_botCap];
325 if ((aid1 > _ROOT) && (_isAttribute[aid1/_botCap][aid1%_botCap]))
326 return aid1;
327 else
328 return NULL_NODE;
331 string XTree::getAttributeValue(int aid)
333 int cid = _firstChild[aid/_botCap][aid%_botCap];
334 int index = _valueIndex[cid/_botCap][cid%_botCap];
335 if (index > _ROOT)
336 return _value[index/_botCap][index%_botCap];
337 else
338 return NULL;
341 unsigned long long XTree::getHashValue(int eid)
343 return _hashValue[eid/_botCap][eid%_botCap];
346 vector<size_t> XTree::getCDATA(int eid)
348 hash_map<int, vector<size_t> >::const_iterator
349 hit = _cdataTable.find(eid);
350 if (hit != _cdataTable.end())
351 return hit->second;
352 else
354 vector<size_t> a;
355 return a;
359 int XTree::getChildrenCount(int eid)
361 return _childrenCount[eid/_botCap][eid%_botCap];
364 int XTree::getDecendentsCount(int eid)
366 int topid = eid / _botCap;
367 int botid = eid % _botCap;
368 int count = _childrenCount[topid][botid];
369 if (count == 0)
370 return 0;
372 int cid = _firstChild[topid][botid];
373 while (cid > _ROOT)
375 count += getDecendentsCount(cid);
376 cid = _nextSibling[cid/_botCap][cid%_botCap];
379 return count;
382 int XTree::getValueIndex(int eid)
384 return _valueIndex[eid/_botCap][eid%_botCap];
387 string XTree::getValue(int index)
389 return _value[index/_botCap][index%_botCap];
392 string XTree::getTag(int eid)
394 int index = _valueIndex[eid/_botCap][eid%_botCap];
395 return _value[0][index];
398 string XTree::getText(int eid)
400 int index = _valueIndex[eid/_botCap][eid%_botCap];
401 return _value[index/_botCap][index%_botCap];
404 bool XTree::isElement(int eid)
406 int index = _valueIndex[eid/_botCap][eid%_botCap];
407 if (index < _botCap)
408 return true;
409 else
410 return false;
413 bool XTree::isLeaf(int eid)
415 int index = _valueIndex[eid/_botCap][eid%_botCap];
416 if (index < _botCap)
417 return false;
418 else
419 return true;
422 bool XTree::isAttribute(int eid)
424 return _isAttribute[eid/_botCap][eid%_botCap];
427 int XTree::getNodeCount()
429 return _elementIndex;
432 void XTree::dump()
434 cout << "eid\tfirstC\tnextS\tattr?\tcCount\thash\tmatch\tvalue\n";
435 for (int i = _ROOT; i <= _elementIndex; i++)
437 int topid = i / _botCap;
438 int botid = i % _botCap;
439 int vid = _valueIndex[topid][botid];
440 int vtopid = vid / _botCap;
441 int vbotid = vid % _botCap;
442 cout << i << "\t" << _firstChild[topid][botid] << "\t"
443 << _nextSibling[topid][botid] << "\t"
444 << _isAttribute[topid][botid] << "\t"
445 << _childrenCount[topid][botid] << "\t"
446 << _hashValue[topid][botid] << "\t"
447 << _matching[topid][botid] << "\t"
448 << _value[vtopid][vbotid] << endl;
452 void XTree::dumpHash()
454 cout << "hash table:" << _tagNames.size() << endl;
455 hash_map<string, int, HashString>::const_iterator
456 hit;// = _tagNames.begin();
457 for(hit=_tagNames.begin(); hit != _tagNames.end(); hit++)
459 cout << hit->first << "\t" << hit->second << endl;
460 //hit++;