2 * Copyright (c) 2001 - 2005
3 * Yuan Wang. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Redistributions in any form must be accompanied by information on
14 * how to obtain complete source code for the X-Diff software and any
15 * accompanying software that uses the X-Diff software. The source code
16 * must either be included in the distribution or be available for no
17 * more than the cost of distribution plus a nominal fee, and must be
18 * freely redistributable under reasonable conditions. For an executable
19 * file, complete source code means the source code for all modules it
20 * contains. It does not include source code for modules or files that
21 * typically accompany the major components of the operating system on
22 * which the executable file runs.
24 * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
25 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
27 * ARE DISCLAIMED. IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
28 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
40 using __gnu_cxx::hash
;
41 using __gnu_cxx::hash_map
;
49 const int XTree::MATCH
= 0;
50 const int XTree::NO_MATCH
= -1;
51 const int XTree::INSERT
= -1;
52 const int XTree::DELETE
= -1;
53 const int XTree::CHANGE
= 1;
54 const int XTree::NULL_NODE
= -1;
55 const int XTree::NO_CONNECTION
= 1048576;
57 const int XTree::_TOP_LEVEL_CAPACITY
= 16384;
58 const int XTree::_BOT_LEVEL_CAPACITY
= 4096;
59 const int XTree::_ROOT
= 0;
66 _topCap
= _TOP_LEVEL_CAPACITY
;
67 _botCap
= _BOT_LEVEL_CAPACITY
;
71 XTree::XTree(int topcap
, int botcap
)
82 int size
= _elementIndex
/ _botCap
;
85 for (int i
= 0; i
<= size
; i
++)
87 delete[] _valueIndex
[i
];
88 delete[] _firstChild
[i
];
89 delete[] _nextSibling
[i
];
90 delete[] _childrenCount
[i
];
91 delete[] _matching
[i
];
92 delete[] _isAttribute
[i
];
93 delete[] _hashValue
[i
];
97 delete[] _nextSibling
;
98 delete[] _childrenCount
;
101 delete[] _isAttribute
;
104 size
= _valueCount
/ _botCap
;
105 for (int i
= 0; i
<= size
; i
++)
110 void XTree::_initialize()
112 _firstChild
= new int*[_topCap
];
113 _nextSibling
= new int*[_topCap
];
114 _childrenCount
= new int*[_topCap
];
115 _valueIndex
= new int*[_topCap
];
116 _matching
= new int*[_topCap
];
117 _isAttribute
= new bool*[_topCap
];
118 _hashValue
= new unsigned long long*[_topCap
];
119 _value
= new string
*[_topCap
];
121 _value
[0] = new string
[_botCap
];
124 _valueCount
= _botCap
- 1;
127 void XTree::_expand(int topid
)
129 _firstChild
[topid
] = new int[_botCap
];
130 _nextSibling
[topid
] = new int[_botCap
];
131 _childrenCount
[topid
] = new int[_botCap
];
132 _valueIndex
[topid
] = new int[_botCap
];
133 _matching
[topid
] = new int[_botCap
];
134 _isAttribute
[topid
] = new bool[_botCap
];
135 _hashValue
[topid
] = new unsigned long long[_botCap
];
137 for (int i
= 0; i
< _botCap
; i
++)
139 _firstChild
[topid
][i
] = NULL_NODE
;
140 _nextSibling
[topid
][i
] = NULL_NODE
;
141 _childrenCount
[topid
][i
]= 0;
142 _matching
[topid
][i
] = MATCH
;
143 _valueIndex
[topid
][i
] = -1;
144 _isAttribute
[topid
][i
] = false;
148 int XTree::addElement(int pid
, int lsid
, string tagName
)
152 int topid
= _elementIndex
/ _botCap
;
153 int botid
= _elementIndex
% _botCap
;
157 // Check if we've got the element name
158 hash_map
<string
, int, HashString
>::const_iterator
159 hit
= _tagNames
.find(tagName
);
160 if (hit
!= _tagNames
.end())
162 int id
= hit
->second
;
163 _valueIndex
[topid
][botid
] = id
;
168 _value
[0][_tagIndex
] = tagName
;
169 _tagNames
[tagName
] = _tagIndex
;
170 _valueIndex
[topid
][botid
] = _tagIndex
;
173 if (pid
== NULL_NODE
)
174 return _elementIndex
;
176 int ptopid
= pid
/ _botCap
;
177 int pbotid
= pid
% _botCap
;
178 // parent-child relation or sibling-sibling ralation
179 if (lsid
== NULL_NODE
)
180 _firstChild
[ptopid
][pbotid
] = _elementIndex
;
182 _nextSibling
[lsid
/_botCap
][lsid
%_botCap
] = _elementIndex
;
184 // update children count
185 _childrenCount
[ptopid
][pbotid
]++;
187 return _elementIndex
;
190 int XTree::addText(int eid
, int lsid
, string text
, unsigned long long value
)
194 int topid
= _elementIndex
/ _botCap
;
195 int botid
= _elementIndex
% _botCap
;
199 int etopid
= eid
/ _botCap
;
200 int ebotid
= eid
% _botCap
;
201 if (lsid
== NULL_NODE
)
202 _firstChild
[etopid
][ebotid
] = _elementIndex
;
204 _nextSibling
[lsid
/_botCap
][lsid
%_botCap
] = _elementIndex
;
206 _childrenCount
[etopid
][ebotid
]++;
207 _hashValue
[topid
][botid
] = value
;
210 int vtopid
= _valueCount
/ _botCap
;
211 int vbotid
= _valueCount
% _botCap
;
213 _value
[vtopid
] = new string
[_botCap
];
215 _value
[vtopid
][vbotid
] = text
;
216 _valueIndex
[topid
][botid
] = _valueCount
;
218 return _elementIndex
;
221 int XTree::addAttribute(int eid
, int lsid
, string name
, string value
,
222 unsigned long long valuehash
,
223 unsigned long long attrhash
)
225 // attribute name first.
226 int aid
= addElement(eid
, lsid
, name
);
228 // attribute value second.
229 addText(aid
, NULL_NODE
, value
, valuehash
);
232 int atopid
= aid
/ _botCap
;
233 int abotid
= aid
% _botCap
;
234 _isAttribute
[atopid
][abotid
] = true;
235 _hashValue
[atopid
][abotid
] = attrhash
;
240 void XTree::addHashValue(int eid
, unsigned long long value
)
242 _hashValue
[eid
/_botCap
][eid
%_botCap
] = value
;
245 void XTree::addCDATA(int eid
, size_t position
)
247 hash_map
<int, vector
<size_t> >::const_iterator
248 hit
= _cdataTable
.find(eid
);
249 if (hit
!= _cdataTable
.end())
251 vector
<size_t> poslist
= hit
->second
;
252 poslist
.push_back(position
);
253 _cdataTable
[eid
] = poslist
;
257 vector
<size_t> poslist
;
258 poslist
.push_back(position
);
259 _cdataTable
[eid
] = poslist
;
263 void XTree::addMatching(int eid
, int matchType
, int matchNode
)
265 if (matchType
== NO_MATCH
)
266 _matching
[eid
/_botCap
][eid
%_botCap
] = NO_MATCH
;
267 else if (matchType
== MATCH
)
268 _matching
[eid
/_botCap
][eid
%_botCap
] = MATCH
;
270 _matching
[eid
/_botCap
][eid
%_botCap
] = matchNode
+ 1;
273 void XTree::getMatching(int eid
, int &matchType
, int &matchNode
)
275 int mid
= _matching
[eid
/_botCap
][eid
%_botCap
];
277 matchType
= NO_MATCH
;
278 else if (mid
== MATCH
)
292 int XTree::getFirstChild(int eid
)
294 int cid
= _firstChild
[eid
/_botCap
][eid
%_botCap
];
297 int ctopid
= cid
/ _botCap
;
298 int cbotid
= cid
% _botCap
;
299 if (_isAttribute
[ctopid
][cbotid
])
300 cid
= _nextSibling
[ctopid
][cbotid
];
308 int XTree::getNextSibling(int eid
)
310 return _nextSibling
[eid
/_botCap
][eid
%_botCap
];
313 int XTree::getFirstAttribute(int eid
)
315 int aid
= _firstChild
[eid
/_botCap
][eid
%_botCap
];
316 if ((aid
> _ROOT
) && (_isAttribute
[aid
/_botCap
][aid
%_botCap
]))
322 int XTree::getNextAttribute(int aid
)
324 int aid1
= _nextSibling
[aid
/_botCap
][aid
%_botCap
];
325 if ((aid1
> _ROOT
) && (_isAttribute
[aid1
/_botCap
][aid1
%_botCap
]))
331 string
XTree::getAttributeValue(int aid
)
333 int cid
= _firstChild
[aid
/_botCap
][aid
%_botCap
];
334 int index
= _valueIndex
[cid
/_botCap
][cid
%_botCap
];
336 return _value
[index
/_botCap
][index
%_botCap
];
341 unsigned long long XTree::getHashValue(int eid
)
343 return _hashValue
[eid
/_botCap
][eid
%_botCap
];
346 vector
<size_t> XTree::getCDATA(int eid
)
348 hash_map
<int, vector
<size_t> >::const_iterator
349 hit
= _cdataTable
.find(eid
);
350 if (hit
!= _cdataTable
.end())
359 int XTree::getChildrenCount(int eid
)
361 return _childrenCount
[eid
/_botCap
][eid
%_botCap
];
364 int XTree::getDecendentsCount(int eid
)
366 int topid
= eid
/ _botCap
;
367 int botid
= eid
% _botCap
;
368 int count
= _childrenCount
[topid
][botid
];
372 int cid
= _firstChild
[topid
][botid
];
375 count
+= getDecendentsCount(cid
);
376 cid
= _nextSibling
[cid
/_botCap
][cid
%_botCap
];
382 int XTree::getValueIndex(int eid
)
384 return _valueIndex
[eid
/_botCap
][eid
%_botCap
];
387 string
XTree::getValue(int index
)
389 return _value
[index
/_botCap
][index
%_botCap
];
392 string
XTree::getTag(int eid
)
394 int index
= _valueIndex
[eid
/_botCap
][eid
%_botCap
];
395 return _value
[0][index
];
398 string
XTree::getText(int eid
)
400 int index
= _valueIndex
[eid
/_botCap
][eid
%_botCap
];
401 return _value
[index
/_botCap
][index
%_botCap
];
404 bool XTree::isElement(int eid
)
406 int index
= _valueIndex
[eid
/_botCap
][eid
%_botCap
];
413 bool XTree::isLeaf(int eid
)
415 int index
= _valueIndex
[eid
/_botCap
][eid
%_botCap
];
422 bool XTree::isAttribute(int eid
)
424 return _isAttribute
[eid
/_botCap
][eid
%_botCap
];
427 int XTree::getNodeCount()
429 return _elementIndex
;
434 cout
<< "eid\tfirstC\tnextS\tattr?\tcCount\thash\tmatch\tvalue\n";
435 for (int i
= _ROOT
; i
<= _elementIndex
; i
++)
437 int topid
= i
/ _botCap
;
438 int botid
= i
% _botCap
;
439 int vid
= _valueIndex
[topid
][botid
];
440 int vtopid
= vid
/ _botCap
;
441 int vbotid
= vid
% _botCap
;
442 cout
<< i
<< "\t" << _firstChild
[topid
][botid
] << "\t"
443 << _nextSibling
[topid
][botid
] << "\t"
444 << _isAttribute
[topid
][botid
] << "\t"
445 << _childrenCount
[topid
][botid
] << "\t"
446 << _hashValue
[topid
][botid
] << "\t"
447 << _matching
[topid
][botid
] << "\t"
448 << _value
[vtopid
][vbotid
] << endl
;
452 void XTree::dumpHash()
454 cout
<< "hash table:" << _tagNames
.size() << endl
;
455 hash_map
<string
, int, HashString
>::const_iterator
456 hit
;// = _tagNames.begin();
457 for(hit
=_tagNames
.begin(); hit
!= _tagNames
.end(); hit
++)
459 cout
<< hit
->first
<< "\t" << hit
->second
<< endl
;