1 /***************************************************************************
2 Copyright 2006 David Nolden <david.nolden.kdevelop@art-master.de>
3 ***************************************************************************/
5 /***************************************************************************
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 ***************************************************************************/
22 ///For higher orders to work, the sum-type has to be signed
23 #define DEFAULTSUMTYPE int
24 #define DEFAULTORDER 2
27 ///State-based-build can only nearly complete with extreme compiler-parameters set, although it should theoretically be faster, if the compile would optimize it the right way.
29 //#define STATEBASEDBUILD
31 namespace FuzzySearch
{
34 * How should all the differences be weighed together to a compound difference?
36 * The weighting still needs some work.
40 template < class Compare1
, class Compare2
, class Then
, class Else
>
46 template < class Compare
, class Then
, class Else
>
47 class IfEqual
<Compare
, Compare
, Then
, Else
> {
53 class PrimitiveVector
{
57 PrimitiveVector() : data_( 0 ), size_( 0 ) {}
65 ///All data is lost when this is called
66 void rawResize( unsigned int i
) {
75 data_
= new Item
[ i
];
79 Item
& operator [] ( uint index
) {
80 return data_
[ index
];
83 const Item
& operator [] ( uint index
) const {
84 return data_
[ index
];
92 ///unsigned int is ok for document-sizes of up to about 16 Megabytes, 64-bit integers create problems here
94 template <int maxDepth
, class SumType
= DEFAULTSUMTYPE
>
96 SumType sums
[ maxDepth
];
98 SumGroup( SumType
* _sums
= 0 ) {
100 for ( int a
= 0; a
< maxDepth
; a
++ ) {
101 sums
[ a
] = _sums
[ a
];
104 for ( int a
= 0; a
< maxDepth
; a
++ )
110 SumType sumDifference( const SumGroup& rhs ) const {
112 for( int a = 0; a < maxDepth; a++ ) {
114 if( sums[a] > rhs.sums[a] )
115 dif = sums[a] - rhs.sums[a];
117 dif = rhs.sums[a] - sums[a];
119 ret += ( dif * (maxDepth - a ) ) >> a;
124 struct VecSumGetter
{
126 VecSumGetter() : sumVec( 0 ) {}
127 VecSumGetter( SumType
** vec
) : sumVec( vec
) {}
129 inline bool sumValid( const uint num
) const {
130 return ( bool ) sumVec
[ num
];
134 inline SumType
sum( const uint num
) const {
135 return * ( sumVec
[ num
] );
140 inline bool sumValid( const uint num
) const {
143 inline SumType
sum( const uint num
) const {
148 struct GroupSumGetter
{
150 GroupSumGetter( SumGroup
& grp
) : group( grp
) {}
152 inline bool sumValid( const uint num
) const {
157 inline SumType
sum( const uint num
) const {
158 return group
.sums
[ num
];
162 template <class SumGetter
>
163 SumType
sumDifference( const SumGetter
& getter
) const {
165 for ( int a
= 0; a
< maxDepth
; a
++ ) {
167 if ( getter
.sumValid( a
) ) {
168 if ( sums
[ a
] > getter
.sum( a
) )
169 dif
= sums
[ a
] - getter
.sum( a
);
171 dif
= getter
.sum( a
) - sums
[ a
];
176 ///The weighting is a bit primitive at the moment
177 ret
+= ( dif
* ( ( maxDepth
- a
) + 1 ) ) /* >> a*/;
182 template <class Archive
>
183 void serialize( Archive
& arch
, const uint
/*version*/ ) {
188 template < int Condition
, class Then
, class Else
>
193 template < class Then
, class Else
>
194 struct IfThen
<0, Then
, Else
> {
199 template <int maxDepth
, typename SumType
, int order
, bool dummy
>
200 class DifferenceSumReference
{
202 template <class Archive
>
203 void serialize( Archive
& /*arch*/, const uint
/*version*/ ) {}
207 template <int maxDepth
, class SumType
= DEFAULTSUMTYPE
, int order
= DEFAULTORDER
>
211 SumGroup
<maxDepth
, SumType
> leftMarker
;
212 SumGroup
<maxDepth
, SumType
> rightMarker
;
214 DifferenceSumReference
< maxDepth
, SumType
, order
- 1, ( order
- 1 ) == 0 > nextOrder_
;
216 SumReference() : valid_( false ) {}
218 template <class Archive
>
219 void serialize( Archive
& arch
, const uint
/*version*/ ) {
220 arch
& valid_
& leftMarker
& rightMarker
;
224 void setValid( bool valid
) {
228 bool isValid() const {
234 template <int maxDepth
, typename SumType
, int order
>
235 struct DifferenceSumReference
<maxDepth
, SumType
, order
, false> {
236 SumReference
<maxDepth
, SumType
, order
> reference_
;
238 template <class Archive
>
239 void serialize( Archive
& arch
, const uint
/*version*/ ) {
244 /** The problem with a single sum-search: 123456| is detected as exactly same as 124356|.
245 * For that reason a parallel difference-sum-search can be used, which would look like 111111| in the first, and 112(-1)21| in the second case.
246 * SumType must be signed for that to work.
250 template <int maxDepth
, typename SumType
, int order
, bool dummy
>
251 class DifferenceSumSearch
{
253 DifferenceSumSearch( const string
& /*text*/ ) {}
254 template <class Type
>
255 void fillReference( Type
& /*t*/, int /*position*/ ) {}
257 template <class PrevType
, class Evaluator
>
258 struct FindRefProgress
{
259 FindRefProgress( PrevType
& /*t*/, const SumReference
< maxDepth
, SumType
, order
+ 1 > & /*_ref*/, Evaluator
& /*_eval*/ ) {}
264 SumType
rightDiff() {
274 template <class SumType
, int Depth
, int EndDepth
>
275 struct SumBuildState
: public SumBuildState
< SumType
, Depth
+ 1, EndDepth
> {
276 typedef SumBuildState
< SumType
, Depth
, EndDepth
> Self
;
277 typedef SumBuildState
< SumType
, Depth
+ 1, EndDepth
> Next
;
278 typedef SumBuildState
< SumType
, 0, EndDepth
> Head
;
287 //uint maxCount = 1<<b;
289 SumBuildState( PrimitiveVector
<SumType
>* sumVecs_
, const char* tx
) : Next( sumVecs_
, tx
), tempSumC( 0 ), tempSum( 0 ), text( tx
), ctext( tx
), offset( 0 ) {
290 v
= &( sumVecs_
[ Depth
][ 0 ] );
293 inline void go1( register const char* ctex
, register const uint offset
) {
294 Next::go1( ctex
, offset
);
296 if ( tempSumC
== ( uint
) ( 1 << Depth
) ) {
297 ///remove the first summand
298 tempSum
-= *( ctex
- ( 1 << Depth
) );
304 ( * ( v
+ offset
) ) = tempSum
;
308 /* inline void go2( register uint offset ) {
314 go1( ctext
, offset
);
323 template <class SumType
, int Depth
>
324 struct SumBuildState
<SumType
, Depth
, Depth
> {
325 SumBuildState( PrimitiveVector
<SumType
>*, const char* ) {}
328 inline void go1( register const char* ctex
, register const uint offset
) {}
329 // inline void go2() {
335 template <int maxDepth
, typename SumType
= DEFAULTSUMTYPE
, int order
= DEFAULTORDER
>
337 typedef SumSearch
<maxDepth
, SumType
, order
> SelfSumSearch
;
338 typedef PrimitiveVector
< SumType
> SumVector
;
340 SumVector sumVecs_
[ maxDepth
];
341 typedef DifferenceSumSearch
< maxDepth
, SumType
, order
- 1, ( order
- 1 ) == 0 > NextOrderType
;
343 #ifdef STATEBASEDBUILD
345 void buildSumGroups() {
346 for ( int a
= 0; a
< maxDepth
; a
++ ) {
347 sumVecs_
[ a
].rawResize( text_
.size() );
350 SumBuildState
< SumType
, 0, maxDepth
> state( sumVecs_
, text_
.c_str() );
352 uint textSize
= text_
.size();
354 for ( uint a
= 0; a
< textSize
; a
++ ) {
359 void buildSumGroups() {
360 for ( int a
= 0; a
< maxDepth
; a
++ ) {
361 sumVecs_
[ a
].rawResize( text_
.size() );
364 const char* text
= text_
.c_str();
365 uint textSize
= text_
.size();
367 for ( uint b
= 0; b
< maxDepth
; b
++ ) {
369 SumType
* v
= &( sumVecs_
[ b
][ 0 ] );
372 uint maxCount
= 1 << b
;
374 const char* ctext
= text
;
376 for ( uint a
= 0; a
< textSize
; a
++ ) {
377 if ( tempSumC
== maxCount
) {
378 ///remove the first summand
379 tempSum
-= *( ctext
- maxCount
);
384 tempSum
+= ( *ctext
);
395 NextOrderType nextOrder_
;
397 SumSearch( const string
& text
) : nextOrder_( text
) {
403 SumReference
<maxDepth
, SumType
, order
> getReference( int position
) {
404 SumReference
<maxDepth
, SumType
, order
> ret
;
405 fillReference( ret
, position
);
407 nextOrder_
.fillReference( ret
, position
);
412 ///Should not be used from outside
413 void fillReference( SumReference
<maxDepth
, SumType
, order
>& ret
, int position
) {
414 int leftPos
= position
- 1;
415 SumType leftSums
[ maxDepth
];
416 SumType rightSums
[ maxDepth
];
418 if ( leftPos
>= 0 ) {
419 for ( int a
= 0; a
< maxDepth
; a
++ )
420 leftSums
[ a
] = sumVecs_
[ a
] [ leftPos
];
422 for ( int a
= 0; a
< maxDepth
; a
++ )
426 for ( int a
= 0; a
< maxDepth
; a
++ ) {
427 uint pos
= leftPos
+ ( 1 << a
);
428 if ( pos
< sumVecs_
[ 0 ].size() )
429 rightSums
[ a
] = sumVecs_
[ a
][ pos
];
434 ret
.leftMarker
= SumGroup
<maxDepth
, SumType
>( leftSums
);
435 ret
.rightMarker
= SumGroup
<maxDepth
, SumType
>( rightSums
);
436 ret
.setValid( true );
440 struct SimpleEvaluator
{
444 SimpleEvaluator() : minDif( 100000 ), minDifPos( -1 ) {}
445 /**This should return a new compound difference-value. In the end, the position with the lowest difference will be returned,
446 *so this may be used to implement some weighting, maybe according to the position */
447 inline void operator () ( const SumType leftDifference
, const SumType rightDifference
, const uint position
) {
448 SumType dif
= ( leftDifference
+ 1 ) * ( rightDifference
+ 1 );
450 if ( dif
< minDif
) {
451 //cout << "best dif: " << dif << endl;
453 minDifPos
= position
;
462 SumVector
* sumVecs() {
466 template <class Evaluator
>
467 struct FindReferenceProgress
{
468 SumReference
<maxDepth
, SumType
, order
>& ref
;
471 typename
NextOrderType::template FindRefProgress
<SelfSumSearch
, Evaluator
>
474 SumType
* leftOffset
[ maxDepth
];
475 SumType
* rightOffset
[ maxDepth
];
478 SumType
* finalRight
[ maxDepth
];
482 typename SumGroup
<maxDepth
, SumType
>::VecSumGetter leftGetter
;
483 typename SumGroup
<maxDepth
, SumType
>::VecSumGetter rightGetter
;
484 typedef typename SumGroup
<maxDepth
, SumType
>::ZeroGetter ZeroGetter
;
486 FindReferenceProgress( SelfSumSearch
& search
, const SumReference
<maxDepth
, SumType
, order
>& _ref
, Evaluator
& _eval
) : ref( const_cast<SumReference
<maxDepth
, SumType
, order
>&>( _ref
) ), eval( _eval
), nextOrder_( search
, ref
, _eval
), sumVecs_( search
.sumVecs() ) {
487 for ( uint a
= 0; a
< maxDepth
; a
++ ) {
488 leftOffset
[ a
] = &( sumVecs_
[ a
][ 0 - 1 ] );
490 if ( ( uint
) ( 1 << a
) < ( uint
) sumVecs_
[ 0 ].size() ) {
491 rightOffset
[ a
] = &( sumVecs_
[ a
][ ( 1 << a
) - 1 ] );
493 rightOffset
[ a
] = 0;
497 firstItem
= &sumVecs_
[ 0 ][ 0 ];
499 for ( int a
= 0; a
< maxDepth
; a
++ )
500 finalRight
[ a
] = &( sumVecs_
[ a
][ sumVecs_
[ a
].size() - 1 ] );
502 leftGetter
= typename SumGroup
<maxDepth
, SumType
>::VecSumGetter( leftOffset
);
503 rightGetter
= typename SumGroup
<maxDepth
, SumType
>::VecSumGetter( rightOffset
);
507 return ( bool ) rightOffset
[ 0 ];
511 for ( int a
= 0; a
< maxDepth
; a
++ ) {
513 if ( !rightOffset
[ a
] )
516 if ( rightOffset
[ a
] > finalRight
[ a
] )
517 rightOffset
[ a
] = 0;
523 if ( leftOffset
[ 0 ] < firstItem
) {
524 return ref
.leftMarker
.sumDifference( ZeroGetter() ) + nextOrder_
.leftDiff();
526 return ref
.leftMarker
.sumDifference( leftGetter
) + nextOrder_
.leftDiff();
529 SumType
rightDiff() {
530 return ref
.rightMarker
.sumDifference( rightGetter
) + nextOrder_
.rightDiff();
534 return ( ((unsigned long)rightOffset
[ 0 ] ) - (unsigned long)firstItem
) / sizeof( uint
* );
538 template <class Evaluator
>
539 int findReference( const SumReference
<maxDepth
, SumType
, order
>& ref
, Evaluator
& eval
) {
540 if ( !ref
.isValid() )
542 if ( sumVecs_
[ 0 ].size() < 2 )
545 FindReferenceProgress
<Evaluator
> prog( *this, ref
, eval
);
548 SumType leftDif
= 0; //prog.leftDiff();
549 SumType rightDif
= prog
.rightDiff();
550 uint difPos
= prog
.difPos();
552 eval( leftDif
, rightDif
, difPos
);
556 return eval
.result();
559 ///This uses the default-evaluator
560 inline int findReference( const SumReference
<maxDepth
, SumType
, order
>& ref
) {
561 SimpleEvaluator eval
;
562 return findReference( ref
, eval
);
567 template <int maxDepth
, typename SumType
, int order
>
568 class DifferenceSumSearch
<maxDepth
, SumType
, order
, false> {
569 typedef SumSearch
<maxDepth
, SumType
, order
> MySearch
;
570 typedef SumSearch
< maxDepth
, SumType
, order
+ 1 > MyPrevSearch
;
573 static string
makeDifference( const string
& str
) {
575 uint len
= ret
.length();
580 for ( uint n
= 0; n
< len
; n
++ ) {
589 DifferenceSumSearch( const string
& text
) : search_( makeDifference( text
) ) {}
591 template <class Type
>
592 void fillReference( Type
& t
, int position
) {
593 search_
.fillReference( t
.nextOrder_
.reference_
, position
);
596 template <class PrevSearch
, class Evaluator
>
597 struct FindRefProgress
{
598 typename
MySearch:: template FindReferenceProgress
<Evaluator
>
601 FindRefProgress( PrevSearch
& search
, const SumReference
< maxDepth
, SumType
, order
+ 1 > & _ref
, Evaluator
& _eval
) : find( search
.nextOrder_
.search_
, _ref
.nextOrder_
.reference_
, _eval
)
605 return find
.leftDiff();
607 SumType
rightDiff() {
608 return find
.rightDiff();
620 using namespace FuzzySearch
;
624 // kate: space-indent on; indent-width 2; tab-width 2; replace-tabs on