// fast “slice-and-count” calculation of string similarity
// Copyright © 2009 The University of Chicago
// for StringInventory:
// Forward declaration only: StringInventory::addwordcollection() takes a
// linguistica::ui::status_user_agent by reference, so the complete type
// is not needed in this header.
namespace linguistica {
namespace ui {
class status_user_agent;
} // namespace ui
} // namespace linguistica
// for SliceCollection:
// Forward declaration only: SliceCollection::FindWordPairs() takes a
// linguistica::ui::status_user_agent by reference. Redeclaring a class
// that was already forward-declared is legal, so this section does not
// depend on any earlier declaration.
namespace linguistica {
namespace ui {
class status_user_agent;
} // namespace ui
} // namespace linguistica
/**
 * A snippet is a representation of a string which also remembers the
 * number of times it is used. The primary purpose of this object is
 * so that we can access a pointer to each one.
 */
33 explicit snippet(QString
);
34 unsigned int getlength() const {return m_string
.length();}
35 QString
getkey() const {return m_string
;}
38 extern bool lessThan(const snippet
* s1
, const snippet
* s2
);
/**
 * This is simply a dictionary that allows us to go from a QString to a pointer
 * to a snippet with the same key that is in the inventory.
 */
44 class StringInventory
: public Q3Dict
<snippet
> {
48 void addwordcollection(class CWordCollection
* words
,
49 SliceCollection
& slices
,
50 linguistica::ui::status_user_agent
& status_display
);
51 snippet
* getsnippetpointer(QString key
);
52 snippet
* operator<<(QString key
);
53 StringPtrList
* addword(QString word
);
/**
 * A slice is a list of pointers to a snippet for every substring in the key of
 * the slice. It is the principal class in this file.
 */
60 class StringPtrList
: public QList
<snippet
*> { // a.k.a. Slice
63 StringInventory
* m_MyStringInventory
;
65 StringPtrList(QString key
, StringInventory
*);
68 int getlength() {return m_key
.length();}
69 QString
getkey() {return m_key
;}
71 typedef StringPtrList Slice
;
73 class SliceCollection
: public QList
<Slice
*> {
/**
 * Declared here; defined out of line.
 * NOTE(review): presumably writes a description of this collection to
 * `log`; the output format is not visible in this header.
 *
 * The elaborated specifier `class QTextStream` names the type without
 * requiring an earlier declaration of it in this header.
 *
 * @param log text stream, passed by non-const reference
 */
void writetolog(class QTextStream& log);
76 void FindWordPairs(linguistica::ui::status_user_agent
& status_display
,
77 class CLexicon
* lex
, unsigned int minimumlength
= 0);
80 extern int largestcommonsubstringlength(Slice
* a
, Slice
* b
,
81 unsigned int minimum
= 0);
82 extern snippet
* largestcommonsubstring(Slice
* a
, Slice
* b
,
83 unsigned int minimum
= 0);