CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / SparseIntVector.cpp
blob54f42a0f74b497cf856ab11abfd94f9a10511c0c
1 // Implementation of CSparseIntVector methods
2 // Copyright © 2009 The University of Chicago
3 #include "SparseIntVector.h"
5 #include "StringSurrogate.h"
6 #include "Parse.h"
8 class CSparseIntVector;
10 struct BiData {
11 int Integer;
12 int Values;
16 int CompareBiData (const void* , const void* );
18 CSparseIntVector::CSparseIntVector() {
19 Length = 0;
20 Dimension = 0;
21 Values = 0;
23 NumberOfSlots = 10;
25 Values = new int[NumberOfSlots];
26 Dimension = new int [NumberOfSlots];
27 for (int i = 0; i < NumberOfSlots; i++) {
28 Values[i] = 0;
29 Dimension[i] = 0;
31 NormalizedFlag = 0;
35 CSparseIntVector::CSparseIntVector ( int n)
37 int MinimumNumberOfSlots = 10;
38 if (n > MinimumNumberOfSlots) {
39 NumberOfSlots = n ;
40 } else {
41 NumberOfSlots = MinimumNumberOfSlots;
44 Length = n;
45 Values = new int [NumberOfSlots];
46 Dimension = new int [NumberOfSlots];
47 for (int i = 0; i < NumberOfSlots; i++) {
48 Values[i] = 0;
49 Dimension[i] = 0;
51 NormalizedFlag = 0;
54 CSparseIntVector:: CSparseIntVector (CSparseIntVector& rhs){
56 NumberOfSlots = rhs.GetNumberOfSlots();
57 Length = rhs.GetLength();
58 int i;
59 for ( i = 0; i<Length; i++) { Values[i] = rhs[i];};
61 for ( i = Length; i < NumberOfSlots; i++)
63 Values[i] = rhs[i];
66 // NormalizedFlag = rhs.GetNormalizedFlag();
69 CSparseIntVector:: ~CSparseIntVector()
70 { if (Values) {
71 delete Values;
73 if (Dimension) {
74 delete Dimension;
79 int CSparseIntVector::operator< (CSparseIntVector& RHS)
81 if ( Length < RHS.Length ) {
82 return 1;
84 if ( Length > RHS.Length ) {
85 return -1;
88 for (int i = 0; i < Length; i++) {
89 if ( Dimension[i] < RHS.Dimension[i] ) {
90 return 1;
92 if ( Dimension[i] > RHS.Dimension[i] ) {
93 return -1;
96 return 0;
99 int CSparseIntVector::operator> (CSparseIntVector& RHS)
101 if ( Length > RHS.Length ) {
102 return 1;
104 if ( Length < RHS.Length ) {
105 return -1;
108 for (int i = 0; i < Length; i++) {
109 if ( Dimension[i] > RHS.Dimension[i] ) {
110 return 1;
112 if ( Dimension[i] < RHS.Dimension[i] ) {
113 return -1;
116 return 0;
123 void CSparseIntVector::Clear() {
124 if (Length > 0) {
125 for (int i = 0; i < NumberOfSlots; i++) {
126 Values[i] = 0;
129 NormalizedFlag = 0;
132 void CSparseIntVector::SetLength (int n) {
133 Length = n;
134 NumberOfSlots = n;
135 Values = new int [n];
137 int CSparseIntVector:: GetLength() {return Length;}
139 int CSparseIntVector::operator[] (int n) {
140 if (n < 0 || n >= Length ) {return -1;}
141 else { return Values[n]; };
144 int CSparseIntVector::GetNumberOfSlots() {return NumberOfSlots;}
146 int CSparseIntVector::GetNextDimension (int& DimOut, int& Position)
148 if (Position < 0) {return 0;}
149 if (Position >= Length) {return 0;}
150 DimOut = Dimension[Position];
151 Position++;
152 return 1;
155 int CSparseIntVector::GetAt (int dim) // dim is the value to the outside world
156 { int n = ContainsDim(dim);
157 if (n == -1 ) { return -1; }
158 else { return Values[n]; };
161 void CSparseIntVector::operator= (CSparseIntVector& rhs)
162 { // March 17 2000 JG
163 Length = rhs.GetLength();
164 if ( rhs.GetLength() > NumberOfSlots )
166 if (Dimension) delete Dimension;
167 if (Values) delete Values;
168 NumberOfSlots = rhs.GetLength();
169 Dimension = new int [NumberOfSlots];
170 Values = new int [NumberOfSlots];
172 for (int i = 0; i<Length; i++)
174 Dimension[i] = rhs.Dimension[i];
175 Values[i] = rhs.Values[i];
178 int CSparseIntVector::operator== (CSparseIntVector& L2)
180 if (Length != L2.GetLength() ) {return 0;}
181 for (int i = 0; i < Length; i++) {
182 if (Dimension[i] != L2.Dimension[i]) {return 0;}
183 if (Values[i] != L2[i] ) {return 0;}
185 return 1;
188 int CSparseIntVector::Delete (int Dim)
189 { int InternalDim = -1,
191 if (Length == 0) {return -1;}
192 for ( i = 0; i < Length; i++)
194 if ( Dimension[i] == Dim)
196 InternalDim = i;
197 break;
200 if (InternalDim < 0) {
201 return -1;
203 for ( i = InternalDim; i< Length-1; i++) {
204 Dimension[i] = Dimension[i+1];
205 Values[i] = Values[i+1];
206 Length--;
208 return 0;
211 int CSparseIntVector::GetTopDimension()
212 { return Dimension[Length-1];}
214 void CSparseIntVector::SetAt (int dimen, int value) // insert New at the
215 // right spot.
216 { int index,
217 RightSpot = -1,
220 if (dimen < 0 ) {return;}
221 index = ContainsDim (dimen);
222 if (index >= 0 ) {
223 Values[index] = value;
224 return;
227 Length++;
229 if (Length <= NumberOfSlots)
231 //Feb 28 1999
232 // special case if we happen to be appending at the end:
233 if ( dimen > Dimension[Length-2] )
235 Dimension[ Length - 1 ] = dimen;
236 Values [ Length - 1 ] = value;
237 return; // march 15 1999
240 for ( i = 0; i < Length-1 && dimen > Dimension[i]; i++ )
242 RightSpot = i+1;
244 for ( i = Length-1; i > RightSpot; i--)
246 Dimension[i] = Dimension[i-1];
247 Values[i] = Values [i-1];
249 Values[RightSpot] = value;
250 Dimension [RightSpot] = dimen;
252 else
255 NumberOfSlots *= 2;
256 int * NewValues = new int [NumberOfSlots];
257 int * NewDimension = new int [NumberOfSlots];
259 int pos;
260 pos=0;
261 for ( i = 0; i<Length-1 && dimen > Dimension[i] ; i++)
263 NewDimension[i] = Dimension[i];
264 NewValues[i] = Values[i];
265 pos = i+1;
267 NewValues[pos] = value;
268 NewDimension[pos] = dimen;
269 for ( i = pos + 1; i<Length; i++)
271 NewDimension[i] = Dimension[i-1];
272 NewValues[i] = Values[i-1];
275 if (Values)
277 delete[] Dimension;
278 Dimension = 0;
279 delete [] Values;
280 Values = 0;
282 Values = NewValues;
283 Dimension = NewDimension;
286 NormalizedFlag = 0;
289 void CSparseIntVector::operator() (int n, int val)
290 { SetAt (n,val);
291 NormalizedFlag =0;
294 int CSparseIntVector::operator () (int n) {
295 int loc = ContainsDim (n);
296 return GetAt(loc);
299 void CSparseIntVector::IncrementAt(int n, int val ){
300 int dim = ContainsDim (n);
301 if ( dim >= 0 ) {
302 Values[dim] += val;
303 return;
304 } else {
305 SetAt (n, val);
307 NormalizedFlag =0;
311 int Overlap (CSparseIntVector& List1, CSparseIntVector& List2)
312 { int overlap = 0;
313 int i = 0,
314 j = 0;
315 int Val1=0,
316 Val2=0;
317 while (i < List1.GetLength() && j < List2.GetLength() ) {
318 Val1 = List1[i];
319 Val2 = List2[j];
320 if (Val1 == Val2) {
321 overlap++; i++; j++;
322 } else {
323 if (Val1 < Val2 ) {
324 i++;
325 } else {
326 j++;
330 return overlap;
332 void CSparseIntVector::ClearOut()
333 { for (int i = 0; i < NumberOfSlots; i++) {
334 Values[i] = 0;
335 Dimension[i] = 0;
337 Length = 0;
338 NormalizedFlag = 0;
341 int CSparseIntVector::ContainsDim (int n)
342 { // take advantage of log Size: do this smarter.!
344 if (Length == 0) {return -1;}
348 for (int i = 0; i < Length; i++) {
349 if ( Dimension[i] == n) {
350 return i;
353 return -1;
358 /* TODO: find alternative if used
359 ostream& CSparseIntVector::OutputToStream (CWordCollection& rWords, ostream& stream,int threshold)
362 BiData* SortArray = new BiData[Length];
364 for (int i = 0; i < (int) Length; i++) {
366 SortArray[i].Integer = Dimension[i];
367 SortArray[i].Values = Values[i];
370 // put this back in:
371 // qsort ( (void*) SortArray, Length, sizeof (BiData ), CompareBiData );
373 for ( i = 0; i < (int) Length; i++) {
374 if ( SortArray[i].Values < threshold ) {continue;}
375 if ( SortArray[i].Integer > (int) rWords.GetCount() ||
376 SortArray[i].Integer < 0 )
377 {continue;}
379 stream << setw(6) << SortArray[i].Integer << setw(16) <<
380 rWords[ SortArray[i].Integer ]-> GetKey() << setw (7) <<
381 SortArray[i].Values << endl;
390 delete SortArray;
391 return stream;
398 /* TODO: need CWordCollection
400 void CSparseIntVector::MakeLogFreq (CWordCollection* Words)
401 { int WordNumber;
402 for (int i = 0; i < Length; i++ ) {
403 WordNumber = Dimension[i];
404 Values[i] = int
405 ( log ( Values[i] ) -
406 log ( Words->GetAt (WordNumber)->GetCorpusCount() ) );
408 NormalizedFlag = 0;
412 void CSparseIntVector::Normalize()
414 int Total = 0,
416 for ( i = 0; i < Length ; i++) {
417 Total += Values[i];
419 for ( i=0; i < Length ; i++) {
420 Values[i] /= Total;
422 NormalizedFlag = 1;
424 int CSparseIntVector::GetNormalizedFlag() {return NormalizedFlag;}
426 int CSparseIntVector::InnerProduct ( CSparseIntVector* Vector2)
428 int Total = 0,
429 ThatValue= 0;
430 int ThisDim=0;
432 for (int i = 0; i < Length ; i++) {
433 ThisDim = Dimension[i];
434 ThatValue = Vector2->GetAt(ThisDim);
435 if (ThatValue >= 0) {
436 Total += Values[i] * ThatValue;
439 return Total;
442 int CSparseIntVector::GetFirstDimension()
443 { if (Dimension) { return Dimension[0] ;}
444 else { return 0;}
448 void CSparseIntVector::Ingest(CSparseIntVector* pVector)
450 int Position = 0,
451 DimOut= 0;
452 while ( pVector->GetNextDimension (DimOut, Position) ) {
453 SetAt (DimOut, 1);
457 bool CSparseIntVector::Contains (CSparseIntVector* pVec)
459 int ThisDim,
461 ThatDim,
462 DimPos = 0;
464 ThatDim=0;
465 // Oct 30 1999:
466 if (pVec->GetLength() > GetLength() ) {
467 return 0;
470 pVec->GetNextDimension( ThatDim, DimPos );
472 for (int i = 0; i < Length; i ++) {
473 ThisDim = Dimension[i];
476 /* we step through the values present in This,
477 looking to see if That has such a dimension;
478 if we slip past That without finding its dimension,
479 it means This doesn't contain that. Note the use
480 of MFC-style iterator for the That vector */
482 if ( ThisDim == ThatDim ){
483 if (! pVec->GetNextDimension( ThatDim, DimPos ) ) {
484 return TRUE;//we've gone through pVec entirely
486 continue;
488 if ( ThisDim < ThatDim ) {
489 continue;
491 if ( ThisDim > ThatDim ) {
492 return FALSE;
496 return FALSE; //October 30 1999
500 // Oct 31 1999 JG
501 int CSparseIntVector::Union (CSparseIntVector& Vector)
503 NumberOfSlots += Vector.GetLength();
504 int* NewDimension = new int [NumberOfSlots];
505 int* NewValues = new int [NumberOfSlots];
507 int This = 0,
508 That = 0,
509 ThisDim = -1,
510 ThatDim = -1,
511 NewDim = 0;
513 while ( ( This >= 0 && This < Length)
515 (That >= 0 && That < Vector.Length )
519 if (This >= 0 && This < Length ) { //This is in the legal zone
520 ThisDim = Dimension[This];
521 } else {
522 ThisDim = -1;
524 if (That >= 0 && That < Vector.Length ) { //That is in the legal zone
525 ThatDim = Vector.Dimension[That];
526 } else {
527 ThatDim = -1;
530 if ( ThatDim < 0 || // That is exhausted
531 ( ThisDim >= 0 && ThisDim < ThatDim ) // This is real, so is That, This is smaller
534 NewDimension[NewDim] = ThisDim;
535 //NewValues[NewDim] = 1; change March 5 2000 JG
536 NewValues[NewDim] = Values[This]; // March 5 2000
537 This ++;
539 if (ThisDim < 0 || //This is exhausted
540 ( ThatDim >= 0 && ThatDim < ThisDim ) // This is real, so is That, That is smaller
543 NewDimension[NewDim] = ThatDim;
544 //NewValues[NewDim] =1; change March 5 2000 JG
545 NewValues[NewDim] = Vector.Values[That]; // March 5 2000
546 That++;
549 if (ThisDim >= 0 && ThisDim == ThatDim) {
550 NewDimension[NewDim] = ThatDim;
551 //NewValues[NewDim] =1; // change March 5 2000 JG
552 NewValues[NewDim] = Values[This] + Vector.Values[That]; // March 5 2000
553 That++;
554 This++;
560 NewDim ++;
562 if (This >= Length ) {
563 This = -1;
565 if (That >= Vector.Length ) {
566 That = -1;
570 Length = NewDim ;
572 delete Dimension;
573 delete Values;
575 Dimension = NewDimension;
576 Values = NewValues;
578 return Length;
583 // Oct 31 1999
584 int Overlap (CSparseIntVector* List1, CSparseIntVector* List2, CSparseIntVector& Intersection)
586 if ( List1->Length < List2->Length) {
587 Intersection.NumberOfSlots = List1->Length;
588 } else {
589 Intersection.NumberOfSlots = List2->Length;
593 if (Intersection.Dimension) {
594 delete Intersection.Dimension;
596 if (Intersection.Values) {
597 delete Intersection.Values;
600 Intersection.Dimension = new int [Intersection.NumberOfSlots];
601 Intersection.Values = new int [Intersection.NumberOfSlots];
603 int This = 0,
604 That = 0,
605 ThisDim = 0,
606 ThatDim = 0,
607 NewDim = 0;
609 while (This >= 0 && That >= 0) {
611 if (This >= 0) {
612 ThisDim = List1->Dimension[This];
614 if (That >= 0) {
615 ThatDim = List2->Dimension[That];
618 if (This >= 0 && ThisDim == ThatDim) {
619 Intersection.Dimension[NewDim] = ThisDim;
620 Intersection.Values[NewDim] = 1;
621 This ++;
622 That ++;
623 NewDim ++;
625 if (ThisDim >= 0 && ThisDim < ThatDim) {
626 This ++;
628 if (ThatDim >= 0 && ThisDim > ThatDim) {
629 That ++;
632 if (This >= List1->Length ) {
633 This = -1;
635 if (That >= List2->Length ) {
636 That = -1;
640 Intersection.Length = NewDim ;
644 return Intersection.GetLength();
647 CParse CSparseIntVector::Display()
649 CParse Return;
650 for (int i = 0 ; i < Length; i++)
652 QString colon = ":", space = " ";
653 Return.Append (Dimension[i]);
654 Return.Append( CStringSurrogate( colon.unicode(), 0, colon.length()) );
655 Return.Append ( Values[i] );
656 Return.Append ( CStringSurrogate( space.unicode(), 0, space.length()) );
658 return Return;
660 QString CSparseIntVector::DisplayAsString()
662 QString Return,
663 Temp;
664 for (int i = 0; i < Length; i++)
666 Temp.arg( Dimension[i] );
667 Temp += ":";
668 Temp.arg( Values[i] ) ;
669 Return += Temp;
672 return Return;