1 // Implementation of CSparseIntVector methods
2 // Copyright © 2009 The University of Chicago
3 #include "SparseIntVector.h"
5 #include "StringSurrogate.h"
8 class CSparseIntVector
;
16 int CompareBiData (const void* , const void* );
18 CSparseIntVector::CSparseIntVector() {
25 Values
= new int[NumberOfSlots
];
26 Dimension
= new int [NumberOfSlots
];
27 for (int i
= 0; i
< NumberOfSlots
; i
++) {
35 CSparseIntVector::CSparseIntVector ( int n
)
37 int MinimumNumberOfSlots
= 10;
38 if (n
> MinimumNumberOfSlots
) {
41 NumberOfSlots
= MinimumNumberOfSlots
;
45 Values
= new int [NumberOfSlots
];
46 Dimension
= new int [NumberOfSlots
];
47 for (int i
= 0; i
< NumberOfSlots
; i
++) {
54 CSparseIntVector:: CSparseIntVector (CSparseIntVector
& rhs
){
56 NumberOfSlots
= rhs
.GetNumberOfSlots();
57 Length
= rhs
.GetLength();
59 for ( i
= 0; i
<Length
; i
++) { Values
[i
] = rhs
[i
];};
61 for ( i
= Length
; i
< NumberOfSlots
; i
++)
66 // NormalizedFlag = rhs.GetNormalizedFlag();
69 CSparseIntVector:: ~CSparseIntVector()
79 int CSparseIntVector::operator< (CSparseIntVector
& RHS
)
81 if ( Length
< RHS
.Length
) {
84 if ( Length
> RHS
.Length
) {
88 for (int i
= 0; i
< Length
; i
++) {
89 if ( Dimension
[i
] < RHS
.Dimension
[i
] ) {
92 if ( Dimension
[i
] > RHS
.Dimension
[i
] ) {
99 int CSparseIntVector::operator> (CSparseIntVector
& RHS
)
101 if ( Length
> RHS
.Length
) {
104 if ( Length
< RHS
.Length
) {
108 for (int i
= 0; i
< Length
; i
++) {
109 if ( Dimension
[i
] > RHS
.Dimension
[i
] ) {
112 if ( Dimension
[i
] < RHS
.Dimension
[i
] ) {
123 void CSparseIntVector::Clear() {
125 for (int i
= 0; i
< NumberOfSlots
; i
++) {
132 void CSparseIntVector::SetLength (int n
) {
135 Values
= new int [n
];
137 int CSparseIntVector:: GetLength() {return Length
;}
139 int CSparseIntVector::operator[] (int n
) {
140 if (n
< 0 || n
>= Length
) {return -1;}
141 else { return Values
[n
]; };
144 int CSparseIntVector::GetNumberOfSlots() {return NumberOfSlots
;}
146 int CSparseIntVector::GetNextDimension (int& DimOut
, int& Position
)
148 if (Position
< 0) {return 0;}
149 if (Position
>= Length
) {return 0;}
150 DimOut
= Dimension
[Position
];
155 int CSparseIntVector::GetAt (int dim
) // dim is the value to the outside world
156 { int n
= ContainsDim(dim
);
157 if (n
== -1 ) { return -1; }
158 else { return Values
[n
]; };
161 void CSparseIntVector::operator= (CSparseIntVector
& rhs
)
162 { // March 17 2000 JG
163 Length
= rhs
.GetLength();
164 if ( rhs
.GetLength() > NumberOfSlots
)
166 if (Dimension
) delete Dimension
;
167 if (Values
) delete Values
;
168 NumberOfSlots
= rhs
.GetLength();
169 Dimension
= new int [NumberOfSlots
];
170 Values
= new int [NumberOfSlots
];
172 for (int i
= 0; i
<Length
; i
++)
174 Dimension
[i
] = rhs
.Dimension
[i
];
175 Values
[i
] = rhs
.Values
[i
];
178 int CSparseIntVector::operator== (CSparseIntVector
& L2
)
180 if (Length
!= L2
.GetLength() ) {return 0;}
181 for (int i
= 0; i
< Length
; i
++) {
182 if (Dimension
[i
] != L2
.Dimension
[i
]) {return 0;}
183 if (Values
[i
] != L2
[i
] ) {return 0;}
188 int CSparseIntVector::Delete (int Dim
)
189 { int InternalDim
= -1,
191 if (Length
== 0) {return -1;}
192 for ( i
= 0; i
< Length
; i
++)
194 if ( Dimension
[i
] == Dim
)
200 if (InternalDim
< 0) {
203 for ( i
= InternalDim
; i
< Length
-1; i
++) {
204 Dimension
[i
] = Dimension
[i
+1];
205 Values
[i
] = Values
[i
+1];
211 int CSparseIntVector::GetTopDimension()
212 { return Dimension
[Length
-1];}
214 void CSparseIntVector::SetAt (int dimen
, int value
) // insert New at the
220 if (dimen
< 0 ) {return;}
221 index
= ContainsDim (dimen
);
223 Values
[index
] = value
;
229 if (Length
<= NumberOfSlots
)
232 // special case if we happen to be appending at the end:
233 if ( dimen
> Dimension
[Length
-2] )
235 Dimension
[ Length
- 1 ] = dimen
;
236 Values
[ Length
- 1 ] = value
;
237 return; // march 15 1999
240 for ( i
= 0; i
< Length
-1 && dimen
> Dimension
[i
]; i
++ )
244 for ( i
= Length
-1; i
> RightSpot
; i
--)
246 Dimension
[i
] = Dimension
[i
-1];
247 Values
[i
] = Values
[i
-1];
249 Values
[RightSpot
] = value
;
250 Dimension
[RightSpot
] = dimen
;
256 int * NewValues
= new int [NumberOfSlots
];
257 int * NewDimension
= new int [NumberOfSlots
];
261 for ( i
= 0; i
<Length
-1 && dimen
> Dimension
[i
] ; i
++)
263 NewDimension
[i
] = Dimension
[i
];
264 NewValues
[i
] = Values
[i
];
267 NewValues
[pos
] = value
;
268 NewDimension
[pos
] = dimen
;
269 for ( i
= pos
+ 1; i
<Length
; i
++)
271 NewDimension
[i
] = Dimension
[i
-1];
272 NewValues
[i
] = Values
[i
-1];
283 Dimension
= NewDimension
;
289 void CSparseIntVector::operator() (int n
, int val
)
294 int CSparseIntVector::operator () (int n
) {
295 int loc
= ContainsDim (n
);
299 void CSparseIntVector::IncrementAt(int n
, int val
){
300 int dim
= ContainsDim (n
);
311 int Overlap (CSparseIntVector
& List1
, CSparseIntVector
& List2
)
317 while (i
< List1
.GetLength() && j
< List2
.GetLength() ) {
332 void CSparseIntVector::ClearOut()
333 { for (int i
= 0; i
< NumberOfSlots
; i
++) {
341 int CSparseIntVector::ContainsDim (int n
)
342 { // take advantage of log Size: do this smarter.!
344 if (Length
== 0) {return -1;}
348 for (int i
= 0; i
< Length
; i
++) {
349 if ( Dimension
[i
] == n
) {
358 /* TODO: find alternative if used
359 ostream& CSparseIntVector::OutputToStream (CWordCollection& rWords, ostream& stream,int threshold)
362 BiData* SortArray = new BiData[Length];
364 for (int i = 0; i < (int) Length; i++) {
366 SortArray[i].Integer = Dimension[i];
367 SortArray[i].Values = Values[i];
371 // qsort ( (void*) SortArray, Length, sizeof (BiData ), CompareBiData );
373 for ( i = 0; i < (int) Length; i++) {
374 if ( SortArray[i].Values < threshold ) {continue;}
375 if ( SortArray[i].Integer > (int) rWords.GetCount() ||
376 SortArray[i].Integer < 0 )
379 stream << setw(6) << SortArray[i].Integer << setw(16) <<
380 rWords[ SortArray[i].Integer ]-> GetKey() << setw (7) <<
381 SortArray[i].Values << endl;
398 /* TODO: need CWordCollection
400 void CSparseIntVector::MakeLogFreq (CWordCollection* Words)
402 for (int i = 0; i < Length; i++ ) {
403 WordNumber = Dimension[i];
405 ( log ( Values[i] ) -
406 log ( Words->GetAt (WordNumber)->GetCorpusCount() ) );
412 void CSparseIntVector::Normalize()
416 for ( i
= 0; i
< Length
; i
++) {
419 for ( i
=0; i
< Length
; i
++) {
424 int CSparseIntVector::GetNormalizedFlag() {return NormalizedFlag
;}
426 int CSparseIntVector::InnerProduct ( CSparseIntVector
* Vector2
)
432 for (int i
= 0; i
< Length
; i
++) {
433 ThisDim
= Dimension
[i
];
434 ThatValue
= Vector2
->GetAt(ThisDim
);
435 if (ThatValue
>= 0) {
436 Total
+= Values
[i
] * ThatValue
;
442 int CSparseIntVector::GetFirstDimension()
443 { if (Dimension
) { return Dimension
[0] ;}
448 void CSparseIntVector::Ingest(CSparseIntVector
* pVector
)
452 while ( pVector
->GetNextDimension (DimOut
, Position
) ) {
457 bool CSparseIntVector::Contains (CSparseIntVector
* pVec
)
466 if (pVec
->GetLength() > GetLength() ) {
470 pVec
->GetNextDimension( ThatDim
, DimPos
);
472 for (int i
= 0; i
< Length
; i
++) {
473 ThisDim
= Dimension
[i
];
476 /* we step through the values present in This,
477 looking to see if That has such a dimension;
478 if we slip past That without finding its dimension,
479 it means This doesn't contain that. Note the use
480 of MFC-style iterator for the That vector */
482 if ( ThisDim
== ThatDim
){
483 if (! pVec
->GetNextDimension( ThatDim
, DimPos
) ) {
484 return TRUE
;//we've gone through pVec entirely
488 if ( ThisDim
< ThatDim
) {
491 if ( ThisDim
> ThatDim
) {
496 return FALSE
; //October 30 1999
501 int CSparseIntVector::Union (CSparseIntVector
& Vector
)
503 NumberOfSlots
+= Vector
.GetLength();
504 int* NewDimension
= new int [NumberOfSlots
];
505 int* NewValues
= new int [NumberOfSlots
];
513 while ( ( This
>= 0 && This
< Length
)
515 (That
>= 0 && That
< Vector
.Length
)
519 if (This
>= 0 && This
< Length
) { //This is in the legal zone
520 ThisDim
= Dimension
[This
];
524 if (That
>= 0 && That
< Vector
.Length
) { //That is in the legal zone
525 ThatDim
= Vector
.Dimension
[That
];
530 if ( ThatDim
< 0 || // That is exhausted
531 ( ThisDim
>= 0 && ThisDim
< ThatDim
) // This is real, so is That, This is smaller
534 NewDimension
[NewDim
] = ThisDim
;
535 //NewValues[NewDim] = 1; change March 5 2000 JG
536 NewValues
[NewDim
] = Values
[This
]; // March 5 2000
539 if (ThisDim
< 0 || //This is exhausted
540 ( ThatDim
>= 0 && ThatDim
< ThisDim
) // This is real, so is That, That is smaller
543 NewDimension
[NewDim
] = ThatDim
;
544 //NewValues[NewDim] =1; change March 5 2000 JG
545 NewValues
[NewDim
] = Vector
.Values
[That
]; // March 5 2000
549 if (ThisDim
>= 0 && ThisDim
== ThatDim
) {
550 NewDimension
[NewDim
] = ThatDim
;
551 //NewValues[NewDim] =1; // change March 5 2000 JG
552 NewValues
[NewDim
] = Values
[This
] + Vector
.Values
[That
]; // March 5 2000
562 if (This
>= Length
) {
565 if (That
>= Vector
.Length
) {
575 Dimension
= NewDimension
;
584 int Overlap (CSparseIntVector
* List1
, CSparseIntVector
* List2
, CSparseIntVector
& Intersection
)
586 if ( List1
->Length
< List2
->Length
) {
587 Intersection
.NumberOfSlots
= List1
->Length
;
589 Intersection
.NumberOfSlots
= List2
->Length
;
593 if (Intersection
.Dimension
) {
594 delete Intersection
.Dimension
;
596 if (Intersection
.Values
) {
597 delete Intersection
.Values
;
600 Intersection
.Dimension
= new int [Intersection
.NumberOfSlots
];
601 Intersection
.Values
= new int [Intersection
.NumberOfSlots
];
609 while (This
>= 0 && That
>= 0) {
612 ThisDim
= List1
->Dimension
[This
];
615 ThatDim
= List2
->Dimension
[That
];
618 if (This
>= 0 && ThisDim
== ThatDim
) {
619 Intersection
.Dimension
[NewDim
] = ThisDim
;
620 Intersection
.Values
[NewDim
] = 1;
625 if (ThisDim
>= 0 && ThisDim
< ThatDim
) {
628 if (ThatDim
>= 0 && ThisDim
> ThatDim
) {
632 if (This
>= List1
->Length
) {
635 if (That
>= List2
->Length
) {
640 Intersection
.Length
= NewDim
;
644 return Intersection
.GetLength();
647 CParse
CSparseIntVector::Display()
650 for (int i
= 0 ; i
< Length
; i
++)
652 QString colon
= ":", space
= " ";
653 Return
.Append (Dimension
[i
]);
654 Return
.Append( CStringSurrogate( colon
.unicode(), 0, colon
.length()) );
655 Return
.Append ( Values
[i
] );
656 Return
.Append ( CStringSurrogate( space
.unicode(), 0, space
.length()) );
660 QString
CSparseIntVector::DisplayAsString()
664 for (int i
= 0; i
< Length
; i
++)
666 Temp
.arg( Dimension
[i
] );
668 Temp
.arg( Values
[i
] ) ;