1 // Some methods for discovering allomorphs of stems
2 // Copyright © 2009 The University of Chicago
3 #include "MiniLexicon.h"
12 #include "SignatureCollection.h"
13 #include "CompoundCollection.h"
14 #include "SuffixCollection.h"
15 #include "WordCollection.h"
16 #include "StemCollection.h"
17 #include "CollectionTemplate.h"
18 #include "StringSurrogate.h"
19 #include "StringFunc.h"
// Alias for a QMap keyed by QString with QString values. Further down in this
// file it appears as the type of the `Remapper` parameter of a
// CreateADeletingSignature overload.
23 typedef QMap
<QString
,QString
> StringToString
;
27 Finding rules: first we find pairs of similar stems, and see HOW they differ.
30 Suppose we find a lot of stems that differ by final -e. Then we look at the smaller stem's
31 signature. If it contains a high incidence of
33 (b) one or two other suffixes F1, F2
34 (c) F1 and F2 "go well" with the signature of the larger stem(s);
37 (d) Consider the hypothesis that F1 and F2 are of the form <e>F1 and <e>F2
// Members of SigLetter, a (signature, letter) pair. RelateStems queues these
// on SignaturesToFixList and later hands each pair's signature and letter to
// MoveWordsStemSuffixBoundaryToRight.
// NOTE(review): the enclosing type header and the m_Letter declaration are
// not visible in this listing.

// The signature of this pair.
43 CSignature
* m_SigPointer
;
// Constructor: stores Sig in m_SigPointer and SS in m_Letter.
46 SigLetter(CSignature
* Sig
, const CStringSurrogate
& SS
)
47 : m_SigPointer(Sig
), m_Letter(SS
) { }
// Looks for stem-final allomorphy (logged as "Finding allomorphy (Version 2002)").
//
// Outline of what the visible code does:
//   * Sorts m_pSuffixes by COUNT and walks them; the tests on GetKeyLength()
//     and GetUseCount() appear to restrict the walk to one-character suffixes
//     used at least MinimumSuffixCount times (the skip statements themselves
//     are not visible here).
//   * For each such letter, gathers into pPossibleDeletingSuffixes the
//     suffixes with at most MaximumExceptionCount stems ending in that letter.
//   * Scans two-suffix signatures containing the letter, and when the
//     NULL.OtherSuffix signature also exists, records the letter in
//     SuffixesToEliminate and queues the signature on SignaturesToFixList.
//   * Marks suffixes as deleters of the letter with AddDeletee and queues
//     further signatures.
//   * Calls MoveWordsStemSuffixBoundaryToRight for every queued signature,
//     first for the simple signatures and then for the "bigger" ones.
//
// NOTE(review): many lines of this function (braces, `continue`s, some
// declarations and call arguments) are missing from this listing, so the
// nesting described above is partly inferred — confirm against the full source.
50 void CMiniLexicon::RelateStems()
52 const CStringSurrogate
CSSofQNULL(TheStringNULL
);
53 const int MinimumSuffixCount
= 5;
55 LogFileLargeTitle( "Finding allomorphy (Version 2002)");
56 m_pSuffixes
->Sort(COUNT
);
58 // XXX. consider using boost::optional instead.
59 std::auto_ptr
<CParse
> pPossibleDeletingSuffixes
;
61 // Major loop over deletable pseudo-suffixes
62 QList
<SigLetter
*> SignaturesToFixList
;
63 //SignaturesToFixList.setAutoDelete(true); //fix this -- so it doesn't become a memory leak. @@@
64 CSuffixCollection SuffixesToEliminate
;
65 for (int suffixno
= 0; suffixno
< m_pSuffixes
->GetCount(); ++suffixno
) {
66 CSuffix
* qSuffix
= m_pSuffixes
->GetAtSort(suffixno
);
67 if (qSuffix
->GetKeyLength() != 1)
69 if (qSuffix
->GetUseCount() < MinimumSuffixCount
)
72 /// possible suffix (like 'e' in English)
73 CStringSurrogate ssDiffLetter
= qSuffix
->GetKey();
74 LogFileSmallTitle(ssDiffLetter
.Display());
// A fresh candidate list is created for every letter; assigning to the
// auto_ptr replaces the list built for the previous letter.
76 pPossibleDeletingSuffixes
= std::auto_ptr
<CParse
>(new CParse
);
78 // Find all suffix candidates that MIGHT delete this DiffLetter
79 // if DiffLetter == 'e', SuffixCandidates includes ing, ity.
80 LogFileHeader("Suffixes that do not follow this suffix", "Count");
// NOTE(review): this inner loop starts at index 1 and uses GetAt(), while the
// outer loop starts at 0 and uses GetAtSort() — confirm that skipping index 0
// is intended.
83 for (int suffixno2
= 1; suffixno2
< m_pSuffixes
->GetCount(); ++suffixno2
) {
84 CSuffix
* pSuffix
= m_pSuffixes
->GetAt(suffixno2
);
86 if (pSuffix
->GetUseCount() < MinimumSuffixCount
)
89 CStringSurrogate ssSuffix
= pSuffix
->GetKey();
// Both counters are filled in by the call below through its int& parameters.
90 int TotalStemsWithSuffix
;
91 int HowManyEndWithThisLetter
;
92 HowManyStemsWithThisSuffixEndInThisLetter(
96 HowManyEndWithThisLetter
);
98 // XXX. Use “Threshold = 0.05” instead?
100 const int MinimumStemCount
= 5;
101 const int MaximumExceptionCount
= 5;
103 // if suffix doesn’t begin with DiffLetter
104 // and its count is very small, ignore it.
105 if (TotalStemsWithSuffix
<= MinimumStemCount
&&
106 ssSuffix
.Left(ssDiffLetter
.GetLength())
109 LogFile(ssSuffix
.Display(), HowManyEndWithThisLetter
);
// Few stems taking this suffix end in the letter: keep it as a candidate
// deleter of the letter.
111 if (HowManyEndWithThisLetter
<= MaximumExceptionCount
) {
112 pPossibleDeletingSuffixes
->Append(ssSuffix
);
117 if (!found
) LogFileSmallTitle( "None found");
120 // Simple signatures (2 affixes)
121 // We now have a set of candidates that might delete DiffLetter
122 // We will look at each signature, and see whether it
123 // erroneously has a suffix that should really be thought
124 // of as deleting from the stem rather than as a suffix.
126 // 1. In our first test, we will look for signatures of the
127 // form: Letter.OtherSuffix, where OtherSuffix does not
128 // appear after stems ending in Letter, and where
129 // NULL.OtherSuffix does occur as a signature.
131 LogFileHeader("Suspicious signature","Paired signature" );
134 for (int signo
= 0; signo
< m_pSignatures
->GetCount(); ++signo
) {
135 CSignature
* pSig
= m_pSignatures
->GetAt(signo
);
137 if (pSig
-> Size() != 2)
139 if (!pSig
-> Contains(ssDiffLetter
))
// OtherSuffix is whichever of the two pieces is not the letter.
143 if (pSig
-> GetPiece(1) == ssDiffLetter
)
144 OtherSuffix
= pSig
->GetPiece(2).Display();
146 OtherSuffix
= pSig
->GetPiece(1).Display();
148 if (OtherSuffix
== TheStringNULL
)
150 if (!pPossibleDeletingSuffixes
->Contains(OtherSuffix
))
151 // OtherSuffix doesn't appear after DiffLetter
153 LogFile(pSig
->Display());
// Build the alphabetized signature NULL.OtherSuffix and look it up among the
// existing signatures.
157 TestSig
.Append(TheStringNULL
);
158 TestSig
.Append(OtherSuffix
);
159 TestSig
.Alphabetize();
161 if (*m_pSignatures
^= TestSig
) {
162 SuffixesToEliminate
<< ssDiffLetter
;
// Ownership of the new SigLetter is handed to the list via release().
164 std::auto_ptr
<SigLetter
> pSigLetter(
165 new SigLetter(pSig
, ssDiffLetter
));
166 SignaturesToFixList
.append(
167 pSigLetter
.release());
168 LogFile(TestSig
.Display());
170 LogFile(TestSig
.Display(), " not found.");
174 if (!found
) LogFile("None found.");
178 // NB: We should do the preceding test in a more
179 // general way, that is:
180 // Look for pairs of signatures of the form
181 // A.B and A.xB, where x is the deleting letter;
182 // the previous case is this pattern with B being NULL.
183 // Then A deletes x, i.e., A is really <x>A.
185 // 2. In our other test, we look at the signatures
186 // that contain both DiffLetter and one of the deleters,
187 // and see, first of all, if a suffix appears more often
188 // with NULL than with DiffLetter;
189 // or, the suffix actually is of the form aX,
190 // where a = DiffLetter, X occurs with NULL more
191 // than aX occurs with a.
193 if (!(SuffixesToEliminate
^= ssDiffLetter
))
196 for (int signo
= 0; signo
< m_pSignatures
->GetCount(); ++signo
) {
197 CSignature
* pSig
= m_pSignatures
->GetAt(signo
);
198 if (!pSig
-> Contains(ssDiffLetter
))
201 for (int affixno
= 1; affixno
<= pSig
->Size(); ++affixno
) {
202 CStringSurrogate ssSuffix
= pSig
->GetPiece(affixno
);
203 CSuffix
* pSuffix
= *m_pSuffixes
^= ssSuffix
;
205 if (pSuffix
->GetDeletees()->Contains(ssDiffLetter
))
206 // we've already determined it deletes DiffLetter
209 if (StemsWithBothSuffixes(TheStringNULL
, ssSuffix
) >
210 StemsWithBothSuffixes(
211 ssDiffLetter
, ssSuffix
))
212 // probably a deleter
213 pSuffix
->AddDeletee(ssDiffLetter
);
215 if (ssSuffix
.Left(ssDiffLetter
.GetLength()) ==
217 // probably cut wrong, like "ement"
218 CStringSurrogate ssTruncatedForm
=
220 ssDiffLetter
.GetLength());
222 if (StemsWithBothSuffixes(TheStringNULL
,
224 StemsWithBothSuffixes(
227 pSuffix
->AddDeletee(ssDiffLetter
);
232 // Now put onto SignaturesToFixList all of the sigs that
233 // are composed ONLY of deleting suffixes.
234 LogFileHeader("Signatures to fix");
235 for (int signo
= 0; signo
< m_pSignatures
->GetCount(); ++signo
) {
236 CSignature
* pSig
= m_pSignatures
->GetAt(signo
);
238 if (!pSig
->Contains(ssDiffLetter
))
241 bool bFoundDeletee
= false;
242 bool bFoundDeleter
= false;
243 for (int affixno
= 1; affixno
<= pSig
->Size(); ++affixno
) {
244 const CStringSurrogate ssSuffix
= pSig
->GetPiece(affixno
);
245 if (ssSuffix
== ssDiffLetter
) {
246 bFoundDeletee
= true;
250 CSuffix
* pSuffix
= *m_pSuffixes
^= ssSuffix
;
251 Q_ASSERT(pSuffix
!= 0);
252 if (!pSuffix
->GetDeletees()
253 ->Contains(ssDiffLetter
)) {
256 bFoundDeleter
= true;
260 if (bFoundDeletee
&& bFoundDeleter
) {
261 std::auto_ptr
<SigLetter
> pSigLetter(new SigLetter(
262 pSig
, ssDiffLetter
));
263 LogFile(pSigLetter
->m_SigPointer
->Display());
264 SignaturesToFixList
.append(pSigLetter
.release());
269 } // end of checking this letter to see if it ever deletes.
270 // End of Major loop over deletable pseudo-suffixes
271 LogFileSmallTitle("Now we do the changing for the simple signatures.");
274 foreach (SigLetter
* pSigLetter
, SignaturesToFixList
) {
275 CSignature
* pSig
= pSigLetter
->m_SigPointer
;
276 CStringSurrogate ssDiffLetter
= pSigLetter
->m_Letter
;
279 // "1. Moving material from suffixes to stems:
280 // ${pSigLetter->m_Letter}, in signature:
281 // ${*pSigLetter->m_SigPointer}
283 // this is where the real work happens
284 // pPossibleDeletingSuffixes =
285 // SuffixesWhichMightDeleteKey.find(ssDiffLetter.Display()));
287 // XXX. pPossibleDeletingSuffixes kept on getting recreated
288 // before, so something is probably awry.
// NOTE(review): at this point the auto_ptr holds only the candidate list
// built for the last letter handled by the major loop, yet it is used for
// every queued (signature, letter) pair — confirm.
289 if (pPossibleDeletingSuffixes
.get())
290 MoveWordsStemSuffixBoundaryToRight(pSig
,
291 ssDiffLetter
, pPossibleDeletingSuffixes
.get());
294 // XXX. log: Suffixes to eliminate: ${SuffixesToEliminate}
296 m_pSignatures
->CleanUp();
// NOTE(review): QList::clear() on a list of raw SigLetter* does not delete
// the pointed-to objects; unless a line missing from this listing deletes
// them, the SigLetter objects released into the list above are leaked (see
// the author's remark at the list's declaration).
297 SignaturesToFixList
.clear();
298 m_pSignatures
->CleanUp();
300 // go through all the signatures,
301 // and identify those that should be fixed.
302 // For example, the signature e.<e>ing.es
303 // will be marked to become NULL.<e>ing.s. /
304 LogFileSmallTitle("Bigger signatures");
305 LogFile ("How many suffixes to eliminate? ", SuffixesToEliminate
.GetCount());
307 for (int suffixno
= 0; suffixno
< SuffixesToEliminate
.GetCount(); ++suffixno
) {
308 CSuffix
* const pSuffixToEliminate
= SuffixesToEliminate
[suffixno
];
309 const CStringSurrogate ssSuffixToEliminate
=
310 pSuffixToEliminate
->GetKey();
312 LogFileSmallTitle(ssSuffixToEliminate
.Display());
313 LogFileHeader("Signature", "Suffix", "Disposition");
315 for (int signo
= 0; signo
< m_pSignatures
->GetCount(); ++signo
) {
316 CSignature
* const pSig
= m_pSignatures
->GetAt(signo
);
318 bool ThisSigContainsDeletingSuffix
= false;
319 if (pSig
->Contains(ssSuffixToEliminate
) &&
321 LogFile(pSig
->Display());
323 for (int affixno
= 1; affixno
<= pSig
->Size(); ++affixno
) {
324 CStringSurrogate ssSuffix
=
325 pSig
->GetPiece(affixno
);
326 LogFile(ssSuffix
.Display());
328 if (ssSuffix
== ssSuffixToEliminate
) {
329 // 2. Suffix is the one we want
331 LogFile("Found the deleting (pseudo-) suffix ");
335 if (pPossibleDeletingSuffixes
.get() &&
336 pPossibleDeletingSuffixes
337 ->Contains(ssSuffix
)) {
338 // 3. Suffix deletes the suffix
339 // we want to make NULL --
340 ThisSigContainsDeletingSuffix
= true;
342 // XXX. CStringSurrogate::StartsWith
344 ssSuffixToEliminate
.GetLength())
345 == ssSuffixToEliminate
) {
346 // this is the "ement" case
347 // -- we want to change the
349 // not make it an e-deleter
351 // XXX. SuffixChangesToMake
352 // map never populated
353 QString NewSuffix
= "";
356 LogFile("We found the pseudosuffix at the beginning of a poor suffix; will change to: ",NewSuffix
);
359 LogFile("We found the pseudosuffix deleting before ", ssSuffix
.Display());
361 } else if (ssSuffix
.Left( ssSuffixToEliminate
.GetLength()) == ssSuffixToEliminate
)
363 // 4. Suffix starts with the suffix
364 // we want to make NULL
365 LogFile("We found ", ssSuffix
.Display());
368 LogFile("Not a deleter:", (ssSuffix
.Display()));
369 } // end of checking each suffix in the signature
372 if (!ThisSigContainsDeletingSuffix
)
375 std::auto_ptr
<SigLetter
> pSigLetter(new SigLetter(
376 pSig
, ssSuffixToEliminate
));
377 SignaturesToFixList
.append(pSigLetter
.release());
378 } // end of signature loop
381 } // end of loop over suffixes being eliminated.
383 LogFileSmallTitle("Now we do the changing for more complex signatures");
385 foreach (SigLetter
* pSigLetter
, SignaturesToFixList
) {
386 CSignature
* pSig
= pSigLetter
->m_SigPointer
;
387 CStringSurrogate ssDiffLetter
= pSigLetter
->m_Letter
;
389 LogFile (QString(" Moving material from suffixes to stems: "),pSigLetter
->m_Letter
.Display(), QString(", in signature: "),
390 pSigLetter
->m_SigPointer
->Display());
392 // this is where the real work happens:
// The letter is passed as a QString here (Display()), unlike the first
// fixing loop above, which passes the CStringSurrogate.
394 MoveWordsStemSuffixBoundaryToRight(pSig
,
395 pSigLetter
->m_Letter
.Display(),
396 pPossibleDeletingSuffixes
.get());
398 LogFileHeader ("Suffixes to eliminate: ");
399 for (int suffixno
= 0; suffixno
< SuffixesToEliminate
.GetCount(); ++suffixno
){
400 LogFile( SuffixesToEliminate
[suffixno
]->Display() );}
403 m_pSignatures
->CleanUp();
409 //////////////////////////////////////////////////////////////////////////////////////
410 //////////////////////////////////////////////////////////////////////////////////////
412 /* The following function takes a stem, and adds FinalLetter to it, and then
413 adds all the other suffixes in the stem's old SuffixList to the bigger
414 stem -- whether that stem already existed or not.
415 We should be sure to get rid of this stem only if ALL of its suffixes
416 are shifted -- and many may not be.
419 //////////////////////////////////////////////////////////////////////////////////////
420 //////////////////////////////////////////////////////////////////////////////////////
// Walks pStem's suffix list and, for the suffixes it processes, rewrites the
// suffix as "<FinalLetter>suffix" and looks up the stem extended by
// FinalLetter in m_pStems, adding the rewritten suffix to that bigger stem
// when it already exists.
// NOTE(review): many lines of this body (declarations, braces, and the branch
// for a bigger stem that does not exist yet) are missing from this listing;
// the description covers only what is visible.
422 void CMiniLexicon::ShiftFinalLetterToStem (CStem
* pStem
, QString
& FinalLetter
)
427 Stem
= pStem
->GetKey().Display();
432 CParse
* SuffixList
= pStem
->GetSuffixList();
// Pieces are numbered from 1 to Size().
441 for (int suffixno
= 1; suffixno
<= SuffixList
->Size(); suffixno
++)
443 Suffix
= SuffixList
->GetPiece(suffixno
).Display();
// The statement guarded by this test is not visible; presumably the suffix
// equal to FinalLetter itself is handled separately — confirm.
444 if ( Suffix
== FinalLetter
)
// Look up the existing word (old stem + suffix) before the suffix is rewritten.
450 Word
= Stem
+ Suffix
;
451 pWord
= *m_pWords
^= Word
;
// Rewrite the suffix in the "<letter>suffix" notation.
452 Suffix
= "<" + FinalLetter
+ ">" + Suffix
;
453 pSuffix
= *m_pSuffixes
<< ssSuffix
;
// Bigger stem = old stem key + FinalLetter; look it up among existing stems.
455 NewStem
= ssStem
.Display() + FinalLetter
;
456 CStem
* pBiggerStem
= *m_pStems
^= NewStem
;
458 if (pBiggerStem
!= 0) {
459 // the bigger stem already exists....
460 pBiggerStem
->AddSuffix( Suffix
);
461 pNewSig
= *m_pSignatures
<< pBiggerStem
->GetSuffixList();
463 // ...get rid of OLD signature in Signatures;
466 // Fix word structure
473 // the bigger stem is new...
477 pWord
->AttachWordAndSuffixalStem(pBiggerStem
);
// Counts stems across all signatures in m_pSignatures that contain Suffix.
//
// Suffix                    suffix a signature must contain to be counted.
// Letter                    string compared with the end of each stem key.
// TotalStemsWithSuffix      out: reset to 0, then incremented once per stem
//                           of every signature containing Suffix.
// HowManyEndWithThisLetter  out: reset to 0, then incremented for each of
//                           those stems whose key ends in Letter.
484 void CMiniLexicon::HowManyStemsWithThisSuffixEndInThisLetter (
485 CStringSurrogate
& Suffix
,
486 CStringSurrogate
& Letter
,
487 int& TotalStemsWithSuffix
,
488 int& HowManyEndWithThisLetter
)
495 TotalStemsWithSuffix
= 0;
496 HowManyEndWithThisLetter
= 0;
// NOTE(review): this assertion fails exactly when Letter is "e" and Suffix is
// "ing"; it looks like a leftover debugging trap rather than a real
// precondition — confirm before relying on it.
498 Q_ASSERT (Letter
.Display() != "e" || Suffix
.Display() != "ing");
500 for (int signo
= 0; signo
< m_pSignatures
->GetCount(); signo
++)
502 pSig
= m_pSignatures
->GetAt(signo
);
// Signatures without Suffix contribute nothing.
503 if ( ! pSig
->Contains ( Suffix
) ) { continue; }
505 for (int stemno
= 0; stemno
< pSig
->GetStemPtrList()->size(); stemno
++)
506 { pStem
= pSig
->GetStemPtrList()->at(stemno
);
507 ssStem
= pStem
->GetKey();
508 TotalStemsWithSuffix
++;
// Compare the last Letter.GetLength() characters of the stem with Letter.
509 if ( ssStem
.Right (Letter
.GetLength() ) == Letter
)
511 HowManyEndWithThisLetter
++;
// Adds up GetNumberOfStems() over every signature in m_pSignatures that
// contains both ssSuffix1 and ssSuffix2.
// NOTE(review): the declaration of `count` and the return statement are not
// visible in this listing; presumably count starts at 0 and is the value
// returned — confirm.
517 int CMiniLexicon::StemsWithBothSuffixes(CStringSurrogate
& ssSuffix1
, CStringSurrogate
& ssSuffix2
)
523 for (int signo
= 0; signo
< m_pSignatures
->GetCount(); signo
++)
525 pSig
= m_pSignatures
->GetAt(signo
);
// Only signatures holding both suffixes contribute their stem count.
526 if ( pSig
->Contains (ssSuffix1
) && pSig
->Contains(ssSuffix2
) )
528 count
+= pSig
->GetNumberOfStems();
// Overload taking the first suffix as a QString. The visible body mirrors the
// CStringSurrogate overload above: it adds up GetNumberOfStems() over every
// signature in m_pSignatures that contains both Suffix1 and ssSuffix2.
// NOTE(review): the declaration of `count` and the return statement are not
// visible in this listing; presumably count starts at 0 and is the value
// returned — confirm.
533 int CMiniLexicon::StemsWithBothSuffixes(QString Suffix1
, CStringSurrogate
& ssSuffix2
)
539 for (int signo
= 0; signo
< m_pSignatures
->GetCount(); signo
++)
541 pSig
= m_pSignatures
->GetAt(signo
);
// Only signatures holding both suffixes contribute their stem count.
542 if ( pSig
->Contains (Suffix1
) && pSig
->Contains(ssSuffix2
) )
544 count
+= pSig
->GetNumberOfStems();
551 void CMiniLexicon::MoveWordsStemSuffixBoundaryToRight(CSignature* pThisSig,
552 CStringSurrogate& ssDeletee,
553 CTypedPtrMap<CMapStringToString, CString, CString>& Remapper
560 PDeletee = ssDeletee;
563 bool bModifiedSigExisted = FALSE;
564 CSignature *pLargerModifiedSig,
565 *pModifiedSig = NULL,
566 *pOldSigBecameSig = NULL,
569 CStringSurrogate ssStem,
581 bool bStemShouldRemain = FALSE;
582 bool bOldSigRemains = FALSE;
583 bool bNewStemAlreadyExisted = FALSE;
586 //----------------------------------------------------------------//
592 int* bStatus = new int [ pThisSig->Size() + 1];
596 *m_LogFile << "\n\tF1: Creating new signature: " <<
598 " Residue of old sig: "<<
599 WhatSigWillBecome.Display();
603 PNewSig = CreateADeletingSignature(
613 *m_LogFile << "\n\tF1: Creating new signature: " <<
615 " Residue of old sig: "<<
616 WhatSigWillBecome.Display();
619 // TODO: fix this later. Eg local: NULL.e.ly, creates a signature just NULL with locale. Not useful.
620 if ( PNewSig.Size() == 1 ) {return; }
622 // TODO: fix this later. Problem is what's left over is just NULL: e.g. NULL.e.ed
623 if ( CSS( WhatSigWillBecome ) == CSS ( CString("NULL") ) ) { return; }
626 pModifiedSig = *Signatures ^= PNewSig;
629 pModifiedSig = *Signatures << &PNewSig ;
632 pModifiedSig->SetRemark ( pThisSig->GetRemark() + " + allomorphy" );
634 if ( WhatSigWillBecome.Size() > 0 )
636 bOldSigRemains = TRUE;
637 pOldSigBecameSig = *Signatures << &WhatSigWillBecome;
638 pOldSigBecameSig ->SetRemark( CString("Allomorphy2") );
641 //----------------------------------------------------------------//
642 // Loop through stems:
643 //----------------------------------------------------------------//
645 Pos = pThisSig->GetStemPtrList()->GetHeadPosition();
646 CTypedPtrList<CPtrList, CStem*>* Temp = pThisSig->GetStemPtrList();
649 pOldStem = pThisSig->GetStemPtrList()->GetNext(Pos);
650 PNewStem = pOldStem->GetPiece() + ssDeletee;
651 PNewStem .SimplifyParseStructure();
652 pNewStem = *Stems_Suffixed ^= PNewStem;
656 if ( pNewStem ) // -- if the larger stem ("love") already existed
658 if ( m_LogFile ) { *m_LogFile << "\t NewStem: "<< pNewStem->GetKey() << " existed."; }
660 bNewStemAlreadyExisted = TRUE;
662 ModifyAnExistingSignatureWithANewStem ( pOldStem, pNewStem, pThisSig );
666 if ( m_LogFile ) { *m_LogFile << "\t NewStem: "<< PNewStem.Display() << " did not exist; it does now."; }
668 bNewStemAlreadyExisted = FALSE;
669 pNewStem = CreateANewSignatureForThisStem ( PNewStem, pModifiedSig );
671 val = pThisSig ->RemoveStem ( pOldStem );
675 if ( bOldSigRemains )
677 pOldStem ->SetSuffixList ( &WhatSigWillBecome );
678 pOldStem ->SetSuffixSignature ( pOldSigBecameSig );
683 if ( m_LogFile ) { *m_LogFile << "\n\t\tF1: Changing stem: "<< pOldStem->GetKey() ; }
685 // go through each word associated with the OldStem, and change its suffix pointer.
686 for ( i = 1; i <= (int) pThisSig->Size(); i++)
688 if ( bStatus[i] == FALSE )
690 bStemShouldRemain = TRUE;
691 ssSuffix = pThisSig->GetPiece(i);
692 if ( ssSuffix.IsNULL() )
698 Word = pOldStem->GetPiece() + ssSuffix ;
701 LinkThisWordToThisSignature ( Word, pOldSigBecameSig );
702 pThisSig ->RemoveWord (pWord);
704 if ( !pOldSigBecameSig->GetStemPtrList()->Find( pOldStem ) )
706 pOldSigBecameSig->GetStemPtrList()->AddTail (pOldStem);
710 } // this means the suffix doesn't delete the deleting suffix or anything
712 ssNewSuffix = SuffixChanges.GetPiece(i);
714 pSuffix = *Suffixes ^= ssNewSuffix;
715 if ( ssNewSuffix.IsNULL() )
721 Word = pOldStem->GetPiece() + pThisSig->GetPiece(i) ;
724 pWord = *Words ^= CStringSurrogate( Word );
725 pWord ->AppendToConfidence( CString ("Allomorphy 2") );
726 pWord ->SetSuffixPtr ( pSuffix );
727 val = pThisSig ->RemoveWord (pWord);
730 // case like hostilit-y becoming hostility
731 if ( ssNewSuffix.IsNULL() )
736 if ( bNewStemAlreadyExisted )
738 pWord ->SetSuffixSignature ( pLargerModifiedSig );
739 pWord ->SetStemPtr ( pNewStem );
740 pLargerModifiedSig ->AttachToSuffixSig ( pNewStem );
744 if ( m_LogFile ) { *m_LogFile << "\t NewStem: "<< PNewStem.Display() << " did not exist."; }
745 pModifiedSig->AttachToSuffixSig(
746 pNewStem, CSignature::eDo_Not_Call_Words);
747 pWord->AttachSuffixSignature(pModifiedSig);
748 pWord->SetStemPtr(pNewStem);
752 }// cycle through the suffixes
755 if ( bStemShouldRemain == FALSE )
757 Stems_Suffixed ->RemoveMember( pOldStem->GetPiece() );
766 // DEPRECATED: not used in new code. The overload defined directly below this one (taking a CStringSurrogate&) is the one in use.
// Convenience overload that accepts the deletee as a QString and forwards
// its three arguments to MoveWordsStemSuffixBoundaryToRight.
// NOTE(review): the visible call passes (CSignature*, QString, CParse*),
// which is exactly this overload's own parameter list, so as written it
// appears to call itself with no base case. RelateStems still reaches this
// overload when it passes m_Letter.Display(). Presumably a CStringSurrogate
// built from Deletee was meant to be passed so that the overload below is
// selected; lines of this body are missing from this listing — confirm
// against the full source.
767 void CMiniLexicon::MoveWordsStemSuffixBoundaryToRight(CSignature
* pThisSig
, QString Deletee
, CParse
* pSuffixCandidates
)
771 MoveWordsStemSuffixBoundaryToRight(pThisSig
, Deletee
, pSuffixCandidates
);
// Re-analyses the stems of pThisSig so that ssDeletee is moved from the
// suffix side onto the end of the stem.
//
// pThisSig           signature whose stems and words are re-analysed.
// ssDeletee          material appended to each stem to form the bigger stem.
// pSuffixCandidates  candidate deleting suffixes; its use is on lines not
//                    visible here (presumably forwarded to
//                    CreateADeletingSignature — confirm).
//
// Visible steps: build the new signature (PNewSig) and the residue of the old
// one (WhatSigWillBecome) with CreateADeletingSignature; bail out in two
// degenerate cases; register the signatures in m_pSignatures; then for each
// stem form stem+ssDeletee, merge into the existing bigger stem's signature
// or create the bigger stem, and re-point the affected words.
// NOTE(review): many lines of this body (declarations, braces, else-branches)
// are missing from this listing; nesting is partly inferred.
774 void CMiniLexicon::MoveWordsStemSuffixBoundaryToRight(CSignature
* pThisSig
,
775 CStringSurrogate
& ssDeletee
,
776 CParse
* pSuffixCandidates
//3/04
783 PDeletee
= ssDeletee
;
785 CSignature
* pModifiedSig
= NULL
;
786 CSignature
* pOldSigBecameSig
= NULL
;
787 CSignature
* pOlderSig
= NULL
;
789 CStringSurrogate ssSuffix
,
798 bool bStemShouldRemain
= FALSE
;
799 bool bOldSigRemains
= FALSE
;
800 bool bNewStemAlreadyExisted
= FALSE
;
802 //----------------------------------------------------------------//
806 // this actually changes the signature itself:
// One status slot per suffix piece, indexed from 1 (hence Size() + 1);
// CreateADeletingSignature sets each slot to TRUE or FALSE.
// NOTE(review): allocated with new[]; no matching delete[] is visible, and the
// two early returns below leave without one — possible leak, confirm.
808 int* bStatus
= new int [ pThisSig
->Size() + 1];
811 PNewSig
= CreateADeletingSignature(
819 LogFile( pThisSig
->Display(),
820 QString(" Creating new signature:"),
821 PNewSig
.Display('-'),
822 QString(" Residue of old sig: "),
823 WhatSigWillBecome
.Display() );
825 // TODO: fix this later. Eg local: NULL.e.ly, creates a signature just NULL with locale. Not useful.
826 if ( PNewSig
.Size() == 1 ) {return; }
828 // TODO: fix this later. Problem is what's left over is just NULL: e.g. NULL.e.ed
829 if ( CSS( WhatSigWillBecome
) == CSS ( QString("NULL") ) ) { return; }
// Look the new signature up among the existing ones; the insertion below is
// presumably the fallback when the lookup finds nothing (the test between the
// two statements is not visible).
831 pModifiedSig
= *m_pSignatures
^= PNewSig
;
834 pModifiedSig
= *m_pSignatures
<< &PNewSig
;
837 pModifiedSig
->SetRemark ( pThisSig
->GetRemark() + " + allomorphy" );
// A non-empty residue means some suffixes stay with the old stem, so the old
// analysis survives under the residue signature.
839 if ( WhatSigWillBecome
.Size() > 0 )
841 bOldSigRemains
= TRUE
;
842 pOldSigBecameSig
= *m_pSignatures
<< &WhatSigWillBecome
;
843 pOldSigBecameSig
->SetRemark( QString("Allomorphy") );
846 //----------------------------------------------------------------//
847 // Loop through stems:
848 //----------------------------------------------------------------//
851 LogFileHeader("New Stems", "Already existed?");
// NOTE(review): stems are detached/removed from pThisSig inside this
// index-based loop over pThisSig's own stems; if removal shifts the remaining
// stems down, incrementing stemno would skip every other stem — confirm.
854 for (int stemno
=0; stemno
< pThisSig
->GetNumberOfStems(); stemno
++)
856 pStem
= pThisSig
->GetStem(stemno
);
858 QString temp2
= pStem
->Display();
// Bigger stem = old stem + deletee, then looked up among existing stems.
859 PNewStem
= *pOldStem
+ ssDeletee
;
860 PNewStem
.SimplifyParseStructure();
861 QString temp4
= PNewStem
.Display();
862 pNewStem
= *m_pStems
^= PNewStem
;
863 QString temp3
= PNewStem
.Display();
865 // XXX. suppresses warning
866 // `pLargerModifiedSig' may be used uninitialized
867 // since g++ can’t figure out the control flow.
868 CSignature
* pLargerModifiedSig
= 0;
870 bNewStemAlreadyExisted
= (pNewStem
!= 0);
871 if (bNewStemAlreadyExisted
) {
872 // -- if the larger stem ("love") already existed
873 LogFile(pNewStem
->Display(), "yes");
// Merge the bigger stem's current signature with the new one and make the
// merged signature the bigger stem's signature.
874 pOlderSig
= pNewStem
->GetSuffixSignature();
875 POldSig
= *pNewStem
->GetSuffixSignature();
876 POldSig
.MergeParse(PNewSig
, PMergedSig
);
878 pLargerModifiedSig
= *m_pSignatures
<< &PMergedSig
;
879 pLargerModifiedSig
->SetRemark(QString("Allomorphy1"));
880 pNewStem
->SetSuffixList(&PMergedSig
);
881 pNewStem
->SetSuffixSignature( pLargerModifiedSig
);
882 pNewStem
->SetConfidence(QString("Allomorphy1"));
883 pThisSig
->DetachStem(pOldStem
,
884 CSignature::eDo_Not_Call_Words
);
// Bigger stem did not exist: create it and give it the new signature.
887 LogFile(PNewStem
.Display(), QString("no"));
888 pNewStem
= *m_pStems
<< PNewStem
;
889 pNewStem
->SetSuffixList ( pModifiedSig
);
890 pNewStem
->SetSuffixSignature( pModifiedSig
);
891 pNewStem
->SetConfidence ( QString("Allomorphy2") );
892 val
= pThisSig
->RemoveStem ( pOldStem
);
894 if ( bOldSigRemains
)
896 pOldStem
->SetSuffixList ( &WhatSigWillBecome
);
897 pOldStem
->SetSuffixSignature ( pOldSigBecameSig
);
901 // go through each word associated with the OldStem, and change its suffix pointer.
902 for (int affixno
= 1; affixno
<= pThisSig
->Size(); ++affixno
) {
// bStatus FALSE: this suffix stays with the old stem under the residue
// signature.
903 if ( bStatus
[affixno
] == FALSE
)
905 bStemShouldRemain
= TRUE
;
906 ssSuffix
= pThisSig
->GetPiece(affixno
);
907 if ( ssSuffix
.IsNULL() )
913 Word
= CSS(pOldStem
) + ssSuffix
;
916 pWord
= *m_pWords
^= CStringSurrogate( Word
);
917 pWord
->SetSuffixSignature ( pOldSigBecameSig
);
918 pWord
->AppendToConfidence(QString ("Allomorphy 1") );
919 pOldSigBecameSig
->AddWord( pWord
);
// NOTE(review): `!` binds tighter than `<`, so (assuming StemListContains
// returns a built-in integer or bool) this compares 0 or 1 with 0 and can
// never be true, leaving the AppendStemPtr call below unreachable. Probably
// meant `StemListContains(pOldStem) < 0` or `!StemListContains(pOldStem)` —
// confirm the return convention and fix.
920 if ( ! pOldSigBecameSig
->StemListContains( pOldStem
) < 0 )
922 pOldSigBecameSig
->AppendStemPtr(pOldStem
);
924 pThisSig
->RemoveWord (pWord
);
926 } // this means the suffix doesn't delete the deleting suffix or anything
// Otherwise the word moves to the bigger stem with its replacement suffix.
928 ssNewSuffix
= SuffixChanges
.GetPiece(affixno
);
930 pSuffix
= *m_pSuffixes
^= ssNewSuffix
;
931 if ( ssNewSuffix
.IsNULL() )
937 Word
= CSS(pOldStem
) + pThisSig
->GetPiece(affixno
);
941 pWord
= *m_pWords
^= CStringSurrogate( Word
);
942 pWord
->AppendToConfidence( QString ("Allomorphy 2") );
943 pWord
->SetSuffixPtr ( pSuffix
);
944 val
= pThisSig
->RemoveWord (pWord
);
947 // case like hostilit-y becoming hostility
948 if ( ssNewSuffix
.IsNULL() )
953 if (bNewStemAlreadyExisted
) {
954 pWord
->SetSuffixSignature(pLargerModifiedSig
);
955 pWord
->SetStemPtr(pNewStem
);
956 pLargerModifiedSig
->AttachToSuffixSig(pNewStem
);
958 // LogFile( PNewStem.Display(), QString( " did not exist.");
959 pModifiedSig
->AttachToSuffixSig(pNewStem
,
960 CSignature::eDo_Not_Call_Words
);
961 pWord
->AttachSuffixSignature(pModifiedSig
);
962 pWord
->SetStemPtr(pNewStem
);
966 }// cycle through the suffixes
// No suffix stayed with the old stem, so the old stem is dropped.
969 if ( bStemShouldRemain
== FALSE
)
971 m_pStems
->RemoveMember( pOldStem
);
980 Takes as input a signature, a Deletee (stem final material that will be deleted)
// Classifies each suffix of pSig relative to Deletee and builds the
// corresponding parses.
//
// pSig               signature whose suffixes are examined (pieces 1..Size()).
// ReplacingSuffixes  out: cleared, then one entry per suffix — "NULL" for the
//                    deletee, the (possibly truncated) suffix for deleters,
//                    "***" for suffixes judged not to delete.
// WhatSigWillBecome  out: cleared, then receives the suffixes judged not to
//                    delete.
// pSuffixCandidatesThatMightDeleteDeletee
//                    candidate list; a suffix absent from it is treated as
//                    non-deleting.
// bStatus[affixno] is set to TRUE for the deletee and for deleters, FALSE
// otherwise.
// NOTE(review): the Deletee and bStatus parameters, the NewSig declaration
// and the return statement are on lines missing from this listing;
// presumably NewSig is what is returned — confirm.
983 CParse
CMiniLexicon::CreateADeletingSignature ( CSignature
* pSig
,
985 CParse
& ReplacingSuffixes
,
987 CParse
& WhatSigWillBecome
,
988 CParse
* pSuffixCandidatesThatMightDeleteDeletee
) //3/04
992 CStringSurrogate ssSuffix
, ssTruncatedSuffix
;
998 int DeleteeLength
= Deletee
.GetLength();
999 QString
QNULL ("NULL");
1001 ReplacingSuffixes
.ClearParse();
1002 WhatSigWillBecome
.ClearParse();
1007 LogFile("Affix", "Status");
1008 for (int affixno
= 1; affixno
<= pSig
->Size(); affixno
++)
1009 { // Consider the suffixes in this signature. If one is the deletee, replace it by NULL.
1010 ssSuffix
= pSig
->GetPiece(affixno
);
1011 if ( ssSuffix
== Deletee
)
1013 NewSig
.Append (QNULL
);
1014 ReplacingSuffixes
.Append ( QNULL
);
1015 bStatus
[affixno
] = TRUE
;
1016 LogFile(ssSuffix
.Display(), QString(" is our deletee"));
// Look up the suffix itself and the suffix with its first
// Deletee.GetLength() characters removed.
1020 pSuffix
= *m_pSuffixes
^= ssSuffix
;
1021 ssTruncatedSuffix
= ssSuffix
.Mid( Deletee
.GetLength() );
1022 pTruncatedSuffix
= *m_pSuffixes
^= ssTruncatedSuffix
;
1023 // 0: if the candidate doesn't delete the deleting suffix
// NOTE(review): the candidate pointer is dereferenced here with no visible
// null check, while RelateStems passes the result of auto_ptr::get() —
// confirm callers never pass null.
1024 if ( ! pSuffixCandidatesThatMightDeleteDeletee
->Contains( ssSuffix
) )
1026 ReplacingSuffixes
.Append ( QString ("***") );
1027 WhatSigWillBecome
.Append( ssSuffix
);
1028 bStatus
[affixno
] = FALSE
;
1029 LogFile ( ssSuffix
.Display(), QString(" does not delete." ));
1031 // 1: this is the "ement" case
1034 pSuffix
->GetKey().Left( DeleteeLength
) == Deletee
&&
1035 ( StemsWithBothSuffixes ( TheStringNULL
, ssTruncatedSuffix
) >
1036 StemsWithBothSuffixes ( Deletee
, ssSuffix
)
1040 NewSig
.Append( ssTruncatedSuffix
);
1041 ReplacingSuffixes
.Append( ssTruncatedSuffix
);
1042 bStatus
[affixno
] = TRUE
;
1043 LogFile (ssSuffix
.Display(), QString(" the ement sort of case." ));
1046 StemsWithBothSuffixes ( TheStringNULL
, ssSuffix
) >
1047 StemsWithBothSuffixes ( Deletee
, ssSuffix
)
1049 // 2: this is the case of a suffix that deletes a stem-final Deletee
1052 pSuffix
->AddDeletee (Deletee
);
1053 NewSig
.Append( ssSuffix
);
1054 ReplacingSuffixes
.Append( ssSuffix
);
1055 bStatus
[affixno
] = TRUE
;
1056 LogFile (ssSuffix
.Display(), QString( " is a deleter." ));
1059 // 3: another case where we'll say, for now, the candidate doesn't delete the deleting suffix:
1061 ReplacingSuffixes
.Append ( QString ("***") );
1062 WhatSigWillBecome
.Append( ssSuffix
);
1063 bStatus
[affixno
] = FALSE
;
1064 LogFile (ssSuffix
.Display(), QString( " maybe does not delete Deletee." ));
1074 CParse CMiniLexicon::CreateADeletingSignature ( CSignature* pSig,
1076 CParse& ReplacingSuffixes,
1078 CParse& WhatSigWillBecome,
1079 StringToString& Remapper) //3/04
1083 CStringSurrogate ssSuffix, ssTruncatedSuffix;
1088 int DeleteeLength = Deletee.GetLength();
1089 QString QNULL ("NULL"),
1092 ReplacingSuffixes.ClearParse();
1093 WhatSigWillBecome.ClearParse();
1097 for (int n = 1; n <= pSig->Size(); n++)
1099 ssSuffix = pSig->GetPiece(n);
1100 if ( ssSuffix == Deletee )
1102 NewSig.Append (QNULL) ;
1103 ReplacingSuffixes.Append ( QNULL);
1107 { //if ( ! pSuffixCandidates->Contains( ssSuffix ) )
1109 QMap<QString,QString>::Iterator it;
1110 if ( Remapper.contains (ssSuffix.Display()) )
1111 //Remapper.Lookup( ssSuffix.SpellOut() ) )
1113 Rewrite =Remapper.find(ssSuffix.Display()).data();
1114 NewSig .Append( Rewrite);
1115 ReplacingSuffixes .Append( Rewrite);
1120 ReplacingSuffixes.Append ( QString ("***") );
1121 WhatSigWillBecome.Append( ssSuffix );