Stem.cpp

   1 // Implementation of CStem methods
   2 // Copyright © 2009 The University of Chicago
   3 #include "Stem.h"
   4
   5 // See also Stem_Phonology.cpp for phonology methods,
   6 // Stem_EncodingLength.cpp for description length calculations,
   7 // GUIclasses.cpp for methods pertaining to GUI output,
   8 // and Word.cpp for methods concerning stems-qua-words.
   9
  10 #include <Q3TextStream>
  11 #include <QList>
  12 #include "EarleyParser.h"
  13 #include "Signature.h"
  14 #include "Prefix.h"
  15 #include "Suffix.h"
  16
  17 CLexicon* CStem::m_Lexicon;  // assign value !  :TODO
  18
  19 // construction/destruction.
  20
  21 CStem::CStem(CMiniLexicon* mini)
  22         : CLParse(mini),
  23         m_WordCount(0),
  24         m_BrokenForm(),
  25         m_SuffixList(), // initialized below
  26         m_pSuffixSignature(NULL), m_pPrefixSignature(NULL),
  27         m_PrefixList(), // initialized below
  28         m_Regular(0),
  29         m_SimpleFlag(false),
  30         m_StemType(UNKNOWN),
  31         m_StemLoc(0),
  32         m_Stem2Loc(0),
  33         m_NumberOfStems(0),
  34         m_PrefixLoc(0), m_SuffixLoc(0),
  35         m_Confidence(QString()),
  36         m_pStem(NULL),
  37         m_strStem(),
  38         m_strSuffix(), m_strPrefix(),
  39         m_pSuffix(NULL), m_pPrefix(NULL),
  40         m_LengthOfPointerToMe(0.0),
  41         m_WordPtrList(new QList<CStem*>),
  42         m_LeftNeighbors(),
  43         m_RightNeighbors(),
  44         // compounding.
  45         m_MyEarleyParser(),
  46         m_CompoundCount(0.0),
  47         m_Affixness(0.0),
  48         // phonology.
  49         m_Phonology_Tier1(),
  50         m_Phonology_Tier2(),
  51         m_Phonology_Tier1_Skeleton(),
  52         m_UnigramLogProb(0.0),
  53         m_BigramLogProb(0.0),
  54         m_BigramComplexity(0.0),        // average
  55         m_UnigramComplexity(0.0),       // average
  56         m_PhonologicalContent(0.0),
  57         m_HMM_LogProbability(0.0),
  58         // first Boltzmann model.
  59         m_Tier2_LocalMI_Score(0.0),
  60         m_LocalMI_TotalBoltzmannScore(0.0),
  61         m_LocalMI_Plog(0.0),
  62         // second Boltzmann model.
  63         m_Tier2_DistantMI_Score(0.0),
  64         m_DistantMI_TotalBoltzmannScore(0.0),
  65         m_DistantMI_Plog(0.0),
  66         // tier-1 phonology info for graphical display.
  67         m_phonologies(), m_unigrams(), m_mis(),
  68         m_countofunigrams(0),
  69         m_countofmis(0),
  70         m_maxpositive(0.0),
  71         m_maxnegative(0.0),
  72         m_donephonology(false)
  73 {
  74         m_SuffixList.Alphabetize();
  75         m_PrefixList.Alphabetize();
  76 }
  77
  78 CStem::CStem(const CStringSurrogate& stem, CMiniLexicon* mini)
  79         : CLParse(stem, mini),
  80         m_WordCount         (0),
  81         m_BrokenForm        (),
  82         m_SuffixList        (), // initialized below
  83         m_pSuffixSignature  (NULL),
  84         m_pPrefixSignature  (NULL),
  85         m_PrefixList        (), // initialized below
  86         m_Regular           (0),
  87         m_SimpleFlag        (false),
  88         m_StemType          (UNKNOWN),
  89         m_StemLoc(          0),
  90         m_Stem2Loc          (0),
  91         m_NumberOfStems     (0),
  92         m_PrefixLoc         (0),
  93         m_SuffixLoc         (0),
  94         m_Confidence        (QString()),
  95         m_pStem             (NULL),
  96         m_strStem           (),
  97         m_strSuffix         (),
  98         m_strPrefix         (),
  99         m_pSuffix           (NULL),
 100         m_pPrefix           (NULL),
 101         m_LengthOfPointerToMe(0.0),
 102         m_WordPtrList(new QList<CStem*>),
 103         m_LeftNeighbors     (),
 104         m_RightNeighbors    (),
 105         // compounding.
 106         m_MyEarleyParser    (),
 107         m_CompoundCount     (0.0),
 108         m_Affixness (0.0),
 109         // phonology.
 110         m_Phonology_Tier1   (),
 111         m_Phonology_Tier2   (),
 112         m_Phonology_Tier1_Skeleton(),
 113         m_UnigramLogProb    (0.0),
 114         m_BigramLogProb     (0.0),
 115         m_BigramComplexity  (0.0),      // average
 116         m_UnigramComplexity(0.0),       // average
 117         m_PhonologicalContent(0.0),
 118         m_HMM_LogProbability(0.0),
 119         // first Boltzmann model.
 120         m_Tier2_LocalMI_Score(0.0),
 121         m_LocalMI_TotalBoltzmannScore(0.0),
 122         m_LocalMI_Plog(0.0),
 123         // second Boltzmann model.
 124         m_Tier2_DistantMI_Score(0.0),
 125         m_DistantMI_TotalBoltzmannScore(0.0),
 126         m_DistantMI_Plog(0.0),
 127         // tier-1 phonology info for graphical display.
 128         m_phonologies(), m_unigrams(), m_mis(),
 129         m_countofunigrams(0),
 130         m_countofmis(0),
 131         m_maxpositive(0.0),
 132         m_maxnegative(0.0),
 133         m_donephonology(false)
 134 {
 135         m_SuffixList.Alphabetize();
 136         m_PrefixList.Alphabetize();
 137 }
 138
 139 CStem::CStem(const CStem& x)
 140         : CLParse(x),
 141         m_WordCount(x.m_WordCount),
 142         m_BrokenForm(), // XXX. copy?
 143         m_SuffixList(), // initialized below. XXX. copy?
 144         m_pSuffixSignature      (x.m_pSuffixSignature),
 145         m_pPrefixSignature      (x.m_pPrefixSignature),
 146         m_PrefixList            (),     // initialized below. XXX. copy?
 147         m_Regular               (x.m_Regular),
 148         m_SimpleFlag            (x.m_SimpleFlag),
 149         m_StemType              (x.m_StemType),
 150         m_StemLoc               (x.m_StemLoc),
 151         m_Stem2Loc              (x.m_Stem2Loc),
 152         m_NumberOfStems         (x.m_NumberOfStems),
 153         m_PrefixLoc             (x.m_PrefixLoc),
 154         m_SuffixLoc             (x.m_SuffixLoc),
 155         m_Confidence            (x.m_Confidence),
 156         m_pStem                 (x.m_pStem),
 157         m_strStem               (x.m_strStem),
 158         m_strSuffix(),
 159         m_strPrefix(),  // XXX. copy?
 160         m_pSuffix(x.m_pSuffix),
 161         m_pPrefix(x.m_pPrefix),
 162         m_LengthOfPointerToMe(x.m_LengthOfPointerToMe),
 163         m_WordPtrList(new QList<CStem*>(*x.m_WordPtrList)),
 164         m_LeftNeighbors(), m_RightNeighbors(),  // XXX. copy?
 165         // compounding.
 166         m_MyEarleyParser(),     // XXX. copy?
 167         m_CompoundCount(x.m_CompoundCount),
 168         m_Affixness(x.m_Affixness),
 169         // phonology.
 170         m_Phonology_Tier1(x.m_Phonology_Tier1),
 171         m_Phonology_Tier2(x.m_Phonology_Tier2),
 172         m_Phonology_Tier1_Skeleton(x.m_Phonology_Tier1_Skeleton),
 173         m_UnigramLogProb(x.m_UnigramLogProb),
 174         m_BigramLogProb(x.m_BigramLogProb),
 175         m_BigramComplexity(x.m_BigramComplexity),
 176         m_UnigramComplexity(x.m_UnigramComplexity),
 177         m_PhonologicalContent(x.m_PhonologicalContent),
 178         m_HMM_LogProbability(0.0),      // XXX. copy?
 179         // first Boltzmann model.
 180         m_Tier2_LocalMI_Score(x.m_Tier2_LocalMI_Score),
 181         m_LocalMI_TotalBoltzmannScore(0.0),     // XXX. copy?
 182         m_LocalMI_Plog(0.0),    // XXX. copy?
 183         // second Boltzmann model.
 184         m_Tier2_DistantMI_Score(x.m_Tier2_DistantMI_Score),
 185         m_DistantMI_TotalBoltzmannScore(0.0),   // XXX. copy?
 186         m_DistantMI_Plog(0.0),  // XXX. copy?
 187         // tier-1 phonology info for graphical display.
 188         m_phonologies(), m_unigrams(), m_mis(), // XXX. copy?
 189         m_countofunigrams(0),   // XXX. copy?
 190         m_countofmis(0),        // XXX. copy?
 191         m_maxpositive(0.0),     // XXX. copy?
 192         m_maxnegative(0.0),     // XXX. copy?
 193         m_donephonology(x.m_donephonology)
 194 {
 195         m_SuffixList.Alphabetize();
 196         m_PrefixList.Alphabetize();
 197 }
 198
 199 CStem::CStem(const CLParse& text_in_corpus)
 200         : CLParse(text_in_corpus),
 201         m_WordCount(0),
 202         m_BrokenForm(),
 203         m_SuffixList(), // initialized below
 204         m_pSuffixSignature(NULL), m_pPrefixSignature(NULL),
 205         m_PrefixList(), // initialized below
 206         m_Regular(0),
 207         m_SimpleFlag(false),
 208         m_StemType(UNKNOWN),
 209         m_StemLoc(0),
 210         m_Stem2Loc(0),
 211         m_NumberOfStems(0),
 212         m_PrefixLoc(0), m_SuffixLoc(0),
 213         m_Confidence(QString()),
 214         m_pStem(NULL),
 215         m_strStem(),
 216         m_strSuffix(), m_strPrefix(),
 217         m_pSuffix(NULL), m_pPrefix(NULL),
 218         m_LengthOfPointerToMe(0.0),
 219         m_WordPtrList(new QList<CStem*>),
 220         m_LeftNeighbors(),
 221         m_RightNeighbors(),
 222         // compounding.
 223         m_MyEarleyParser(),
 224         m_CompoundCount(0.0),
 225         m_Affixness(0.0),
 226         // phonology.
 227         m_Phonology_Tier1(),
 228         m_Phonology_Tier2(),
 229         m_Phonology_Tier1_Skeleton(),
 230         m_UnigramLogProb(0.0),
 231         m_BigramLogProb(0.0),
 232         m_BigramComplexity(0.0),        // average
 233         m_UnigramComplexity(0.0),       // average
 234         m_PhonologicalContent(0.0),
 235         m_HMM_LogProbability(0.0),
 236         // first Boltzmann model.
 237         m_Tier2_LocalMI_Score(0.0),
 238         m_LocalMI_TotalBoltzmannScore(0.0),
 239         m_LocalMI_Plog(0.0),
 240         // second Boltzmann model.
 241         m_Tier2_DistantMI_Score(0.0),
 242         m_DistantMI_TotalBoltzmannScore(0.0),
 243         m_DistantMI_Plog(0.0),
 244         // tier-1 phonology info for graphical display.
 245         m_phonologies(), m_unigrams(), m_mis(),
 246         m_countofunigrams(0),
 247         m_countofmis(0),
 248         m_maxpositive(0.0),
 249         m_maxnegative(0.0),
 250         m_donephonology(false)
 251 {
 252         m_SuffixList.Alphabetize();
 253         m_PrefixList.Alphabetize();
 254 }
 255
 256 CStem::CStem(const CParse& text, CMiniLexicon* lex)
 257         : CLParse(text, lex),
 258         m_WordCount(0),
 259         m_BrokenForm(),
 260         m_SuffixList(), // initialized below
 261         m_pSuffixSignature(NULL), m_pPrefixSignature(NULL),
 262         m_PrefixList(), // initialized below
 263         m_Regular(0),
 264         m_SimpleFlag(false),
 265         m_StemType(UNKNOWN),
 266         m_StemLoc(0),
 267         m_Stem2Loc(0),
 268         m_NumberOfStems(0),
 269         m_PrefixLoc(0), m_SuffixLoc(0),
 270         m_Confidence(QString()),
 271         m_pStem(NULL),
 272         m_strStem(),
 273         m_strSuffix(), m_strPrefix(),
 274         m_pSuffix(NULL), m_pPrefix(NULL),
 275         m_LengthOfPointerToMe(0.0),
 276         m_WordPtrList(new QList<CStem*>),
 277         m_LeftNeighbors(),
 278         m_RightNeighbors(),
 279         // compounding.
 280         m_MyEarleyParser(),
 281         m_CompoundCount(0.0),
 282         m_Affixness(0.0),
 283         // phonology.
 284         m_Phonology_Tier1(),
 285         m_Phonology_Tier2(),
 286         m_Phonology_Tier1_Skeleton(),
 287         m_UnigramLogProb(0.0),
 288         m_BigramLogProb(0.0),
 289         m_BigramComplexity(0.0),        // average
 290         m_UnigramComplexity(0.0),       // average
 291         m_PhonologicalContent(0.0),
 292         m_HMM_LogProbability(0.0),
 293         // first Boltzmann model.
 294         m_Tier2_LocalMI_Score(0.0),
 295         m_LocalMI_TotalBoltzmannScore(0.0),
 296         m_LocalMI_Plog(0.0),
 297         // second Boltzmann model.
 298         m_Tier2_DistantMI_Score(0.0),
 299         m_DistantMI_TotalBoltzmannScore(0.0),
 300         m_DistantMI_Plog(0.0),
 301         // tier-1 phonology info for graphical display.
 302         m_phonologies(), m_unigrams(), m_mis(),
 303         m_countofunigrams(0),
 304         m_countofmis(0),
 305         m_maxpositive(0.0),
 306         m_maxnegative(0.0),
 307         m_donephonology(false)
 308 {
 309         m_SuffixList.Alphabetize();
 310         m_PrefixList.Alphabetize();
 311 }
 312
 313 CStem::~CStem()
 314 {
 315         delete m_BrokenForm;
 316         delete m_WordPtrList;
 317         delete m_MyEarleyParser;
 318 }
 319
 320 //-----------------------------------------------------------------
 321 // Overloaded operators
 322 //-----------------------------------------------------------------
 323
 324 void CStem::operator= (const CStem& RHS)
 325 {
 326   CopyParse(RHS);
 327
 328   m_BrokenForm                                  = NULL;
 329   m_Confidence                                  = RHS.GetConfidence();
 330   m_NumberOfStems                               = RHS.GetNumberOfStems();
 331   m_pPrefix                                     = RHS.GetPrefixPtr();
 332   m_pPrefixSignature                            = RHS.GetPrefixSignature();
 333   m_PrefixLoc                                   = RHS.GetPrefixLoc();
 334   m_pStem                                       = RHS.GetStemPtr();
 335   m_pSuffix                                     = RHS.GetSuffixPtr();
 336   m_pSuffixSignature                            = RHS.GetSuffixSignature();
 337   m_Regular                                     = RHS.GetRegular();
 338   m_SimpleFlag                                  = RHS.GetSimpleFlag();
 339   m_Stem2Loc                                    = RHS.GetStem2Loc();
 340   m_StemLoc                                     = RHS.GetStemLoc();
 341   m_StemType                                    = RHS.GetStemType();
 342   m_SuffixLoc                                   = RHS.GetSuffixLoc();
 343   m_WordCount                                   = RHS.GetWordCount();
 344   m_Phonology_Tier1                             = RHS.m_Phonology_Tier1;
 345   m_Phonology_Tier2                             = RHS.m_Phonology_Tier2;
 346   m_Phonology_Tier1_Skeleton                    = RHS.m_Phonology_Tier1_Skeleton;
 347   m_CompoundCount                               = RHS.GetCompoundCount();
 348   m_Affixness                                   = RHS.GetAffixness();
 349   m_MyEarleyParser                              = RHS.GetMyEarleyParser();
 350 //  m_LengthOfPointerToMe                       = RHS.GetLengthOfPointerToMe();
 351
 352 //  m_SuffixList.SetAlphabetical();
 353 //  m_PrefixList.SetAlphabetical();
 354
 355 }
 356
 357
 358 void CStem::Copy (CStem& RHS)
 359 {
 360   CopyParse(RHS);
 361
 362   m_BrokenForm                                  = NULL;
 363   m_Confidence                                  = RHS.GetConfidence();
 364   m_NumberOfStems                               = RHS.GetNumberOfStems();
 365   m_pPrefix                                     = RHS.GetPrefixPtr();
 366   m_pPrefixSignature                            = RHS.GetPrefixSignature();
 367   m_PrefixLoc                                   = RHS.GetPrefixLoc();
 368   m_pStem                                       = RHS.GetStemPtr();
 369   m_pSuffix                                     = RHS.GetSuffixPtr();
 370   m_pSuffixSignature                            = RHS.GetSuffixSignature();
 371   m_Regular                                     = RHS.GetRegular();
 372   m_SimpleFlag                                  = RHS.GetSimpleFlag();
 373   m_Stem2Loc                                    = RHS.GetStem2Loc();
 374   m_StemLoc                                     = RHS.GetStemLoc();
 375   m_StemType                                    = RHS.GetStemType();
 376   m_SuffixLoc                                   = RHS.GetSuffixLoc();
 377   m_WordCount                                   = RHS.GetWordCount();
 378   m_MyEarleyParser                              = RHS.GetMyEarleyParser();
 379 //  m_SuffixList.SetAlphabetical();
 380 //  m_PrefixList.SetAlphabetical();
 381
 382   m_CompoundCount  = RHS.GetCompoundCount();
 383   m_Affixness = RHS.GetAffixness();
 384         m_LengthOfPointerToMe = RHS.GetLengthOfPointerToMe();
 385 }
 386
 387
 388
 389 //-----------------------------------------------------------------
 390 // Other methods
 391 //-----------------------------------------------------------------
 392
 393
 394 // Copy utility for stems
 395 //
 396 // Parameters:
 397 //    RHS - the stem to be copied
 398
 399 void CStem::CopyStemInformation(CStem* RHS)
 400 {
 401   m_Confidence = RHS->GetConfidence();
 402   SetCorpusCount(RHS->GetCorpusCount());
 403   m_NumberOfStems  = RHS->GetNumberOfStems();
 404   m_pPrefix = RHS->GetPrefixPtr();
 405
 406   if ( RHS->GetPrefixList() )
 407   {
 408     m_PrefixList = RHS->GetPrefixList();
 409   }
 410
 411   m_PrefixLoc = RHS->GetPrefixLoc();
 412
 413   if ( RHS->GetPrefixSignature() )
 414   {
 415     m_pPrefixSignature = RHS->GetPrefixSignature();
 416   }
 417
 418   m_pStem  = RHS->GetStemPtr();
 419   m_pSuffix = RHS->GetSuffixPtr();
 420   m_Regular = RHS->GetRegular();
 421   m_SimpleFlag = RHS->GetSimpleFlag();
 422   m_Stem2Loc = RHS->GetStem2Loc();
 423   m_StemLoc = RHS->GetStemLoc();
 424   m_StemType = RHS->GetStemType();
 425   m_SuffixLoc  = RHS->GetSuffixLoc();
 426
 427   if ( RHS->GetSuffixList() )
 428   {
 429     m_SuffixList = RHS->GetSuffixList();
 430   }
 431
 432   if ( RHS->GetSuffixSignature() )
 433   {
 434     m_pSuffixSignature = RHS->GetSuffixSignature();
 435   }
 436
 437   m_WordCount = RHS->GetWordCount();
 438
 439   CStem* word;
 440   for (int wordno = 0; wordno < RHS->GetWordPtrList()->size(); wordno++)
 441   {     word = RHS->GetWordPtrList()->at(wordno);
 442         m_WordPtrList->append( word );
 443     }
 444
 445   m_CompoundCount  = RHS->GetCompoundCount();
 446   m_Affixness = RHS->GetAffixness();
 447         m_LengthOfPointerToMe =  RHS->GetLengthOfPointerToMe();
 448     m_MyEarleyParser = RHS->GetMyEarleyParser();
 449 }
 450 //-----------------------------------------------------------------------------------//
 451 // Add the prefix 'NULL' to the list of prefixes
 452 void CStem::AddNULLPrefix()
 453 //-----------------------------------------------------------------------------------//
 454 {
 455   m_WordCount++;
 456   QString Null = "NULL";
 457   if ( ! m_PrefixList.ContainsNULL() )
 458   {
 459     m_PrefixList.Append (CStringSurrogate(Null.unicode(),0,Null.length()));
 460   }
 461 }
 462
 463 //-----------------------------------------------------------------------------------//
 464 // Add the suffix 'NULL' to the list of suffixes
 465 void CStem::AddNULLSuffix()
 466 //-----------------------------------------------------------------------------------//
 467 {
 468   m_WordCount++;
 469   QString Null = "NULL";
 470   if ( ! m_SuffixList.ContainsNULL() )
 471   {
 472     m_SuffixList.Append (CStringSurrogate(Null.unicode(),0,Null.length()));
 473   }
 474 }
 475
 476 //-----------------------------------------------------------------------------------//
 477 // Add a word to the word list
 478 bool CStem::AddWord (CStem* pWord)
 479 //-----------------------------------------------------------------------------------//
 480 {
 481         if ( m_WordPtrList->indexOf(pWord) < 0 )
 482         {
 483                 m_WordPtrList->append (pWord);
 484                 return TRUE;
 485         }
 486         return FALSE;
 487 }
 488
 489
 490 // Add a prefix to the prefix list
 491 //
 492 // Parameters:
 493 //    pPrefix - pointer to the prefix to
 494 //    be added
 495
 496 void CStem::AddPrefix (CPrefix* pPrefix)
 497 {
 498   if (! ContainsPrefix (pPrefix)) {
 499 //    if(!m_PrefixList.Alphabetical()) m_PrefixList.Alphabetize();
 500     m_PrefixList.Append (pPrefix->GetKey());
 501   }
 502 }
 503
 504
 505 // Add a suffix to the suffix list
 506 //
 507 // Parameters:
 508 //    pSuffix - pointer to the suffix to
 509 //    be added
 510
 511 void CStem::AddSuffix(CSuffix* pSuffix)
 512 {
 513   if ( !ContainsSuffix (pSuffix) ) {
 514 //    if(!m_SuffixList.Alphabetical()) m_SuffixList.Alphabetize();
 515     m_SuffixList.Append (pSuffix->GetKey());
 516   }
 517 }
 518
 519
 520
 521
 522 // Add a suffix to the suffix list
 523 //
 524 // Parameters:
 525 //    key - surrogate string of the suffix to
 526 //    be added
 527
 528 void CStem::AddSuffix(const CStringSurrogate& key)
 529 {
 530   if ( !m_SuffixList.Contains (key) ) {
 531 //    if(!m_SuffixList.Alphabetical()) m_SuffixList.Alphabetize();
 532     m_SuffixList.Append (key);
 533   }
 534 }
 535
 536
 537 // Copy a list of suffixes into the suffix list
 538 //
 539 // Parameters:
 540 //    pParse - the list of new suffixes
 541
 542 void CStem::CopySuffixList(CParse* pParse)
 543 {
 544   for (int i = 1; i <= (int)pParse->Size(); i++)
 545   {
 546     AddSuffix ( pParse->GetPiece(i) );
 547   }
 548 }
 549
 550
 551 // Find out if the prefix list contains a specific
 552 // prefix
 553 //
 554 // Parameters:
 555 //    Prefix - the prefix in question
 556 //
 557 // Returns:
 558 //    bool - true if the prefix is in our list
 559
 560 bool  CStem::ContainsPrefix(CPrefix* Prefix) const
 561 {
 562   if ( m_PrefixList.Contains (Prefix->GetKey()) ) {
 563     return true;
 564   } else {
 565     return false;
 566   }
 567 }
 568
 569 bool  CStem::ContainsPrefix(const CStringSurrogate& Prefix) const
 570 {
 571   if ( m_PrefixList.Contains (Prefix) ) {
 572     return true;
 573   } else {
 574     return false;
 575   }
 576 }
 577
 578 QString CStem::GetSortingString ()
 579 {
 580         QString sortString = GetSuffixList()->Display(); return sortString;
 581 }
 582 // Add a prefix to the prefix list
 583 //
 584 // Parameters:
 585 //    Prefix - prefix surrogate to be added be added
 586
 587 void CStem::AddPrefix(const CStringSurrogate& Prefix)
 588 {
 589   if ( !ContainsPrefix (Prefix) )
 590   {
 591     m_PrefixList.Append (Prefix);
 592   }
 593 }
 594
 595
 596 // Find out if the suffix list contains a specific
 597 // suffix
 598 //
 599 // Parameters:
 600 //    Suffix - the suffix to look for
 601 //
 602 // Returns:
 603 //    bool - true if the suffix is in the list
 604
 605 bool  CStem::ContainsSuffix(CSuffix* Suffix) const
 606 {
 607   if (m_SuffixList.Contains (Suffix->GetKey() ) ){
 608     return true;
 609   } else {
 610     return false;
 611   }
 612 }
 613
 614
 615 // Increment the word count
 616 //
 617 // Parameters:
 618 //    n - amount to increment, default = 1
 619
 620 void CStem::IncrementWordCount (int n )
 621 {
 622   m_WordCount += n;
 623   Q_ASSERT (m_WordCount > 0);
 624   Q_ASSERT (m_WordCount < 1000000);
 625 }
 626
 627
 628 // Get the prefix
 629 //
 630 // Parameters:
 631 //    Prefix - the parse to put the prefix in
 632
 633 void    CStem::GetPrefix ( CParse& Prefix ) const
 634 {
 635     if (m_strPrefix.GetKeyLength() > 0) {
 636         Prefix =  m_strPrefix;
 637         return;
 638     }
 639     Prefix = GetPiece( m_PrefixLoc );
 640 }
 641
 642
 643 // Get the suffix
 644 //
 645 // Parameters:
 646 //    Output - the parse to put the suffix in
 647
 648 void    CStem::GetSuffix(CParse& Output ) const
 649 {
 650   if (m_strSuffix.GetKeyLength() > 0) {
 651         Output =  m_strSuffix;
 652         return;
 653     }
 654   Output = GetPiece( m_SuffixLoc );
 655 }
 656
 657
 658 // Get the stem
 659 //
 660 // Parameters:
 661 //    Output - the parse to put the stem in
 662
 663 void    CStem::GetStem(CParse& Output) const
 664 {
 665     if (m_strStem.GetKeyLength() > 0) {
 666         Output =  m_strStem;
 667         return;
 668     }
 669     Output = GetPiece( m_StemLoc );
 670 }
 671
 672
 673
 674
 675 // Display the type of this stem
 676 //
 677 // Returns:
 678 //    QString - the type of this stem
 679
 680 QString CStem::DisplayStemType() const
 681 {
 682   switch (m_StemType)
 683   {
 684   case NORMAL:
 685     { return ""; } // return "Normal":
 686   case BIWORD_COMPOUND:
 687     { return "2 word compound"; }
 688   case MULTIPLE_COMPOUND:
 689     { return "Multiple-word compound"; }
 690   case POSSIBLE_COMPOUND:
 691     { return "Possible compound"; };
 692   case NUMBER:
 693     { return "Number"; }
 694   case UNKNOWN:
 695     { return "??"; }
 696   case ENDS_IN_HYPHEN:
 697     { return "Ends in hyphen"; }
 698   case STEM_COMPOUND:
 699     { return "Compound"; }
 700   case STEM_NORMAL:
 701     { return "Stem"; }
 702   case STEM_PLUS_SUFFIX:
 703     { return "Stem & Suffix"; }
 704   case POLYWORD_PIECE:
 705     { return "Polyword piece"; }
 706   default:
 707     { return "???"; }
 708   }
 709 }
 710
 711
 712 // Merge the prefix and stem
 713
 714 void CStem::ClearPrefixStemSplit()
 715 {
 716   if ( m_StemLoc && m_PrefixLoc)
 717   {
 718     MergePieces (m_PrefixLoc);
 719   }
 720
 721   m_PrefixLoc = 0;
 722   m_StemLoc = 1;
 723 }
 724
 725
 726
 727
 728 // Merge the root and suffix
 729
 730 void CStem::ClearRootSuffixSplit()
 731 {
 732   if ( m_StemLoc && HasASuffix()  )
 733   {
 734     MergePieces (m_StemLoc);
 735   }
 736   if ( m_StemLoc == 1)
 737   {
 738     m_StemLoc  = 0;
 739   }
 740 }
 741
 742 // TODO : define this function
 743 // Get the sorting quantity
 744 //
 745 // Returns:
 746 //    float - the sorting quantity
 747
 748 float CStem::GetSortingQuantity() const
 749 {
 750   Q_ASSERT (0);
 751   return 0;
 752 }
 753
 754 /// used in allomorphy code.
 755 void CStem::RepairSuffixList(const CMiniLexicon* Lexicon)
 756 {
 757         struct not_implemented { };
 758         throw not_implemented();
 759         static_cast<void>(Lexicon);
 760
 761 //      QString NewSuffix;
 762 //      CSuffix* pNewSuffix;
 763
 764 //      for (int i = 1; i <= m_SuffixList.Size(); ++i) {
 765                 // TODO: Get John's help to fix, I don't understand.
 766 //              CStringSurrogate ssSuffix = m_SuffixList.GetPiece(i);
 767 //              CSuffix* pOldSuffix = *Lexicon->GetSuffixes() ^= ssSuffix;
 768
 769 //              SuffixStringTranslation.GetPiece(ssSuffix.SpellOut(), NewSuffix);
 770 //              pNewSuffix = *Lexicon->GetSuffixes() ^=
 771 //      }
 772 }
 773
 774 // Detach a specific suffix from the list
 775 //
 776 // Parameters:
 777 //    pSuffix - the suffix to detach
 778
 779 void CStem::DetachSuffix(CSuffix* pSuffix)
 780 {
 781   Q_ASSERT (pSuffix);
 782   m_SuffixList.Remove ( pSuffix->GetKey() );
 783   pSuffix->RemoveFromStemPtrList ( this );
 784   pSuffix->RemoveStemString ( GetKey() );
 785 }
 786
 787
 788
 789 // Detach a specific prefix from the list
 790 //
 791 // Parameters:
 792 //    pPrefix - the prefix to detach
 793
 794 void CStem::DetachPrefix(CPrefix* pPrefix)
 795 {
 796   Q_ASSERT (pPrefix);
 797   m_PrefixList.Remove ( pPrefix->GetKey() );
 798   pPrefix->RemoveFromStemPtrList ( this );
 799   pPrefix->RemoveStemString ( GetKey() );
 800 }
 801
 802
 803 // Remove a word from the word list
 804 //
 805 // Parameters:
 806 //    pWord - pointer to the word to be removed
 807
 808 void CStem::RemoveWordFromWordPtrList(CStem* pWord)
 809 {
 810
 811   m_WordPtrList->remove(pWord);
 812
 813 }
 814
 815 // Replace the old suffix signature and return it
 816 //
 817 // Parameters:
 818 //    pNewSig - the new signature
 819 //
 820 // Returns:
 821 //    CSignature* - the old signature
 822
 823 CSignature* CStem::ChangeSuffixSignature(CSignature* pNewSig)
 824 {
 825         CSignature* pOldSig = m_pSuffixSignature;
 826
 827         if (pOldSig != 0)
 828                 pOldSig->DetachStem(this, CSignature::eCall_Words);
 829
 830         m_pSuffixSignature = pNewSig;
 831         return pOldSig;
 832 }
 833
 834 void CStem::OutputStem(Q3TextStream& outf, int index,
 835         QMap<QString, QString>* filter)
 836 {
 837         QString confidence;
 838         // "# Index | Stem                 | Confidence           | Corpus Count | # of Words  | Affixes |  Words"
 839
 840         outf << "  ";
 841
 842         outf.setf(2);
 843         outf.width(5);
 844         outf << index + 1;
 845         outf << "   ";
 846
 847         outf.width(20);
 848         outf << Display();
 849         outf << "   ";
 850
 851         outf.width(20);
 852         confidence = GetConfidence();
 853         if( confidence == "" ) confidence = "NONE";
 854         outf << confidence.replace( " ", "_" );
 855         outf << "   ";
 856
 857         outf.unsetf(2);
 858         outf.width(12);
 859         outf << GetCorpusCount();
 860         outf << "   ";
 861
 862         //outf.width(11);
 863         if( GetSuffixSignature() )
 864         {
 865                 outf.width(12);
 866                 outf << m_WordPtrList->size();
 867                 outf << "   ";
 868
 869                 outf.setf(2);
 870                 outf << GetSuffixSignature()->Display(' ', filter);
 871                 outf << " ";
 872         }
 873         else if( GetPrefixSignature() )
 874         {
 875                 outf.width(12);
 876                 outf << GetPrefixSignature()->Size();
 877                 outf << "   ";
 878
 879                 outf.setf(2);
 880                 outf << GetPrefixSignature()->Display(' ', filter);
 881                 outf << " ";
 882         }
 883         else
 884         {
 885                 outf << 0;
 886                 outf << "   ";
 887
 888                 outf.setf(2);
 889                 outf << "NONE";
 890                 outf << " ";
 891         }
 892
 893         outf << endl;
 894 }