Added jben.py as a script to install.
[jben2_gui.git] / kpengine / scoring.c
blobcea817d6ebe12ccea3e7fb600cde71818004cf80
1 /* -*- mode: C; c-file-style: "bsd"; tab-width: 4 -*- */
2 /* scoring.c - The handwriting recognition engine
3 * JStroke 1.x - Japanese Kanji handwriting recognition technology demo.
4 * Copyright (C) 1997 Robert E. Wells
5 * http://wellscs.com/pilot
6 * mailto:robert@wellscs.com
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program (gpl.html); if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 * Derived from prior work by Todd David Rudick on JavaDict and StrokeDic.
23 * Makes use of KANJIDIC data from Jim Breen of Monash University.
24 * Further credit details available at http://wellscs.com/pilot
25 * See readme.txt, changelo, and gpl.html for more information.
26 * -------------------------------------------------------------------------*/
28 #include <string.h>
30 #include "jstroke.h"
31 #include "memowrite.h"
33 #define diAngCostBase 52 // See angles.pl for derivation.
34 #define diAngCostScale 98 // See angles.pl for derivation.
35 #define diHugeCost (((((ULong)24)*diAngCostScale)+diAngCostBase)*100)
37 #define diMaxScoreToSquare ((ULong) 0xffff)
38 #define diMaxScoreSquared (diMaxScoreToSquare*diMaxScoreToSquare)
40 #define diPathBufLen 16
42 void StrokeScorerEvalItem(StrokeScorer *pScorer, StrokeDicEntryList *cpEntry,
43 ULong* ipScore /*OUT*/);
45 ULong StrokeDicScoreStroke(Byte* bpX, Byte* bpY, UInt iLen,
46 CharPtr cpPath, UInt iPathLen,
47 UInt iDepth);
49 CharPtr StrokeScorerExtraFilters(StrokeScorer *pscorer,
50 CharPtr cp, ULong* ipScore /*OUT*/);
52 Long StrokeScorerExtraEval(StrokeScorer *pscorer,
53 char cArg, UInt iStroke);
55 ULong SqrtULong(ULong val);
57 /* ----- SqrtULong ---------------------------------------------------------*/
59 ULong SqrtULong(ULong val) {
60 ULong root, rootLo, rootHi;
62 if (val == 0)
63 return 0;
64 else if (val == 1)
65 return 1;
66 else if (val >= diMaxScoreSquared)
67 return diMaxScoreToSquare;
69 rootLo = 1;
70 rootHi = (val >> 1); /* will be >= 1, since val > 1. */
71 if (rootHi > diMaxScoreToSquare)
72 rootHi = diMaxScoreToSquare;
74 while ((root = ((rootLo + rootHi) >> 1)) > rootLo) {
75 if ((root * root) <= val) {
76 rootLo = root;
78 else {
79 rootHi = root;
83 return root;
86 /* ----- StrokeScorerCreate-------------------------------------------------*/
87 /* Create a StrokeScorer object. (Returns NULL if can't get memory) */
89 StrokeScorer *StrokeScorerCreate (StrokeDic cpStrokeDic, RawStroke *rsp,
90 UInt iStrokeCnt) {
91 StrokeScorer *pScorer = (StrokeScorer *) MemPtrNew(sizeof(StrokeScorer));
92 if (!pScorer) {
93 ErrBox("Not enough memory.");
94 return NULL;
97 pScorer->m_cpStrokeDic = cpStrokeDic;
98 pScorer->m_pRawStrokes = rsp;
99 pScorer->m_iStrokeCnt = iStrokeCnt;
100 pScorer->m_iScoreLen = 0;
102 pScorer->m_pScores = (ScoreItemPtr) MemPtrNew(diMaxListCount*sizeof(StrokeScorer));
104 if (!pScorer->m_pScores) {
105 ErrBox("Not enough memory.");
106 MemPtrFree(pScorer);
107 return NULL;
110 pScorer->m_cpPath = MemPtrNew(diPathBufLen+1);
112 if (!pScorer->m_cpPath) {
113 ErrBox("Not enough memory.");
114 MemPtrFree(pScorer->m_pScores);
115 MemPtrFree(pScorer);
116 return NULL;
119 return pScorer;
122 /* ----- StrokeScorerDestroy-------------------------------------------------*/
123 /* Destroy a StrokeScorer object */
125 void StrokeScorerDestroy (StrokeScorer *pScorer) {
126 if (pScorer) {
127 MemPtrFree (pScorer->m_pScores);
128 MemPtrFree (pScorer->m_cpPath);
129 MemPtrFree (pScorer);
133 /* ----- StrokeScorerProcess-------------------------------------------------*/
134 /* Process some database entries (maximum iMaxCnt, -1 for all).
135 Returns 0 when none remaining (should eventually return count remaining
136 to facilitate a progressbar */
138 Long StrokeScorerProcess (StrokeScorer *pScorer, Long iMaxCnt) {
139 StrokeDicEntryList *cp;
140 ULong iScore;
141 Long iCnt;
142 ScoreItemPtr pScore, pScoreBase, pSrc;
144 if (!pScorer) {
145 ErrBox("StrokeScorerProcess: pScorer == NULL.");
146 return 0;
149 pScoreBase = pScorer->m_pScores;
151 /* Evaluate all the items in cpStrokeDic against Context,
152 * and update ScoreItems list as we go.
155 iCnt = 0;
156 for (cp = pScorer->m_cpStrokeDic; cp; cp = cp->m_next )
159 iCnt++;
160 if (iMaxCnt >= 0 && iCnt > iMaxCnt)
161 break;
163 StrokeScorerEvalItem(pScorer, cp, &iScore);
165 for (pScore = pScoreBase+pScorer->m_iScoreLen-1;
166 pScore>=pScoreBase; pScore--) {
167 if (iScore >= pScore->m_iScore)
168 break;
170 pScore++;
172 /* If we have a top score, lets register it. */
173 if (pScore < (pScoreBase + diMaxListCount)) {
175 /* Increase the score list length if it isn't full yet. */
176 if (pScorer->m_iScoreLen < diMaxListCount)
177 pScorer->m_iScoreLen++;
179 /* Push down all lower scores in the list to make room. */
180 for (pSrc = pScoreBase+pScorer->m_iScoreLen-2; pSrc >= pScore; pSrc--) {
181 pSrc[1].m_iScore = pSrc->m_iScore;
182 pSrc[1].m_cp = pSrc->m_cp;
185 /* Actually store our info in the list. */
186 pScore->m_iScore = iScore;
187 pScore->m_cp = &cp->m_entry;
190 } /* for each stroke description... */
192 if (cp)
193 return 1; /* should be count remaining */
194 else
195 return 0;
198 /* ----- StrokeScorerEvalItem -----------------------------------------------*/
200 void StrokeScorerEvalItem(StrokeScorer *pScorer, StrokeDicEntryList *cpEntry,
201 ULong* ipScore /*OUT*/) {
202 StrokeDicEntryList *cp = cpEntry;
203 CharPtr stroke_ptr = cp->m_entry.m_strokes;
204 CharPtr filter_ptr = cp->m_entry.m_filters;
205 UInt iStroke;
206 CharPtr cpPath = pScorer->m_cpPath;
207 CharPtr cpPathEnd;
208 RawStroke* rsp;
209 ULong iThisScore;
210 ULong iScore = 0;
212 // MemoWriteLen(pScorer->m_cpStrokeDic, 2); /* DEBUG: tag trace with SJIS char. */
214 // if (*cp) cp++; /* Skip over first half SJIS char. */
215 // if (*cp) cp++; /* Skip over second half SJIS char. */
217 /* The first char must have high order bit set,
218 * and the second char MAY have high order bit set,
219 * but a subsequent char with high order bit set must be
220 * the beginning of the next entry. -rwells, 970712.
223 /* Loop through stroke descriptions */
224 for (iStroke = 0; iStroke < pScorer->m_iStrokeCnt; iStroke++) {
226 cpPathEnd = cpPath;
228 while(*stroke_ptr==' '||*stroke_ptr=='\t')
229 ++stroke_ptr;
231 switch (*stroke_ptr) { /* Break out on first char value... */
232 case '1': /* TDR='1' CLK=07:30 DEG=225 */
233 *cpPathEnd++ = 20; break;
234 case '2': /* TDR='2' CLK=06:00 DEG=180 */
235 *cpPathEnd++ = 16; break;
236 case '3': /* TDR='3' CLK=04:30 DEG=135 */
237 *cpPathEnd++ = 12; break;
238 case '4': /* TDR='4' CLK=09:00 DEG=270 */
239 *cpPathEnd++ = 24; break;
240 case '6': /* TDR='6' CLK=03:00 DEG=090 */
241 *cpPathEnd++ = 8; break;
242 case '7': /* TDR='7' CLK=10:30 DEG=315 */
243 *cpPathEnd++ = 28; break;
244 case '8': /* TDR='8' CLK=12:00 DEG=360 */
245 *cpPathEnd++ = 0; break;
246 case '9': /* TDR='9' CLK=01:30 DEG=045 */
247 *cpPathEnd++ = 4; break;
248 case 'x': /* TDR='x' down 06:00 then 07:30 */
249 *cpPathEnd++ = 16; *cpPathEnd++ = 20; break;
250 case 'y': /* TDR='y' down 06:00 then 04:30 */
251 *cpPathEnd++ = 16; *cpPathEnd++ = 12; break;
252 case 'c': /* TDR='c' down 06:00 then 03:00 */
253 *cpPathEnd++ = 16; *cpPathEnd++ = 8; break;
254 case 'b': /* TDR='b' across 03:00 then 06:00 */
255 *cpPathEnd++ = 8; *cpPathEnd++ = 16; break;
256 default:
257 goto NoMoreStrokes;
258 } /* end switch on first char value */
260 for (stroke_ptr++; ; stroke_ptr++) { /* Loop through following chars for stroke */
261 switch (*stroke_ptr) { /* Break out on char value... */
262 case '1': /* TDR='1' CLK=07:30 DEG=225 */
263 *cpPathEnd++ = 20; break;
264 case '2': /* TDR='2' CLK=06:00 DEG=180 */
265 *cpPathEnd++ = 16; break;
266 case '3': /* TDR='3' CLK=04:30 DEG=135 */
267 *cpPathEnd++ = 12; break;
268 case '4': /* TDR='4' CLK=09:00 DEG=270 */
269 *cpPathEnd++ = 24; break;
270 case '6': /* TDR='6' CLK=03:00 DEG=090 */
271 *cpPathEnd++ = 8; break;
272 case '7': /* TDR='7' CLK=10:30 DEG=315 */
273 *cpPathEnd++ = 28; break;
274 case '8': /* TDR='8' CLK=12:00 DEG=360 */
275 *cpPathEnd++ = 0; break;
276 case '9': /* TDR='9' CLK=01:30 DEG=045 */
277 *cpPathEnd++ = 4; break;
278 case 'x': /* TDR='x' down 06:00 then 07:30 */
279 *cpPathEnd++ = 16; *cpPathEnd++ = 20; break;
280 case 'y': /* TDR='y' down 06:00 then 04:30 */
281 *cpPathEnd++ = 16; *cpPathEnd++ = 12; break;
282 case 'c': /* TDR='c' down 06:00 then 03:00 */
283 *cpPathEnd++ = 16; *cpPathEnd++ = 8; break;
284 case 'b': /* TDR='b' across 03:00 then 06:00 */
285 *cpPathEnd++ = 8; *cpPathEnd++ = 16; break;
286 default:
287 goto ThisStrokeDone;
288 } /* end switch on char value */
289 } /* end loop through chars for stroke */
290 ThisStrokeDone:
292 rsp = &(pScorer->m_pRawStrokes[iStroke]);
294 iThisScore = StrokeDicScoreStroke(rsp->m_x, rsp->m_y, rsp->m_len,
295 cpPath, (cpPathEnd - cpPath),
296 0 /*depth*/);
298 MemoWrite2d(" s", iStroke+1);
299 MemoWrite2d("=", iThisScore); /* DEBUG: stroke score */
301 if (iThisScore >= diMaxScoreToSquare)
302 iThisScore = diMaxScoreSquared;
303 else
304 iThisScore = (iThisScore * iThisScore);
306 if (iScore >= (diMaxScoreSquared - iThisScore))
307 iScore = diMaxScoreSquared;
308 else
309 iScore += iThisScore;
311 } /* end loop through stroke descriptions */
312 NoMoreStrokes:
314 iScore = SqrtULong(iScore);
315 *ipScore = iScore;
317 MemoWrite2d(" is=", iScore); /* DEBUG: overall stroke score */
319 /* Handle optional extra filters... may modify *ipScore, updates cp. */
320 if (filter_ptr) {
321 filter_ptr = StrokeScorerExtraFilters(pScorer, filter_ptr, ipScore);
324 if (iStroke != pScorer->m_iStrokeCnt)
325 ErrBox("JStrokeDic miscount");
328 MemoWrite2d(" fs=", *ipScore); /* DEBUG: final score */
329 MemoWrite("\n");
332 /* ----- StrokeScorerTopPicks -----------------------------------------------*/
333 /* Return best diMaxListCount candidates processed so far */
335 ListMem* StrokeScorerTopPicks (StrokeScorer *pScorer)
337 ListMem* pListMem;
338 UInt iListSize;
339 UInt iat;
340 ScoreItemPtr pScore, pScoreBase;
342 if (!pScorer) {
343 ErrBox("StrokeScorerTopPicks: pScorer == NULL.");
344 return NULL;
347 /* If no list, just return an empty list. */
348 if (pScorer->m_iScoreLen <= 0)
349 return AppEmptyList();
351 pScoreBase = pScorer->m_pScores;
353 /* One extra for NULL terminator */
354 iListSize = (pScorer->m_iScoreLen)*sizeof(CharPtr);
356 if (!(pListMem = MemPtrNew(sizeof(ListMem) + iListSize))) {
357 ErrBox("ERROR: no mem in top picks");
358 return NULL;
361 pListMem->m_argc = pScorer->m_iScoreLen;
363 iat=0;
364 for (pScore = pScoreBase; pScore < (pScoreBase+pScorer->m_iScoreLen); pScore++)
366 char buf[1000];
368 //snprintf(buf,1000,"%s #%lu", pScore->m_cp->m_character, pScore->m_iScore);
369 snprintf(buf,1000,"%s", pScore->m_cp->m_character);
371 pListMem->m_argv[iat]=strdup(buf);
373 if(!pListMem->m_argv[iat])
375 ErrBox("ERROR: no mem in top picks");
376 return NULL;
379 ++iat;
381 pListMem->m_argv[iat]=0;
383 return pListMem;
387 /* ----- StrokeDicScoreStroke ---------------------------------------------- */
389 ULong StrokeDicScoreStroke(Byte* bpX, Byte* bpY, UInt iLen,
390 CharPtr cpPath, UInt iPathLen,
391 UInt iDepth) {
392 ULong iScore, iThisScore;
393 Long iMid, iStep, iPathMid, iPathRest;
394 Long iDifX, iDifY;
395 UInt iAng32, iPath32, iDif32;
397 //small values of iLen can cause the following algorithm to underflow
398 if (iLen < 2 || iPathLen < 1 || iLen < ((iPathLen+1)>>1) )
399 return diHugeCost;
401 if (iPathLen == 1) {
402 iDifX = bpX[iLen-1] - bpX[0];
403 iDifY = bpY[0] - bpY[iLen-1]; /* Flip from display to math axes. */
405 if (iDifX == 0 && iDifY == 0) /* Two samples at same place... */
406 return diHugeCost;
408 /* Subdivide recursively while stroke is long and depth is shallow.
409 * $$$ These values are pretty magic... review later. -rwells, 970719.
410 * TDR used 20*20... -rwells, 970719.
412 if ((iDifX*iDifX + iDifY*iDifY) > (20*20) && iLen > 5 && iDepth < 4) {
414 iMid = iLen >> 1;
416 /* Note that we use the middle point on both sides... */
418 iScore = StrokeDicScoreStroke(bpX, bpY, iMid+1,
419 cpPath, iPathLen, iDepth+1);
421 iScore += StrokeDicScoreStroke(bpX+iMid, bpY+iMid, iLen-iMid,
422 cpPath, iPathLen, iDepth+1);
424 return (iScore >> 1);
426 } /* End if stroke is long, and depth is shallow... */
428 /* Time to score this segment against desired direction. */
430 iAng32 = Angle32(iDifX, iDifY);
432 iPath32 = *cpPath;
434 if (iAng32 >= iPath32)
435 iDif32 = iAng32 - iPath32;
436 else
437 iDif32 = iPath32 - iAng32;
439 return iDif32 * diAngCostScale + diAngCostBase;
441 } /* end if path len is 1. */
442 else {
444 iScore = diHugeCost * iPathLen * 2;
445 iPathMid = iPathLen >> 1;
446 iPathRest = iPathLen - iPathMid;
448 if (iLen < 20)
449 iStep = 1;
450 else
451 iStep = iLen / 10;
453 for (iMid = iPathMid; iMid < (Long) (iLen - iPathRest); iMid += iStep) {
455 /* TDR original doesn't increase iDepth... -rwells, 970719. */
457 iThisScore = StrokeDicScoreStroke(bpX, bpY, iMid+1,
458 cpPath, iPathMid, iDepth+1);
460 iThisScore += StrokeDicScoreStroke(bpX+iMid, bpY+iMid, iLen-iMid,
461 cpPath+iPathMid, iPathRest,
462 iDepth+1);
464 /* TDR original doesn't divide sum by 2... -rwells, 970719. */
465 iThisScore >>= 1;
467 if (iThisScore < iScore)
468 iScore = iThisScore;
470 } /* end for trials on various mid-point divisions of stroke... */
472 return iScore;
473 } /* end if path len is >1. */
476 /* ----- StrokeScorerExtraFilters ---------------------------------------------*/
478 CharPtr StrokeScorerExtraFilters(StrokeScorer *pScorer,
479 CharPtr cp, ULong* ipScore /*OUT*/) {
480 char c;
481 char cArg[2];
482 Byte iStroke[2];
483 Long iDiff, iVal[2];
484 UInt idx = 0;
485 Boolean bMust = false;
487 MemoWrite(" F(");
489 cArg[0] = cArg[1] = 0;
490 iStroke[0] = iStroke[1] = 0;
492 /* Simple parser for Filter strings. assumes a1-b1 structure,
493 * where a and b can be any single alphabetic cmd char, the
494 * numbers can be multiple digit, and b1 can optionally be
495 * followed by '!' to insist on the filter passing. There can
496 * be multiple filters but they have to be separated by '!' or
497 * space(s). The filter string is terminated by a null byte
498 * or an 8-bit char, the beginning of the next entry.
499 * Leading spaces and trailing spaces are ignored. -rwells, 970722.
502 for (c = *cp; true; cp++, c = *cp) {
503 switch (c) {
505 case 'x':
506 case 'y':
507 case 'i':
508 case 'j':
509 case 'a':
510 case 'b':
511 case 'l':
512 cArg[idx] = c;
514 MemoWrite2d(" c",idx);
515 MemoWrite("=");
516 MemoWriteLen(cArg+idx, 1);
517 break;
519 case '0':
520 case '1':
521 case '2':
522 case '3':
523 case '4':
524 case '5':
525 case '6':
526 case '7':
527 case '8':
528 case '9':
529 iStroke[idx] = iStroke[idx] * 10 + (c - '0');
530 MemoWrite2d(":", iStroke[idx]);
531 break;
533 case '-': /* Switch to second arg when we see minus. */
534 idx = 1;
535 MemoWrite(" - ");
536 break;
538 case '!':
539 bMust = true;
540 MemoWrite(" ! ");
541 /* FALLTHRU */
543 case ' ':
544 default:
545 MemoWrite2d(" @", idx);
546 MemoWrite(")");
548 /* If we are in the second argument, execute and reset. */
549 if (idx == 1) {
550 iVal[0] = StrokeScorerExtraEval(pScorer, cArg[0], iStroke[0]);
551 iVal[1] = StrokeScorerExtraEval(pScorer, cArg[1], iStroke[1]);
552 iDiff = (iVal[0] - iVal[1]);
554 MemoWrite(" ");
555 MemoWriteLen(cArg+0, 1);
556 MemoWrite2d("", iStroke[0]);
557 MemoWrite2d(":", iVal[0]);
559 MemoWrite("-");
560 MemoWriteLen(cArg+1, 1);
561 MemoWrite2d("", iStroke[1]);
562 MemoWrite2d(":", iVal[1]);
564 MemoWrite2d("=", iDiff);
566 if (iDiff < 0) {
567 iDiff = -iDiff;
568 if (bMust)
569 iDiff = 9999999;
570 if (*ipScore < (diMaxScoreSquared-iDiff))
571 *ipScore += iDiff;
572 else
573 *ipScore = diMaxScoreSquared;
575 else {
576 if ((Long) *ipScore > iDiff)
577 *ipScore -= iDiff;
578 else
579 *ipScore = 0;
582 MemoWrite2d(" ips=", *ipScore);
584 /* Reset state for next filter... */
585 idx = 0;
586 bMust = false;
587 cArg[0] = cArg[1] = 0;
588 iStroke[0] = iStroke[1] = 0;
591 /* If this is a terminating char, break out of loop and return. */
592 if ((c & 0x80) || (c == '\0'))
593 goto ReturnNow;
594 } /* end switch on char */
595 } /* end for each char in filter spec... */
596 ReturnNow:
597 return cp;
600 /* ----- StrokeScorerExtraEval ------------------------------------------------*/
602 Long StrokeScorerExtraEval(StrokeScorer *pScorer,
603 char cArg, UInt iStroke) {
604 Long iVal, iSquared;
605 Byte* bpX;
606 Byte* bpY;
607 UInt iLen;
608 RawStroke* rsp;
610 iStroke--; /* Formula strokes are 1-based, my stroke
611 * array is 0-based. */
613 if (iStroke >= pScorer->m_iStrokeCnt)
614 return 0; /* Should not happen... */
617 rsp = pScorer->m_pRawStrokes + iStroke;
618 iLen = rsp->m_len;
619 bpX = rsp->m_x;
620 bpY = rsp->m_y;
622 /* Heretofore we have left the coordinates as we got them, screen
623 * coordinates with 0,0 in upper left, y increasing downward,
624 * x increasing to the right. And the extra filters agree with this.
625 * -rwells, 970723.
627 * Removed the subtraction of Rect_x, Rect_y here - it will mess
628 * up debugging messages if the client doesn't subtract them before-
629 * hand, but otherwise should be harmless.
630 * - OWT, 970903
633 switch(cArg) {
634 case 'x':
635 return bpX[0];
636 case 'y':
637 return bpY[0];
638 case 'i':
639 return bpX[iLen-1];
640 case 'j':
641 return bpY[iLen-1];
642 case 'a':
643 return ((bpX[0] + bpX[iLen-1]) >> 1);
644 case 'b':
645 return ((bpY[0] + bpY[iLen-1]) >> 1);
647 case 'l':
648 /* These are byte values - they can't get very large,
649 * so don't need to check against diMaxScoreToSquare...
651 iSquared = (((Long) bpX[0]) - ((Long)bpX[iLen-1]));
652 iVal = iSquared * iSquared;
654 iSquared = (((Long) bpY[0]) - ((Long)bpY[iLen-1]));
655 iVal += iSquared * iSquared;
657 return (Long) SqrtULong((Long) iVal);
660 return 0; /* Unrecognized cArg cmd char. */
662 /* ----- end of scoring.c --------------------------------------------------*/