1 /* -*- mode: C; c-file-style: "bsd"; tab-width: 4 -*- */
2 /* scoring.c - The handwriting recognition engine
3 * JStroke 1.x - Japanese Kanji handwriting recognition technology demo.
4 * Copyright (C) 1997 Robert E. Wells
5 * http://wellscs.com/pilot
6 * mailto:robert@wellscs.com
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program (gpl.html); if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 * Derived from prior work by Todd David Rudick on JavaDict and StrokeDic.
23 * Makes use of KANJIDIC data from Jim Breen of Monash University.
24 * Further credit details available at http://wellscs.com/pilot
25 * See readme.txt, changelo, and gpl.html for more information.
26 * -------------------------------------------------------------------------*/
31 #include "memowrite.h"
33 #define diAngCostBase 52 // See angles.pl for derivation.
34 #define diAngCostScale 98 // See angles.pl for derivation.
35 #define diHugeCost (((((ULong)24)*diAngCostScale)+diAngCostBase)*100)
37 #define diMaxScoreToSquare ((ULong) 0xffff)
38 #define diMaxScoreSquared (diMaxScoreToSquare*diMaxScoreToSquare)
40 #define diPathBufLen 16
42 void StrokeScorerEvalItem(StrokeScorer
*pScorer
, StrokeDicEntryList
*cpEntry
,
43 ULong
* ipScore
/*OUT*/);
45 ULong
StrokeDicScoreStroke(Byte
* bpX
, Byte
* bpY
, UInt iLen
,
46 CharPtr cpPath
, UInt iPathLen
,
49 CharPtr
StrokeScorerExtraFilters(StrokeScorer
*pscorer
,
50 CharPtr cp
, ULong
* ipScore
/*OUT*/);
52 Long
StrokeScorerExtraEval(StrokeScorer
*pscorer
,
53 char cArg
, UInt iStroke
);
55 ULong
SqrtULong(ULong val
);
57 /* ----- SqrtULong ---------------------------------------------------------*/
59 ULong
SqrtULong(ULong val
) {
60 ULong root
, rootLo
, rootHi
;
66 else if (val
>= diMaxScoreSquared
)
67 return diMaxScoreToSquare
;
70 rootHi
= (val
>> 1); /* will be >= 1, since val > 1. */
71 if (rootHi
> diMaxScoreToSquare
)
72 rootHi
= diMaxScoreToSquare
;
74 while ((root
= ((rootLo
+ rootHi
) >> 1)) > rootLo
) {
75 if ((root
* root
) <= val
) {
86 /* ----- StrokeScorerCreate-------------------------------------------------*/
87 /* Create a StrokeScorer object. (Returns NULL if can't get memory) */
89 StrokeScorer
*StrokeScorerCreate (StrokeDic cpStrokeDic
, RawStroke
*rsp
,
91 StrokeScorer
*pScorer
= (StrokeScorer
*) MemPtrNew(sizeof(StrokeScorer
));
93 ErrBox("Not enough memory.");
97 pScorer
->m_cpStrokeDic
= cpStrokeDic
;
98 pScorer
->m_pRawStrokes
= rsp
;
99 pScorer
->m_iStrokeCnt
= iStrokeCnt
;
100 pScorer
->m_iScoreLen
= 0;
102 pScorer
->m_pScores
= (ScoreItemPtr
) MemPtrNew(diMaxListCount
*sizeof(StrokeScorer
));
104 if (!pScorer
->m_pScores
) {
105 ErrBox("Not enough memory.");
110 pScorer
->m_cpPath
= MemPtrNew(diPathBufLen
+1);
112 if (!pScorer
->m_cpPath
) {
113 ErrBox("Not enough memory.");
114 MemPtrFree(pScorer
->m_pScores
);
122 /* ----- StrokeScorerDestroy-------------------------------------------------*/
123 /* Destroy a StrokeScorer object */
125 void StrokeScorerDestroy (StrokeScorer
*pScorer
) {
127 MemPtrFree (pScorer
->m_pScores
);
128 MemPtrFree (pScorer
->m_cpPath
);
129 MemPtrFree (pScorer
);
133 /* ----- StrokeScorerProcess-------------------------------------------------*/
134 /* Process some database entries (maximum iMaxCnt, -1 for all).
135 Returns 0 when none remaining (should eventually return count remaining
136 to facilitate a progressbar */
138 Long
StrokeScorerProcess (StrokeScorer
*pScorer
, Long iMaxCnt
) {
139 StrokeDicEntryList
*cp
;
142 ScoreItemPtr pScore
, pScoreBase
, pSrc
;
145 ErrBox("StrokeScorerProcess: pScorer == NULL.");
149 pScoreBase
= pScorer
->m_pScores
;
151 /* Evaluate all the items in cpStrokeDic against Context,
152 * and update ScoreItems list as we go.
156 for (cp
= pScorer
->m_cpStrokeDic
; cp
; cp
= cp
->m_next
)
160 if (iMaxCnt
>= 0 && iCnt
> iMaxCnt
)
163 StrokeScorerEvalItem(pScorer
, cp
, &iScore
);
165 for (pScore
= pScoreBase
+pScorer
->m_iScoreLen
-1;
166 pScore
>=pScoreBase
; pScore
--) {
167 if (iScore
>= pScore
->m_iScore
)
172 /* If we have a top score, lets register it. */
173 if (pScore
< (pScoreBase
+ diMaxListCount
)) {
175 /* Increase the score list length if it isn't full yet. */
176 if (pScorer
->m_iScoreLen
< diMaxListCount
)
177 pScorer
->m_iScoreLen
++;
179 /* Push down all lower scores in the list to make room. */
180 for (pSrc
= pScoreBase
+pScorer
->m_iScoreLen
-2; pSrc
>= pScore
; pSrc
--) {
181 pSrc
[1].m_iScore
= pSrc
->m_iScore
;
182 pSrc
[1].m_cp
= pSrc
->m_cp
;
185 /* Actually store our info in the list. */
186 pScore
->m_iScore
= iScore
;
187 pScore
->m_cp
= &cp
->m_entry
;
190 } /* for each stroke description... */
193 return 1; /* should be count remaining */
198 /* ----- StrokeScorerEvalItem -----------------------------------------------*/
200 void StrokeScorerEvalItem(StrokeScorer
*pScorer
, StrokeDicEntryList
*cpEntry
,
201 ULong
* ipScore
/*OUT*/) {
202 StrokeDicEntryList
*cp
= cpEntry
;
203 CharPtr stroke_ptr
= cp
->m_entry
.m_strokes
;
204 CharPtr filter_ptr
= cp
->m_entry
.m_filters
;
206 CharPtr cpPath
= pScorer
->m_cpPath
;
212 // MemoWriteLen(pScorer->m_cpStrokeDic, 2); /* DEBUG: tag trace with SJIS char. */
214 // if (*cp) cp++; /* Skip over first half SJIS char. */
215 // if (*cp) cp++; /* Skip over second half SJIS char. */
217 /* The first char must have high order bit set,
218 * and the second char MAY have high order bit set,
219 * but a subsequent char with high order bit set must be
220 * the beginning of the next entry. -rwells, 970712.
223 /* Loop through stroke descriptions */
224 for (iStroke
= 0; iStroke
< pScorer
->m_iStrokeCnt
; iStroke
++) {
228 while(*stroke_ptr
==' '||*stroke_ptr
=='\t')
231 switch (*stroke_ptr
) { /* Break out on first char value... */
232 case '1': /* TDR='1' CLK=07:30 DEG=225 */
233 *cpPathEnd
++ = 20; break;
234 case '2': /* TDR='2' CLK=06:00 DEG=180 */
235 *cpPathEnd
++ = 16; break;
236 case '3': /* TDR='3' CLK=04:30 DEG=135 */
237 *cpPathEnd
++ = 12; break;
238 case '4': /* TDR='4' CLK=09:00 DEG=270 */
239 *cpPathEnd
++ = 24; break;
240 case '6': /* TDR='6' CLK=03:00 DEG=090 */
241 *cpPathEnd
++ = 8; break;
242 case '7': /* TDR='7' CLK=10:30 DEG=315 */
243 *cpPathEnd
++ = 28; break;
244 case '8': /* TDR='8' CLK=12:00 DEG=360 */
245 *cpPathEnd
++ = 0; break;
246 case '9': /* TDR='9' CLK=01:30 DEG=045 */
247 *cpPathEnd
++ = 4; break;
248 case 'x': /* TDR='x' down 06:00 then 07:30 */
249 *cpPathEnd
++ = 16; *cpPathEnd
++ = 20; break;
250 case 'y': /* TDR='y' down 06:00 then 04:30 */
251 *cpPathEnd
++ = 16; *cpPathEnd
++ = 12; break;
252 case 'c': /* TDR='c' down 06:00 then 03:00 */
253 *cpPathEnd
++ = 16; *cpPathEnd
++ = 8; break;
254 case 'b': /* TDR='b' across 03:00 then 06:00 */
255 *cpPathEnd
++ = 8; *cpPathEnd
++ = 16; break;
258 } /* end switch on first char value */
260 for (stroke_ptr
++; ; stroke_ptr
++) { /* Loop through following chars for stroke */
261 switch (*stroke_ptr
) { /* Break out on char value... */
262 case '1': /* TDR='1' CLK=07:30 DEG=225 */
263 *cpPathEnd
++ = 20; break;
264 case '2': /* TDR='2' CLK=06:00 DEG=180 */
265 *cpPathEnd
++ = 16; break;
266 case '3': /* TDR='3' CLK=04:30 DEG=135 */
267 *cpPathEnd
++ = 12; break;
268 case '4': /* TDR='4' CLK=09:00 DEG=270 */
269 *cpPathEnd
++ = 24; break;
270 case '6': /* TDR='6' CLK=03:00 DEG=090 */
271 *cpPathEnd
++ = 8; break;
272 case '7': /* TDR='7' CLK=10:30 DEG=315 */
273 *cpPathEnd
++ = 28; break;
274 case '8': /* TDR='8' CLK=12:00 DEG=360 */
275 *cpPathEnd
++ = 0; break;
276 case '9': /* TDR='9' CLK=01:30 DEG=045 */
277 *cpPathEnd
++ = 4; break;
278 case 'x': /* TDR='x' down 06:00 then 07:30 */
279 *cpPathEnd
++ = 16; *cpPathEnd
++ = 20; break;
280 case 'y': /* TDR='y' down 06:00 then 04:30 */
281 *cpPathEnd
++ = 16; *cpPathEnd
++ = 12; break;
282 case 'c': /* TDR='c' down 06:00 then 03:00 */
283 *cpPathEnd
++ = 16; *cpPathEnd
++ = 8; break;
284 case 'b': /* TDR='b' across 03:00 then 06:00 */
285 *cpPathEnd
++ = 8; *cpPathEnd
++ = 16; break;
288 } /* end switch on char value */
289 } /* end loop through chars for stroke */
292 rsp
= &(pScorer
->m_pRawStrokes
[iStroke
]);
294 iThisScore
= StrokeDicScoreStroke(rsp
->m_x
, rsp
->m_y
, rsp
->m_len
,
295 cpPath
, (cpPathEnd
- cpPath
),
298 MemoWrite2d(" s", iStroke
+1);
299 MemoWrite2d("=", iThisScore
); /* DEBUG: stroke score */
301 if (iThisScore
>= diMaxScoreToSquare
)
302 iThisScore
= diMaxScoreSquared
;
304 iThisScore
= (iThisScore
* iThisScore
);
306 if (iScore
>= (diMaxScoreSquared
- iThisScore
))
307 iScore
= diMaxScoreSquared
;
309 iScore
+= iThisScore
;
311 } /* end loop through stroke descriptions */
314 iScore
= SqrtULong(iScore
);
317 MemoWrite2d(" is=", iScore
); /* DEBUG: overall stroke score */
319 /* Handle optional extra filters... may modify *ipScore, updates cp. */
321 filter_ptr
= StrokeScorerExtraFilters(pScorer
, filter_ptr
, ipScore
);
324 if (iStroke
!= pScorer
->m_iStrokeCnt
)
325 ErrBox("JStrokeDic miscount");
328 MemoWrite2d(" fs=", *ipScore
); /* DEBUG: final score */
332 /* ----- StrokeScorerTopPicks -----------------------------------------------*/
333 /* Return best diMaxListCount candidates processed so far */
335 ListMem
* StrokeScorerTopPicks (StrokeScorer
*pScorer
)
340 ScoreItemPtr pScore
, pScoreBase
;
343 ErrBox("StrokeScorerTopPicks: pScorer == NULL.");
347 /* If no list, just return an empty list. */
348 if (pScorer
->m_iScoreLen
<= 0)
349 return AppEmptyList();
351 pScoreBase
= pScorer
->m_pScores
;
353 /* One extra for NULL terminator */
354 iListSize
= (pScorer
->m_iScoreLen
)*sizeof(CharPtr
);
356 if (!(pListMem
= MemPtrNew(sizeof(ListMem
) + iListSize
))) {
357 ErrBox("ERROR: no mem in top picks");
361 pListMem
->m_argc
= pScorer
->m_iScoreLen
;
364 for (pScore
= pScoreBase
; pScore
< (pScoreBase
+pScorer
->m_iScoreLen
); pScore
++)
368 //snprintf(buf,1000,"%s #%lu", pScore->m_cp->m_character, pScore->m_iScore);
369 snprintf(buf
,1000,"%s", pScore
->m_cp
->m_character
);
371 pListMem
->m_argv
[iat
]=strdup(buf
);
373 if(!pListMem
->m_argv
[iat
])
375 ErrBox("ERROR: no mem in top picks");
381 pListMem
->m_argv
[iat
]=0;
387 /* ----- StrokeDicScoreStroke ---------------------------------------------- */
389 ULong
StrokeDicScoreStroke(Byte
* bpX
, Byte
* bpY
, UInt iLen
,
390 CharPtr cpPath
, UInt iPathLen
,
392 ULong iScore
, iThisScore
;
393 Long iMid
, iStep
, iPathMid
, iPathRest
;
395 UInt iAng32
, iPath32
, iDif32
;
397 //small values of iLen can cause the following algorithm to underflow
398 if (iLen
< 2 || iPathLen
< 1 || iLen
< ((iPathLen
+1)>>1) )
402 iDifX
= bpX
[iLen
-1] - bpX
[0];
403 iDifY
= bpY
[0] - bpY
[iLen
-1]; /* Flip from display to math axes. */
405 if (iDifX
== 0 && iDifY
== 0) /* Two samples at same place... */
408 /* Subdivide recursively while stroke is long and depth is shallow.
409 * $$$ These values are pretty magic... review later. -rwells, 970719.
410 * TDR used 20*20... -rwells, 970719.
412 if ((iDifX
*iDifX
+ iDifY
*iDifY
) > (20*20) && iLen
> 5 && iDepth
< 4) {
416 /* Note that we use the middle point on both sides... */
418 iScore
= StrokeDicScoreStroke(bpX
, bpY
, iMid
+1,
419 cpPath
, iPathLen
, iDepth
+1);
421 iScore
+= StrokeDicScoreStroke(bpX
+iMid
, bpY
+iMid
, iLen
-iMid
,
422 cpPath
, iPathLen
, iDepth
+1);
424 return (iScore
>> 1);
426 } /* End if stroke is long, and depth is shallow... */
428 /* Time to score this segment against desired direction. */
430 iAng32
= Angle32(iDifX
, iDifY
);
434 if (iAng32
>= iPath32
)
435 iDif32
= iAng32
- iPath32
;
437 iDif32
= iPath32
- iAng32
;
439 return iDif32
* diAngCostScale
+ diAngCostBase
;
441 } /* end if path len is 1. */
444 iScore
= diHugeCost
* iPathLen
* 2;
445 iPathMid
= iPathLen
>> 1;
446 iPathRest
= iPathLen
- iPathMid
;
453 for (iMid
= iPathMid
; iMid
< (Long
) (iLen
- iPathRest
); iMid
+= iStep
) {
455 /* TDR original doesn't increase iDepth... -rwells, 970719. */
457 iThisScore
= StrokeDicScoreStroke(bpX
, bpY
, iMid
+1,
458 cpPath
, iPathMid
, iDepth
+1);
460 iThisScore
+= StrokeDicScoreStroke(bpX
+iMid
, bpY
+iMid
, iLen
-iMid
,
461 cpPath
+iPathMid
, iPathRest
,
464 /* TDR original doesn't divide sum by 2... -rwells, 970719. */
467 if (iThisScore
< iScore
)
470 } /* end for trials on various mid-point divisions of stroke... */
473 } /* end if path len is >1. */
476 /* ----- StrokeScorerExtraFilters ---------------------------------------------*/
478 CharPtr
StrokeScorerExtraFilters(StrokeScorer
*pScorer
,
479 CharPtr cp
, ULong
* ipScore
/*OUT*/) {
485 Boolean bMust
= false;
489 cArg
[0] = cArg
[1] = 0;
490 iStroke
[0] = iStroke
[1] = 0;
492 /* Simple parser for Filter strings. assumes a1-b1 structure,
493 * where a and b can be any single alphabetic cmd char, the
494 * numbers can be multiple digit, and b1 can optionally be
495 * followed by '!' to insist on the filter passing. There can
496 * be multiple filters but they have to be separated by '!' or
497 * space(s). The filter string is terminated by a null byte
498 * or an 8-bit char, the beginning of the next entry.
499 * Leading spaces and trailing spaces are ignored. -rwells, 970722.
502 for (c
= *cp
; true; cp
++, c
= *cp
) {
514 MemoWrite2d(" c",idx
);
516 MemoWriteLen(cArg
+idx
, 1);
529 iStroke
[idx
] = iStroke
[idx
] * 10 + (c
- '0');
530 MemoWrite2d(":", iStroke
[idx
]);
533 case '-': /* Switch to second arg when we see minus. */
545 MemoWrite2d(" @", idx
);
548 /* If we are in the second argument, execute and reset. */
550 iVal
[0] = StrokeScorerExtraEval(pScorer
, cArg
[0], iStroke
[0]);
551 iVal
[1] = StrokeScorerExtraEval(pScorer
, cArg
[1], iStroke
[1]);
552 iDiff
= (iVal
[0] - iVal
[1]);
555 MemoWriteLen(cArg
+0, 1);
556 MemoWrite2d("", iStroke
[0]);
557 MemoWrite2d(":", iVal
[0]);
560 MemoWriteLen(cArg
+1, 1);
561 MemoWrite2d("", iStroke
[1]);
562 MemoWrite2d(":", iVal
[1]);
564 MemoWrite2d("=", iDiff
);
570 if (*ipScore
< (diMaxScoreSquared
-iDiff
))
573 *ipScore
= diMaxScoreSquared
;
576 if ((Long
) *ipScore
> iDiff
)
582 MemoWrite2d(" ips=", *ipScore
);
584 /* Reset state for next filter... */
587 cArg
[0] = cArg
[1] = 0;
588 iStroke
[0] = iStroke
[1] = 0;
591 /* If this is a terminating char, break out of loop and return. */
592 if ((c
& 0x80) || (c
== '\0'))
594 } /* end switch on char */
595 } /* end for each char in filter spec... */
600 /* ----- StrokeScorerExtraEval ------------------------------------------------*/
602 Long
StrokeScorerExtraEval(StrokeScorer
*pScorer
,
603 char cArg
, UInt iStroke
) {
610 iStroke
--; /* Formula strokes are 1-based, my stroke
611 * array is 0-based. */
613 if (iStroke
>= pScorer
->m_iStrokeCnt
)
614 return 0; /* Should not happen... */
617 rsp
= pScorer
->m_pRawStrokes
+ iStroke
;
622 /* Heretofore we have left the coordinates as we got them, screen
623 * coordinates with 0,0 in upper left, y increasing downward,
624 * x increasing to the right. And the extra filters agree with this.
627 * Removed the subtraction of Rect_x, Rect_y here - it will mess
628 * up debugging messages if the client doesn't subtract them before-
629 * hand, but otherwise should be harmless.
643 return ((bpX
[0] + bpX
[iLen
-1]) >> 1);
645 return ((bpY
[0] + bpY
[iLen
-1]) >> 1);
648 /* These are byte values - they can't get very large,
649 * so don't need to check against diMaxScoreToSquare...
651 iSquared
= (((Long
) bpX
[0]) - ((Long
)bpX
[iLen
-1]));
652 iVal
= iSquared
* iSquared
;
654 iSquared
= (((Long
) bpY
[0]) - ((Long
)bpY
[iLen
-1]));
655 iVal
+= iSquared
* iSquared
;
657 return (Long
) SqrtULong((Long
) iVal
);
660 return 0; /* Unrecognized cArg cmd char. */
662 /* ----- end of scoring.c --------------------------------------------------*/