3 # SpeakGoodChinese: SGC_ToneRecognizer.praat processes student utterances
4 # and generates a report on their tone production
6 # Copyright (C) 2007 R.J.J.H. van Son
7 # The SpeakGoodChinese team are:
8 # Guangqin Chen, Zhonyan Chen, Stefan de Koning, Eveline van Hagen,
9 # Rob van Son, Dennis Vierkant, David Weenink
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
26 form Mandarin Tone recognition
27 word currentSound lastExample.wav
30 positive precision_(st) 3
36 boolean logPerformance
39 include ToneRecognition.praat
41 # Check if log/logPerformance* exists
42 # If it does, do log the performance
43 Create Strings as file list... logList log/logPerformance*
44 number_of_logfiles = Get number of strings
45 if number_of_logfiles > 0
49 select Strings logList
54 pinyin$ = replace_regex$(pinyin$, "^\s*(.+)\s*$", "\1", 1)
55 pinyin$ = replace_regex$(pinyin$, "5", "0", 0)
58 # Reduction (lower register and narrow range) means errors
59 # The opposite mostly not. Asymmetry allows more room upward
60 # than downward (asymmetry = 2 => highBoundaryFactor ^ 2)
63 # Kill octave jumps: DANGEROUS
65 # Silence soft noise: DANGEROUS
83 # Stick to the raw recognition results or not
86 currentTestWord$ = pinyin$
88 precisionFactor = 2^(precision/12)
89 highBoundaryFactor = precisionFactor ^ asymmetry
90 lowBoundaryFactor = 1/precisionFactor
92 # Generate reference example
93 # Start with a range of 1 octave and a speed factor of 1
96 upperRegisterInput = register
97 execute ToneScript.praat 'currentTestWord$' 'upperRegisterInput' 1 1 CorrectPitch
99 select Pitch 'currentTestWord$'
100 durationModel = Get total duration
101 maximumModelFzero = Get quantile... 0 0 0.95 Hertz
102 minimumModelFzero = Get quantile... 0 0 0.05 Hertz
104 if minimumModelFzero > 0
105 modelPitchRange = maximumModelFzero / minimumModelFzero
109 Read from file... 'currentSound$'
114 select Sound inputSound
115 soundlength = Get total duration
116 To TextGrid (silences)... 'minimumPitch' 0 -30 0.5 0.1 silent sounding
119 select TextGrid inputSound
120 numberofIntervals = Get number of intervals... 1
122 # Remove buzzing and other obnoxious sounds (if switched on)
123 for i from 1 to numberofIntervals
124 select TextGrid inputSound
125 value$ = Get label of interval... 1 'i'
126 begintime = Get starting point... 1 'i'
127 endtime = Get end point... 1 'i'
130 if value$ = "silent" and silenceSoftNoises > 0
131 select Sound inputSound
132 Set part to zero... 'begintime' 'endtime' at nearest zero crossing
136 # Select target speech
138 for i from 1 to numberofIntervals
139 select TextGrid inputSound
141 value$ = Get label of interval... 1 'i'
142 begintime = Get starting point... 1 'i'
143 endtime = Get end point... 1 'i'
145 if value$ != "silent"
146 if begintime > spacing / 2
147 begintime = begintime - (spacing / 2)
151 if endtime + (spacing / 2) < soundlength
152 endtime = endtime + (spacing / 2)
154 endtime = soundlength
157 select Sound inputSound
158 Extract part... 'begintime' 'endtime' Rectangular 1.0 no
161 newPower = Get power... 0 0
162 if newPower > maximumPower
167 select Sound newSource
169 maximumPower = newPower
171 select Sound newSource
177 select Sound inputSound
178 plus TextGrid inputSound
183 durationSource = Get total duration
184 # noprogress To Pitch (ac)... 0 'minimumPitch' 15 yes 0.2 0.6 0.02 0.5 0.3 'maximumPitch'
185 noprogress To Pitch... 0.0 'minimumPitch' 'maximumPitch'
186 Rename... SourcePitch
188 # It is rather dangerous to kill Octave errors, so be careful
189 if killOctaveJumps > 0
192 Rename... SourcePitch
193 select Pitch OldSource
197 # Remove all pitch points outside a band around the upper register
198 select Pitch SourcePitch
199 upperCutOff = 1.7*upperRegisterInput
200 lowerCutOff = upperRegisterInput/4
201 Formula... if self > 'upperCutOff' then -1 else self endif
202 Formula... if self < 'lowerCutOff' then -1 else self endif
205 select Pitch SourcePitch
206 maximumRecFzero = Get quantile... 0 0 0.95 Hertz
207 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
208 minimumRecFzero = Get quantile... 0 0 0.05 Hertz
209 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
210 if maximumRecFzero = undefined
211 # Read and select the feedback text
212 Read Table from tab-separated file... feedback/ToneFeedback.txt
213 Rename... ToneFeedback
214 numberOfFeedbackRows = Get number of rows
216 # Determine what should be told to the student
217 recognitionText$ = "'currentTestWord$': ???"
218 for i from 1 to numberOfFeedbackRows
219 select Table ToneFeedback
220 toneOne$ = Get value... 'i' T1
221 toneTwo$ = Get value... 'i' T2
222 toneText$ = Get value... 'i' Feedback
224 if toneOne$ = "NoSound"
225 feedbackText$ = toneText$
228 recognitionText$ > feedback.txt
229 newline$ >> feedback.txt
230 feedbackText$ >> feedback.txt
231 newline$ >> feedback.txt
233 select Table ToneFeedback
235 exit Error, nothing recorded
238 if minimumRecFzero > 0
239 recPitchRange = maximumRecFzero / minimumRecFzero
241 newUpperRegister = maximumRecFzero / maximumModelFzero * upperRegisterInput
242 newToneRange = recPitchRange / modelPitchRange
246 if newUpperRegister > highBoundaryFactor * upperRegisterInput
247 newUpperRegister = highBoundaryFactor * upperRegisterInput
248 registerUsed$ = "High"
249 elsif newUpperRegister < lowBoundaryFactor * upperRegisterInput
250 newUpperRegister = lowBoundaryFactor * upperRegisterInput
251 registerUsed$ = "Low"
253 if newToneRange > highBoundaryFactor
254 newToneRange = highBoundaryFactor
256 elsif newToneRange < lowBoundaryFactor
257 newToneRange = lowBoundaryFactor
258 rangeUsed$ = "Narrow"
262 if durationModel > spacing
263 speedFactor = (durationSource - spacing) / (durationModel - spacing)
267 newUpperRegister = round(newUpperRegister)
269 # Remove all pitch points outside a band around the upper register
270 select Pitch SourcePitch
271 upperCutOff = 1.5*newUpperRegister
272 lowerCutOff = newUpperRegister/3
273 Formula... if self > 'upperCutOff' then -1 else self endif
274 Formula... if self < 'lowerCutOff' then -1 else self endif
276 if killOctaveJumps > 0
277 Rename... OldSourcePitch
279 Rename... SourcePitch
280 select Pitch OldSourcePitch
284 # It is good to have the times of the lowest and highest pitch frequencies
285 select Pitch SourcePitch
286 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
287 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
289 # Clean up the old example pitch
290 select Pitch 'currentTestWord$'
293 # Do the tone recognition
294 call FreeToneRecognition 'currentTestWord$' "REUSEPITCH" "" 'newUpperRegister' 'newToneRange' 'speedFactor'
295 #execute ToneScript.praat 'currentTestWord$' 'newUpperRegister' 'newToneRange' 'speedFactor' CorrectPitch
296 execute ToneScript.praat 'currentTestWord$' 'upperRegisterInput' 'newToneRange' 'speedFactor' CorrectPitch
299 originalRecognizedWord$ = choiceReference$
301 # First syllable: 2<->3 (6) exchanges (incl 6)
302 if rindex_regex(currentTestWord$, "^[a-zA-Z]+2[a-zA-Z]+[0-4]$") > 0
303 if rindex_regex(choiceReference$, "^[a-zA-Z]+[36][a-zA-Z]+[0-4]$") > 0
304 choiceReference$ = replace_regex$(choiceReference$, "[36]([a-zA-Z]+[0-4])$", "2\1", 0)
306 elsif rindex_regex(currentTestWord$, "^[a-zA-Z]+3[a-zA-Z]+[0-4]$") > 0
307 if rindex_regex(choiceReference$, "^[a-zA-Z]+[26][a-zA-Z]+[0-4]$") > 0
308 choiceReference$ = replace_regex$(choiceReference$, "[26]([a-zA-Z]+[0-4])$", "3\1", 0)
310 # A single second tone is often misidentified as a neutral tone,
311 # A real neutral tone would be too low or too narrow and be discarded
312 # Leaves us with erroneous tone 4
313 elsif rindex_regex(currentTestWord$, "^[a-zA-Z]+2$") > 0
314 if rindex_regex(choiceReference$, "^[a-zA-Z]+0$") > 0 and timeMinimum < timeMaximum
315 choiceReference$ = replace_regex$(choiceReference$, "0", "2", 0)
317 # A single fourth tone is often misidentified as a neutral tone,
318 # A real neutral tone would be too low or too narrow and be discarded
319 # Leaves us with erroneous tones 2 and 3
320 elsif rindex_regex(currentTestWord$, "^[a-zA-Z]+4$") > 0
321 if rindex_regex(choiceReference$, "^[a-zA-Z]+0$") > 0 and timeMaximum < timeMinimum
322 choiceReference$ = replace_regex$(choiceReference$, "0", "4", 0)
326 # Second (last) syllable, 0<->6 exchanges and 2<->3
327 # A recognized 0 after a 4 can be a 2: 4-0 => 4-2
328 if rindex_regex(currentTestWord$, "[a-zA-Z]+[4][a-zA-Z]+2$") > 0
329 if rindex_regex(choiceReference$, "[a-zA-Z]+[4][a-zA-Z]+[0]$") > 0
330 choiceReference$ = replace_regex$(choiceReference$, "[0]$", "2", 0)
333 # A final 6 after a valid tone is often a recognition error
334 # A final 6 can be a 0
335 if rindex_regex(currentTestWord$, "[a-zA-Z]+[0-9][a-zA-Z]+0$") > 0
336 if rindex_regex(choiceReference$, "[a-zA-Z]+[0-4][a-zA-Z]+6$") > 0
337 choiceReference$ = replace_regex$(choiceReference$, "6$", "0", 0)
339 # Second (last) syllable, 2<->3 exchanges after [23] tones
340 # A recognized 6 (or 3) after a valid tone [1-4] is mostly wrong, can be a 2
341 elsif rindex_regex(currentTestWord$, "[a-zA-Z]+[1-4][a-zA-Z]+2$") > 0
342 if rindex_regex(choiceReference$, "[a-zA-Z]+[1-4][a-zA-Z]+[36]$") > 0
343 choiceReference$ = replace_regex$(choiceReference$, "[36]$", "2", 0)
345 # A recognized 6 after a [23] is mostly wrong, can be a 3
346 elsif rindex_regex(currentTestWord$, "[a-zA-Z]+[23][a-zA-Z]+3$") > 0
347 if rindex_regex(choiceReference$, "[a-zA-Z]+[23][a-zA-Z]+[26]$") > 0
348 choiceReference$ = replace_regex$(choiceReference$, "[26]$", "3", 0)
350 # A recognized 6 after a [3] is mostly wrong, can be a 1
351 elsif rindex_regex(currentTestWord$, "[a-zA-Z]+[3][a-zA-Z]+1$") > 0
352 if rindex_regex(choiceReference$, "[a-zA-Z]+[3][a-zA-Z]+[6]$") > 0
353 choiceReference$ = replace_regex$(choiceReference$, "[6]$", "1", 0)
357 # Clean up odd things constructed with special cases
358 # Target is 3-3, but recognized is 2-3, which is CORRECT. Change it into 3-3
359 if rindex_regex(currentTestWord$, "[a-zA-Z]+[3][a-zA-Z]+[3]$") > 0
360 if rindex_regex(choiceReference$, "[a-zA-Z]+[2][a-zA-Z]+[3]$") > 0
361 choiceReference$ = replace_regex$(choiceReference$, "[2]([a-zA-Z]+[3])$", "3\1", 0)
366 # If wrong, then undo all changes
367 if currentTestWord$ != choiceReference$
368 choiceReference$ = originalRecognizedWord$
371 ###############################################
373 # Experimental recognition of initial sound
375 ###############################################
377 toneChoiceReference$ = choiceReference$
378 if fileReadable("../wordlists/CoGMandarinSounds/'choiceReference$'.cog")
380 Write to WAV file... lastExample.wav
382 execute InitialRecognition.praat 'choiceReference$' lastExample.wav ../wordlists/CoGMandarinSounds
383 choiceReference$ < lastInitialRecognitionResult.txt
386 ###############################################
390 ###############################################
391 result$ = "'tab$''currentTestWord$''tab$''choiceReference$''tab$''newUpperRegister''tab$''newToneRange''tab$''speedFactor''tab$''registerUsed$''tab$''rangeUsed$'"
392 if currentTestWord$ = toneChoiceReference$
393 result$ = "Correct:"+result$
395 result$ = "Wrong:"+result$
398 # Initialize result texts
399 recognitionText$ = "'currentTestWord$': "
400 choiceText$ = replace_regex$(choiceReference$, "6", "\?", 0)
401 feedbackText$ = "----"
403 # Read and select the feedback text
404 Read Table from tab-separated file... feedback/ToneFeedback.txt
405 Rename... ToneFeedback
406 numberOfFeedbackRows = Get number of rows
408 # Separate tone from pronunciation errors
409 currentToneWord$ = replace_regex$(currentTestWord$, "[a-z]+", "\*", 0)
410 choiceToneReference$ = replace_regex$(choiceReference$, "[a-z]+", "\*", 0)
412 # Determine what should be told to the student
413 if registerUsed$ = "Low"
414 recognitionText$ = recognitionText$ + "???"
415 for i from 1 to numberOfFeedbackRows
416 select Table ToneFeedback
417 toneOne$ = Get value... 'i' T1
418 toneTwo$ = Get value... 'i' T2
419 toneText$ = Get value... 'i' Feedback
422 feedbackText$ = toneText$
425 elsif rangeUsed$ = "Narrow"
426 recognitionText$ = recognitionText$ + "???"
427 for i from 1 to numberOfFeedbackRows
428 select Table ToneFeedback
429 toneOne$ = Get value... 'i' T1
430 toneTwo$ = Get value... 'i' T2
431 toneText$ = Get value... 'i' Feedback
433 if toneOne$ = "Narrow"
434 feedbackText$ = toneText$
437 elsif registerUsed$ = "High"
438 recognitionText$ = recognitionText$ + choiceText$
439 for i from 1 to numberOfFeedbackRows
440 select Table ToneFeedback
441 toneOne$ = Get value... 'i' T1
442 toneTwo$ = Get value... 'i' T2
443 toneText$ = Get value... 'i' Feedback
446 feedbackText$ = toneText$
449 elsif rangeUsed$ = "Wide"
450 recognitionText$ = recognitionText$ + choiceText$
451 for i from 1 to numberOfFeedbackRows
452 select Table ToneFeedback
453 toneOne$ = Get value... 'i' T1
454 toneTwo$ = Get value... 'i' T2
455 toneText$ = Get value... 'i' Feedback
458 feedbackText$ = toneText$
461 # Bad tones, first handle first syllable
462 elsif rindex_regex(choiceReference$, "^[a-zA-Z]+6") > 0
463 recognitionText$ = recognitionText$ + choiceText$
465 for i from 1 to numberOfFeedbackRows
466 select Table ToneFeedback
467 toneOne$ = Get value... 'i' T1
468 toneTwo$ = Get value... 'i' T2
469 toneText$ = Get value... 'i' Feedback
474 recognitionText$ = recognitionText$ + " ('toneText$')"
475 elsif rindex_regex(currentTestWord$, "^[a-zA-Z]+'toneOne$'") > 0 and toneTwo$ = "-"
476 feedbackText$ = feedbackText$ + toneText$ + " "
479 # Bad tones, then handle second syllable
480 elsif rindex_regex(choiceReference$, "[a-zA-Z]+6$") > 0
481 recognitionText$ = recognitionText$ + choiceText$
483 for i from 1 to numberOfFeedbackRows
484 select Table ToneFeedback
485 toneOne$ = Get value... 'i' T1
486 toneTwo$ = Get value... 'i' T2
487 toneText$ = Get value... 'i' Feedback
492 recognitionText$ = recognitionText$ + " ('toneText$')"
493 elsif rindex_regex(currentTestWord$, "[a-zA-Z]+'toneOne$'$") > 0 and toneTwo$ = "-"
494 feedbackText$ = feedbackText$ + toneText$ + " "
497 # Just plain wrong tones
498 elsif currentToneWord$ <> choiceToneReference$
499 recognitionText$ = recognitionText$ + choiceText$
500 for i from 1 to numberOfFeedbackRows
501 select Table ToneFeedback
502 toneOne$ = Get value... 'i' T1
503 toneTwo$ = Get value... 'i' T2
504 toneText$ = Get value... 'i' Feedback
506 if rindex_regex(currentTestWord$, "^[a-zA-Z]+'toneOne$'$") > 0 and toneTwo$ = "-"
507 feedbackText$ = toneText$
508 elsif rindex_regex(currentTestWord$, "^[a-zA-Z]+'toneOne$'[a-zA-Z]+'toneTwo$'$") > 0
509 feedbackText$ = toneText$
510 elsif toneOne$ = "Wrong"
511 recognitionText$ = recognitionText$ + " ('toneText$')"
516 recognitionText$ = recognitionText$ + choiceText$
517 for i from 1 to numberOfFeedbackRows
518 select Table ToneFeedback
519 toneOne$ = Get value... 'i' T1
520 toneTwo$ = Get value... 'i' T2
521 toneText$ = Get value... 'i' Feedback
523 if toneOne$ = "Correct"
524 feedbackText$ = toneText$
530 recognitionText$ > feedback.txt
531 newline$ >> feedback.txt
532 feedbackText$ >> feedback.txt
533 newline$ >> feedback.txt
536 currentDate$ = date$()
537 timeStamp$ = replace_regex$(currentDate$, "[^a-zA-Z0-9\-_]", "-", 0)
539 outfilename$ = "'currentTestWord$'_'choiceReference$'_'upperRegisterInput'_'timeStamp$'.wav"
540 fileappend log/logFile.txt 'result$''tab$''upperRegisterInput'Hz'tab$''currentDate$''tab$''outfilename$''newline$'
542 Write to WAV file... log/'outfilename$'
544 # printline 'result$'
545 result$ > lastResult.txt
546 newline$ >> lastResult.txt
549 select Table ToneFeedback
552 if example$ = "Replay"
553 call newPitchOnSound Source SourcePitch 'currentTestWord$'
554 select Sound lastExample
556 elsif example$ = "Hum"
557 select Pitch 'currentTestWord$'
559 Rename... lastExample
566 select Sound lastExample
567 Write to WAV file... lastExample.wav
570 if example$ <> "none"
571 freqTop = 1.5 * upperRegisterInput
573 Select outer viewport... 0 6 0 4
575 select Pitch SourcePitch
578 Draw... 0 0 0 'freqTop' 1
580 Draw... 0 0 0 'freqTop' 0
582 select Pitch 'currentTestWord$'
584 Draw... 0 0 0 'freqTop' 0
587 Text top... no 'currentTestWord$'
590 select Pitch SourcePitch
591 Write to short text file... ../records/'currentTestWord$'.Pitch
593 # TODO: this still has to be changed into something else!
594 Write to short text file... ../records/'currentTestWord$'.PitchTier
599 plus Pitch SourcePitch
600 plus Pitch 'currentTestWord$'
601 plus Sound lastExample
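604 # Replace the pitch of the source sound with the time-aligned correct pitch.
605 # Arguments: name of the Sound, name of its Pitch, name of the correct Pitch/PitchTier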
606 procedure newPitchOnSound sourceSound$ sourceName$ correctName$
607 # Align generated pitch with source
608 select Pitch 'sourceName$'
609 thisDuration = Get total duration
611 numberOfFrames = Get number of frames
612 numberOfVoicedFrames = Count voiced frames
614 if numberOfVoicedFrames > 0
615 declineFactor = 0.5**(1/numberOfVoicedFrames)
617 Formula... if self > 0 then if self[col-1]<=0 then 200 else self[col -1]*declineFactor endif else self endif
619 select Pitch 'correctName$'
620 thisDuration = Get total duration
622 numberOfFrames = Get number of frames
623 numberOfVoicedFrames = Count voiced frames
625 if numberOfVoicedFrames > 0
626 declineFactor = 0.5**(1/numberOfVoicedFrames)
628 Formula... if self > 0 then if self[col-1]<=0 then 200 else self[col -1]*declineFactor endif else self endif
630 select Pitch sourceVoiced
631 plus Pitch spokenVoiced
633 noprogress To DTW... 24 10 yes yes no restriction
635 Find path... yes yes no restriction
637 select Pitch sourceVoiced
638 plus Pitch spokenVoiced
641 # Construct PitchTiers
642 select Pitch 'correctName$'
645 select Pitch 'sourceName$'
646 duration = Get total duration
647 Create PitchTier... CorrectPitch 0.0 'duration'
649 select PitchTier 'correctName$'
650 numberOfPoints = Get number of points
651 for i from 1 to numberOfPoints
652 select PitchTier 'correctName$'
653 time = Get time from index... 'i'
654 pitch = Get value at index... 'i'
656 newtime = Get time along path... 'time'
657 select PitchTier CorrectPitch
658 Add point... 'newtime' 'pitch'
661 # Generate Manipulation and replace the PitchTier
662 select Sound 'sourceSound$'
663 noprogress To Manipulation... 0.05 60 600
664 select Manipulation 'sourceSound$'
665 plus PitchTier CorrectPitch
667 select Manipulation 'sourceSound$'
668 Get resynthesis (PSOLA)
669 Rename... lastExample
672 select PitchTier CorrectPitch
673 plus PitchTier 'correctName$'
675 plus Manipulation Source