3 # SpeakGoodChinese: SGC_ToneRecognizer.praat processes student utterances
4 # and generates a report on their tone production
6 # Copyright (C) 2007-2010 R.J.J.H. van Son
7 # The SpeakGoodChinese team are:
8 # Guangqin Chen, Zhonyan Chen, Stefan de Koning, Eveline van Hagen,
9 # Rob van Son, Dennis Vierkant, David Weenink
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
26 # include ToneRecognition.praat
27 # include ToneScript.praat
30 procedure sgc_ToneProt sgc_ToneProt.currentSound$ sgc_ToneProt.pinyin$ sgc_ToneProt.register sgc_ToneProt.proficiency sgc_ToneProt.language$
31 # Remove if included in main program!
32 sgc_ToneProt.viewportMargin = 5
34 sgc_ToneProt.precision = 3
35 if sgc_ToneProt.proficiency
36 sgc_ToneProt.precision = 1.5
38 # Stick to the raw recognition results or not
39 sgc_ToneProt.ultraStrict = sgc_ToneProt.proficiency
41 # Read and select the feedbacktext
42 call testLoadTable ToneFeedback_'sgc_ToneProt.language$'
43 if testLoadTable.table > 0
44 call loadTable ToneFeedback_'sgc_ToneProt.language$'
46 call loadTable ToneFeedback_EN
48 Rename... ToneFeedback
49 numberOfFeedbackRows = Get number of rows
52 if sgc_ToneProt.pinyin$ <> ""
53 sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "^\s*(.+)\s*$", "\1", 1)
54 sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "5", "0", 0)
55 # Missing neutral tones
56 call add_missing_neutral_tones 'sgc_ToneProt.pinyin$'
57 sgc_ToneProt.pinyin$ = add_missing_neutral_tones.pinyin$
60 # Reduction (lower sgc_ToneProt.register and narrow range) means errors
61 # The opposite mostly not. Asymmetry allows more room upward
62 # than downward (asymmetry = 2 => highBoundaryFactor ^ 2)
65 # Kill octave jumps: DANGEROUS
69 sgc_ToneProt.minimumPitch = 50
70 sgc_ToneProt.maximumPitch = 500
71 if sgc_ToneProt.register > 400
72 sgc_ToneProt.minimumPitch = 60
73 sgc_ToneProt.maximumPitch = 600
74 elsif sgc_ToneProt.register > 250
75 sgc_ToneProt.minimumPitch = 50
76 sgc_ToneProt.maximumPitch = 500
78 sgc_ToneProt.minimumPitch = 40
79 sgc_ToneProt.maximumPitch = 400
82 sgc_ToneProt.currentTestWord$ = sgc_ToneProt.pinyin$
84 sgc_ToneProt.precisionFactor = 2^(sgc_ToneProt.precision/12)
85 highBoundaryFactor = sgc_ToneProt.precisionFactor ^ asymmetry
86 lowBoundaryFactor = 1/sgc_ToneProt.precisionFactor
88 # Generate reference example
89 # Start with a range of 1 octave and a speed factor of 1
92 sgc_ToneProt.upperRegisterInput = sgc_ToneProt.register
93 call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 1 1 CorrectPitch
95 select Pitch 'sgc_ToneProt.currentTestWord$'
96 sgc_ToneProt.durationModel = Get total duration
97 maximumModelFzero = Get quantile... 0 0 0.95 Hertz
98 minimumModelFzero = Get quantile... 0 0 0.05 Hertz
99 if maximumModelFzero = undefined
100 maximumModelFzero = 0
102 if minimumModelFzero = undefined
103 minimumModelFzero = 0
105 sgc_ToneProt.modelPitchRange = 2
106 if minimumModelFzero > 0
107 sgc_ToneProt.modelPitchRange = maximumModelFzero / minimumModelFzero
109 sgc_ToneProt.modelPitchRange = 0
113 if fileReadable(sgc_ToneProt.currentSound$)
114 Read from file... 'sgc_ToneProt.currentSound$'
117 select Sound 'sgc_ToneProt.currentSound$'
123 durationSource = Get total duration
124 call convert2Pitch 'sgc_ToneProt.minimumPitch' 'sgc_ToneProt.maximumPitch'
125 te.recordedPitch = convert2Pitch.object
126 Rename... SourcePitch
128 # It is rather dangerous to kill Octave errors, so be careful
129 if killOctaveJumps > 0
132 Rename... SourcePitch
133 te.recordedPitch = selected("Pitch")
134 select Pitch OldSource
138 # Remove all pitch points outside a band around the upper sgc_ToneProt.register
139 select te.recordedPitch
140 upperCutOff = 1.7*sgc_ToneProt.upperRegisterInput
141 lowerCutOff = sgc_ToneProt.upperRegisterInput/4
142 Formula... if self > 'upperCutOff' then -1 else self endif
143 Formula... if self < 'lowerCutOff' then -1 else self endif
146 select te.recordedPitch
147 maximumRecFzero = Get quantile... 0 0 0.95 Hertz
148 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
149 minimumRecFzero = Get quantile... 0 0 0.05 Hertz
150 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
151 if maximumRecFzero = undefined
152 # Determine what should be told to the student
153 .recognitionText$ = "'sgc_ToneProt.currentTestWord$': ???"
154 for i from 1 to numberOfFeedbackRows
155 select Table ToneFeedback
156 .toneOne$ = Get value... 'i' T1
157 .toneTwo$ = Get value... 'i' T2
158 .toneText$ = Get value... 'i' Feedback
161 if .toneOne$ = "NoSound"
162 .feedbackText$ = .toneText$
166 #exit Error, nothing recorded
170 if minimumRecFzero > 0
171 recPitchRange = maximumRecFzero / minimumRecFzero
173 sgc_ToneProt.newUpperRegister = maximumRecFzero / maximumModelFzero * sgc_ToneProt.upperRegisterInput
174 sgc_ToneProt.newToneRange = recPitchRange / sgc_ToneProt.modelPitchRange
175 if sgc_ToneProt.newUpperRegister = undefined
176 sgc_ToneProt.newUpperRegister = sgc_ToneProt.upperRegisterInput
178 if sgc_ToneProt.newToneRange = undefined
179 sgc_ToneProt.newToneRange = 1
182 sgc_ToneProt.registerUsed$ = "OK"
184 # Advanced speakers must not speak too High, or too "Dramatic"
185 # Beginning speakers must also not speak too Low or with too Narrow a range
186 if sgc_ToneProt.newUpperRegister > highBoundaryFactor * sgc_ToneProt.upperRegisterInput
187 sgc_ToneProt.newUpperRegister = highBoundaryFactor * sgc_ToneProt.upperRegisterInput
188 sgc_ToneProt.registerUsed$ = "High"
189 elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newUpperRegister < lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
190 sgc_ToneProt.newUpperRegister = lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
191 sgc_ToneProt.registerUsed$ = "Low"
194 if sgc_ToneProt.newToneRange > highBoundaryFactor
195 sgc_ToneProt.newToneRange = highBoundaryFactor
197 elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newToneRange < lowBoundaryFactor and not sgc_ToneProt.proficiency
198 # Don't do this for advanced speakers
199 sgc_ToneProt.newToneRange = lowBoundaryFactor
200 rangeUsed$ = "Narrow"
204 if sgc_ToneProt.durationModel > spacing
205 speedFactor = (durationSource - spacing) / (sgc_ToneProt.durationModel - spacing)
209 sgc_ToneProt.newUpperRegister = round(sgc_ToneProt.newUpperRegister)
211 # Remove all pitch points outside a band around the upper sgc_ToneProt.register
212 select te.recordedPitch
213 upperCutOff = 1.5*sgc_ToneProt.newUpperRegister
214 lowerCutOff = sgc_ToneProt.newUpperRegister/3
215 Formula... if self > 'upperCutOff' then -1 else self endif
216 Formula... if self < 'lowerCutOff' then -1 else self endif
218 if killOctaveJumps > 0
219 Rename... OldSourcePitch
221 Rename... SourcePitch
222 te.recordedPitch = selected("Pitch")
223 select Pitch OldSourcePitch
227 # It is good to have the lowest and highest pitch frequencies
228 select te.recordedPitch
229 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
230 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
232 # Clean up the old example pitch
233 select Pitch 'sgc_ToneProt.currentTestWord$'
236 # Do the tone recognition
237 .numSyllables = toneScript.syllableCount
238 sgc_ToneProt.choiceReference$ = sgc_ToneProt.currentTestWord$
240 while sgc_ToneProt.choiceReference$ = sgc_ToneProt.currentTestWord$ and .skipSyllables < .numSyllables
241 call FreeToneRecognition 'sgc_ToneProt.choiceReference$' "REUSEPITCH" "" 'sgc_ToneProt.newUpperRegister' 'sgc_ToneProt.newToneRange' 'speedFactor' '.skipSyllables'
244 call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 'sgc_ToneProt.newToneRange' 'speedFactor' CorrectPitch
246 originalRecognizedWord$ = sgc_ToneProt.choiceReference$
247 if sgc_ToneProt.ultraStrict = 0
248 # [23]3 is often misidentified as 23, 20 or 30
249 if rindex_regex(sgc_ToneProt.currentTestWord$, "[23][^0-9]+3") > 0
250 if rindex_regex(sgc_ToneProt.currentTestWord$, "3[^0-9]+3") > 0
251 .c = rindex_regex(sgc_ToneProt.currentTestWord$, "3[^0-9]+3") - 1
252 if rindex_regex(sgc_ToneProt.choiceReference$, "^(.{'.c'})[23][^0-9]+[023]") > 0
253 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^(.{'.c'})[23]([^0-9]+)[023]", "\13\23", 1)
256 if rindex_regex(sgc_ToneProt.currentTestWord$, "2[^0-9]+3") > 0
257 .c = rindex_regex(sgc_ToneProt.currentTestWord$, "2[^0-9]+3") - 1
258 if rindex_regex(sgc_ToneProt.choiceReference$, "^(.{'.c'})[23][^0-9]+[023]") > 0
259 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "([^0-9]+)[23]([^0-9]+)[023]", "\12\23", 1)
264 # First syllable: 2<->3 exchanges
265 if rindex_regex(sgc_ToneProt.currentTestWord$, "^[^0-9]+2") > 0
266 if rindex_regex(sgc_ToneProt.choiceReference$, "^[^0-9]+3") > 0
267 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^([^0-9]+)[36]", "\12", 0)
269 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[^0-9]+3") > 0
270 if rindex_regex(sgc_ToneProt.choiceReference$, "^[^0-9]+2") > 0
271 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^([^0-9]+)2", "\13", 0)
273 # A single second tone is often misidentified as a neutral tone,
274 # A real neutral tone would be too low or too narrow and be discarded
275 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[^0-9]+2$") > 0
276 if rindex_regex(sgc_ToneProt.choiceReference$, "^[^0-9]+0$") > 0 and timeMinimum < timeMaximum
277 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "2", 0)
279 # A single fourth tone is often misidentified as a neutral tone,
280 # A real neutral tone would be too low or too narrow and be discarded
281 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[^0-9]+4$") > 0
282 if rindex_regex(sgc_ToneProt.choiceReference$, "^[^0-9]+0$") > 0 and timeMaximum < timeMinimum
283 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "4", 0)
288 # A recognized 0 after a 4 can be a 2: 4-0 => 4-2
289 if rindex_regex(sgc_ToneProt.currentTestWord$, "4[^0-9]+2") > 0
290 .c = rindex_regex(sgc_ToneProt.currentTestWord$, "4[^0-9]+2") - 1
291 if rindex_regex(sgc_ToneProt.choiceReference$, "^(.{'.c'})4[^0-9]+0") > 0
292 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^(.{'.c'}4[^0-9]+)0", "\12", 0)
297 # A recognized 0 between two tones 4 can be a 1
298 if rindex_regex(sgc_ToneProt.currentTestWord$, "4[^0-9]+1[^0-9]+4") > 0
299 .c = rindex_regex(sgc_ToneProt.currentTestWord$, "4[^0-9]+1[^0-9]+4") - 1
300 if rindex_regex(sgc_ToneProt.choiceReference$, "^(.{'.c'})4[^0-9]+0[^0-9]+4") > 0
301 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^(.{'.c'}4[^0-9]+)0([^0-9]+4)", "\11\2", 0)
307 # If wrong, then undo all changes
308 if sgc_ToneProt.currentTestWord$ != sgc_ToneProt.choiceReference$
309 sgc_ToneProt.choiceReference$ = originalRecognizedWord$
312 sgc_ToneProt.toneChoiceReference$ = sgc_ToneProt.choiceReference$
314 ###############################################
318 ###############################################
319 result$ = "'tab$''sgc_ToneProt.currentTestWord$''tab$''sgc_ToneProt.choiceReference$''tab$''sgc_ToneProt.newUpperRegister''tab$''sgc_ToneProt.newToneRange''tab$''speedFactor''tab$''sgc_ToneProt.registerUsed$''tab$''rangeUsed$'"
320 if sgc_ToneProt.currentTestWord$ = sgc_ToneProt.toneChoiceReference$
321 result$ = "Correct:"+result$
323 result$ = "Wrong:"+result$
326 # Initialize result texts
327 .recognitionText$ = "'sgc_ToneProt.currentTestWord$': "
328 .choiceText$ = replace_regex$(sgc_ToneProt.choiceReference$, "6", "\?", 0)
329 .feedbackText$ = "----"
331 # Separate tone from pronunciation errors
332 currentToneWord$ = replace_regex$(sgc_ToneProt.currentTestWord$, "[a-z]+", "\*", 0)
333 choiceToneReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[a-z]+", "\*", 0)
335 # Determine what should be told to the student
336 if sgc_ToneProt.registerUsed$ = "Low"
337 .recognitionText$ = .recognitionText$ + "???"
338 for i from 1 to numberOfFeedbackRows
339 select Table ToneFeedback
340 .toneOne$ = Get value... 'i' T1
341 .toneTwo$ = Get value... 'i' T2
342 .toneText$ = Get value... 'i' Feedback
345 .feedbackText$ = .toneText$
349 elsif rangeUsed$ = "Narrow"
350 .recognitionText$ = .recognitionText$ + "???"
351 for i from 1 to numberOfFeedbackRows
352 select Table ToneFeedback
353 .toneOne$ = Get value... 'i' T1
354 .toneTwo$ = Get value... 'i' T2
355 .toneText$ = Get value... 'i' Feedback
357 if .toneOne$ = "Narrow"
358 .feedbackText$ = .toneText$
362 elsif sgc_ToneProt.registerUsed$ = "High"
363 .recognitionText$ = .recognitionText$ + .choiceText$
364 for i from 1 to numberOfFeedbackRows
365 select Table ToneFeedback
366 .toneOne$ = Get value... 'i' T1
367 .toneTwo$ = Get value... 'i' T2
368 .toneText$ = Get value... 'i' Feedback
370 if .toneOne$ = "High"
371 .feedbackText$ = .toneText$
375 elsif rangeUsed$ = "Wide"
376 .recognitionText$ = .recognitionText$ + .choiceText$
377 for i from 1 to numberOfFeedbackRows
378 select Table ToneFeedback
379 .toneOne$ = Get value... 'i' T1
380 .toneTwo$ = Get value... 'i' T2
381 .toneText$ = Get value... 'i' Feedback
383 if .toneOne$ = "Wide"
384 .feedbackText$ = .toneText$
388 # Bad tones, first handle first syllable
389 elsif rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+6") > 0
390 .recognitionText$ = .recognitionText$ + .choiceText$
392 for i from 1 to numberOfFeedbackRows
393 select Table ToneFeedback
394 .toneOne$ = Get value... 'i' T1
395 .toneTwo$ = Get value... 'i' T2
396 .toneText$ = Get value... 'i' Feedback
401 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
403 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'") > 0 and .toneTwo$ = "-"
404 .feedbackText$ = .feedbackText$ + .toneText$ + " "
407 # Bad tones, then handle second syllable
408 elsif rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+6$") > 0
409 .recognitionText$ = .recognitionText$ + .choiceText$
411 for i from 1 to numberOfFeedbackRows
412 select Table ToneFeedback
413 .toneOne$ = Get value... 'i' T1
414 .toneTwo$ = Get value... 'i' T2
415 .toneText$ = Get value... 'i' Feedback
420 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
422 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
423 .feedbackText$ = .feedbackText$ + .toneText$ + " "
426 # Just plain wrong tones
427 elsif currentToneWord$ <> choiceToneReference$
428 .recognitionText$ = .recognitionText$ + .choiceText$
429 for i from 1 to numberOfFeedbackRows
430 select Table ToneFeedback
431 .toneOne$ = Get value... 'i' T1
432 .toneTwo$ = Get value... 'i' T2
433 .toneText$ = Get value... 'i' Feedback
435 if rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
436 .feedbackText$ = .toneText$
437 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'[a-zA-Z]+'.toneTwo$'$") > 0
438 .feedbackText$ = .toneText$
439 elsif .toneOne$ = "Wrong"
440 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
446 .recognitionText$ = .recognitionText$ + .choiceText$
447 for i from 1 to numberOfFeedbackRows
448 select Table ToneFeedback
449 .toneOne$ = Get value... 'i' T1
450 .toneTwo$ = Get value... 'i' T2
451 .toneText$ = Get value... 'i' Feedback
453 if .toneOne$ = "Correct"
454 .feedbackText$ = .toneText$
463 Create Table with column names... Feedback 3 Text
464 Set string value... 1 Text '.recognitionText$'
465 Set string value... 2 Text '.feedbackText$'
466 Set string value... 3 Text '.label$'
469 select Table ToneFeedback
473 freqTop = 1.5 * sgc_ToneProt.upperRegisterInput
475 # Replace recorded sound with new sound
476 if not fileReadable(sgc_ToneProt.currentSound$)
477 select Sound 'sgc_ToneProt.currentSound$'
480 Copy... 'sgc_ToneProt.currentSound$'
486 plus Pitch 'sgc_ToneProt.currentTestWord$'