3 # SpeakGoodChinese: SGC_ToneRecognizer.praat processes student utterances
4 # and generates a report on their tone production
6 # Copyright (C) 2007-2010 R.J.J.H. van Son
7 # The SpeakGoodChinese team are:
8 # Guangqin Chen, Zhonyan Chen, Stefan de Koning, Eveline van Hagen,
9 # Rob van Son, Dennis Vierkant, David Weenink
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
26 # include ToneRecognition.praat
27 # include ToneScript.praat
30 procedure sgc_ToneProt sgc_ToneProt.currentSound$ sgc_ToneProt.pinyin$ sgc_ToneProt.register sgc_ToneProt.proficiency sgc_ToneProt.language$
31 # Remove if included in main program!
32 sgc_ToneProt.viewportMargin = 5
34 sgc_ToneProt.precision = 3
35 if sgc_ToneProt.proficiency
36 sgc_ToneProt.precision = 1.5
38 # Stick to the raw recognition results or not
39 sgc_ToneProt.ultraStrict = sgc_ToneProt.proficiency
41 # Read and select the feedbacktext
42 call loadTable ToneFeedback_'sgc_ToneProt.language$'
43 Rename... ToneFeedback
44 numberOfFeedbackRows = Get number of rows
47 if sgc_ToneProt.pinyin$ <> ""
48 sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "^\s*(.+)\s*$", "\1", 1)
49 sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "5", "0", 0)
50 # Missing neutral tones
51 call add_missing_neutral_tones 'sgc_ToneProt.pinyin$'
52 sgc_ToneProt.pinyin$ = add_missing_neutral_tones.pinyin$
55 # Reduction (lower sgc_ToneProt.register and narrow range) means errors
56 # The opposite mostly does not. Asymmetry allows more room upward
57 # than downward (asymmetry = 2 => highBoundaryFactor = precisionFactor ^ 2)
60 # Kill octave jumps: DANGEROUS
64 sgc_ToneProt.minimumPitch = 50
65 sgc_ToneProt.maximumPitch = 500
66 if sgc_ToneProt.register > 400
67 sgc_ToneProt.minimumPitch = 60
68 sgc_ToneProt.maximumPitch = 600
69 elsif sgc_ToneProt.register > 250
70 sgc_ToneProt.minimumPitch = 50
71 sgc_ToneProt.maximumPitch = 500
73 sgc_ToneProt.minimumPitch = 40
74 sgc_ToneProt.maximumPitch = 400
77 sgc_ToneProt.currentTestWord$ = sgc_ToneProt.pinyin$
79 sgc_ToneProt.precisionFactor = 2^(sgc_ToneProt.precision/12)
80 highBoundaryFactor = sgc_ToneProt.precisionFactor ^ asymmetry
81 lowBoundaryFactor = 1/sgc_ToneProt.precisionFactor
83 # Generate reference example
84 # Start with a range of 1 octave and a speed factor of 1
87 sgc_ToneProt.upperRegisterInput = sgc_ToneProt.register
88 call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 1 1 CorrectPitch
90 select Pitch 'sgc_ToneProt.currentTestWord$'
91 sgc_ToneProt.durationModel = Get total duration
92 maximumModelFzero = Get quantile... 0 0 0.95 Hertz
93 minimumModelFzero = Get quantile... 0 0 0.05 Hertz
94 if maximumModelFzero = undefined
97 if minimumModelFzero = undefined
100 sgc_ToneProt.modelPitchRange = 2
101 if minimumModelFzero > 0
102 sgc_ToneProt.modelPitchRange = maximumModelFzero / minimumModelFzero
104 sgc_ToneProt.modelPitchRange = 0
108 if fileReadable(sgc_ToneProt.currentSound$)
109 Read from file... 'sgc_ToneProt.currentSound$'
112 select Sound 'sgc_ToneProt.currentSound$'
118 durationSource = Get total duration
119 call convert2Pitch 'sgc_ToneProt.minimumPitch' 'sgc_ToneProt.maximumPitch'
120 te.recordedPitch = convert2Pitch.object
121 Rename... SourcePitch
123 # It is rather dangerous to kill Octave errors, so be careful
124 if killOctaveJumps > 0
127 Rename... SourcePitch
128 te.recordedPitch = selected("Pitch")
129 select Pitch OldSource
133 # Remove all pitch points outside a band around the upper sgc_ToneProt.register
134 select te.recordedPitch
135 upperCutOff = 1.7*sgc_ToneProt.upperRegisterInput
136 lowerCutOff = sgc_ToneProt.upperRegisterInput/4
137 Formula... if self > 'upperCutOff' then -1 else self endif
138 Formula... if self < 'lowerCutOff' then -1 else self endif
141 select te.recordedPitch
142 maximumRecFzero = Get quantile... 0 0 0.95 Hertz
143 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
144 minimumRecFzero = Get quantile... 0 0 0.05 Hertz
145 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
146 if maximumRecFzero = undefined
147 # Determine what should be told to the student
148 .recognitionText$ = "'sgc_ToneProt.currentTestWord$': ???"
149 for i from 1 to numberOfFeedbackRows
150 select Table ToneFeedback
151 .toneOne$ = Get value... 'i' T1
152 .toneTwo$ = Get value... 'i' T2
153 .toneText$ = Get value... 'i' Feedback
156 if .toneOne$ = "NoSound"
157 .feedbackText$ = .toneText$
161 #exit Error, nothing recorded
165 if minimumRecFzero > 0
166 recPitchRange = maximumRecFzero / minimumRecFzero
168 sgc_ToneProt.newUpperRegister = maximumRecFzero / maximumModelFzero * sgc_ToneProt.upperRegisterInput
169 sgc_ToneProt.newToneRange = recPitchRange / sgc_ToneProt.modelPitchRange
170 if sgc_ToneProt.newUpperRegister = undefined
171 sgc_ToneProt.newUpperRegister = sgc_ToneProt.upperRegisterInput
173 if sgc_ToneProt.newToneRange = undefined
174 sgc_ToneProt.newToneRange = 1
177 sgc_ToneProt.registerUsed$ = "OK"
179 # Advanced speakers must not speak too High, or too "Dramatic"
180 # Beginning speakers also not too Low or too Narrow ranges
181 if sgc_ToneProt.newUpperRegister > highBoundaryFactor * sgc_ToneProt.upperRegisterInput
182 sgc_ToneProt.newUpperRegister = highBoundaryFactor * sgc_ToneProt.upperRegisterInput
183 sgc_ToneProt.registerUsed$ = "High"
184 elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newUpperRegister < lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
185 sgc_ToneProt.newUpperRegister = lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
186 sgc_ToneProt.registerUsed$ = "Low"
189 if sgc_ToneProt.newToneRange > highBoundaryFactor
190 sgc_ToneProt.newToneRange = highBoundaryFactor
192 elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newToneRange < lowBoundaryFactor and not sgc_ToneProt.proficiency
193 # Don't do this for advanced speakers
194 sgc_ToneProt.newToneRange = lowBoundaryFactor
195 rangeUsed$ = "Narrow"
199 if sgc_ToneProt.durationModel > spacing
200 speedFactor = (durationSource - spacing) / (sgc_ToneProt.durationModel - spacing)
204 sgc_ToneProt.newUpperRegister = round(sgc_ToneProt.newUpperRegister)
206 # Remove all pitch points outside a band around the upper sgc_ToneProt.register
207 select te.recordedPitch
208 upperCutOff = 1.5*sgc_ToneProt.newUpperRegister
209 lowerCutOff = sgc_ToneProt.newUpperRegister/3
210 Formula... if self > 'upperCutOff' then -1 else self endif
211 Formula... if self < 'lowerCutOff' then -1 else self endif
213 if killOctaveJumps > 0
214 Rename... OldSourcePitch
216 Rename... SourcePitch
217 te.recordedPitch = selected("Pitch")
218 select Pitch OldSourcePitch
222 # It is good to have the lowest and highest pitch frequencies
223 select te.recordedPitch
224 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
225 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
227 # Clean up the old example pitch
228 select Pitch 'sgc_ToneProt.currentTestWord$'
231 # Do the tone recognition
232 .numSyllables = toneScript.syllableCount
233 sgc_ToneProt.choiceReference$ = sgc_ToneProt.currentTestWord$
235 while sgc_ToneProt.choiceReference$ = sgc_ToneProt.currentTestWord$ and .skipSyllables < .numSyllables
236 call FreeToneRecognition 'sgc_ToneProt.choiceReference$' "REUSEPITCH" "" 'sgc_ToneProt.newUpperRegister' 'sgc_ToneProt.newToneRange' 'speedFactor' '.skipSyllables'
239 call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 'sgc_ToneProt.newToneRange' 'speedFactor' CorrectPitch
241 originalRecognizedWord$ = sgc_ToneProt.choiceReference$
242 if sgc_ToneProt.ultraStrict = 0
243 # [23]3 is often misidentified as 23, 20 or 30
244 if rindex_regex(sgc_ToneProt.currentTestWord$, "[23][^0-9]+3") > 0
245 if rindex_regex(sgc_ToneProt.currentTestWord$, "3[^0-9]+3") > 0
246 .c = rindex_regex(sgc_ToneProt.currentTestWord$, "3[^0-9]+3") - 1
247 if rindex_regex(sgc_ToneProt.choiceReference$, "^(.{'.c'})[23][^0-9]+[023]") > 0
248 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^(.{'.c'})[23]([^0-9]+)[023]", "\13\23", 1)
251 if rindex_regex(sgc_ToneProt.currentTestWord$, "2[^0-9]+3") > 0
252 .c = rindex_regex(sgc_ToneProt.currentTestWord$, "2[^0-9]+3") - 1
253 if rindex_regex(sgc_ToneProt.choiceReference$, "^(.{'.c'})[23][^0-9]+[023]") > 0
254 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "([^0-9]+)[23]([^0-9]+)[023]", "\12\23", 1)
259 # First syllable: 2<->3 exchanges
260 if rindex_regex(sgc_ToneProt.currentTestWord$, "^[^0-9]+2") > 0
261 if rindex_regex(sgc_ToneProt.choiceReference$, "^[^0-9]+3") > 0
262 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^([^0-9]+)[36]", "\12", 0)
264 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[^0-9]+3") > 0
265 if rindex_regex(sgc_ToneProt.choiceReference$, "^[^0-9]+2") > 0
266 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^([^0-9]+)2", "\13", 0)
268 # A single second tone is often misidentified as a neutral tone,
269 # A real neutral tone would be too low or too narrow and be discarded
270 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[^0-9]+2$") > 0
271 if rindex_regex(sgc_ToneProt.choiceReference$, "^[^0-9]+0$") > 0 and timeMinimum < timeMaximum
272 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "2", 0)
274 # A single fourth tone is often misidentified as a neutral tone,
275 # A real neutral tone would be too low or too narrow and be discarded
276 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[^0-9]+4$") > 0
277 if rindex_regex(sgc_ToneProt.choiceReference$, "^[^0-9]+0$") > 0 and timeMaximum < timeMinimum
278 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "4", 0)
283 # A recognized 0 after a 4 can be a 2: 4-0 => 4-2
284 if rindex_regex(sgc_ToneProt.currentTestWord$, "4[^0-9]+2") > 0
285 .c = rindex_regex(sgc_ToneProt.currentTestWord$, "4[^0-9]+2") - 1
286 if rindex_regex(sgc_ToneProt.choiceReference$, "^(.{'.c'})4[^0-9]+0") > 0
287 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^(.{'.c'}4[^0-9]+)0", "\12", 0)
292 # A recognized 0 between two tones 4 can be a 1
293 if rindex_regex(sgc_ToneProt.currentTestWord$, "4[^0-9]+1[^0-9]+4") > 0
294 .c = rindex_regex(sgc_ToneProt.currentTestWord$, "4[^0-9]+1[^0-9]+4") - 1
295 if rindex_regex(sgc_ToneProt.choiceReference$, "^(.{'.c'})4[^0-9]+0[^0-9]+4") > 0
296 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "^(.{'.c'}4[^0-9]+)0([^0-9]+4)", "\11\2", 0)
302 # If wrong, then undo all changes
303 if sgc_ToneProt.currentTestWord$ != sgc_ToneProt.choiceReference$
304 sgc_ToneProt.choiceReference$ = originalRecognizedWord$
307 sgc_ToneProt.toneChoiceReference$ = sgc_ToneProt.choiceReference$
309 ###############################################
313 ###############################################
314 result$ = "'tab$''sgc_ToneProt.currentTestWord$''tab$''sgc_ToneProt.choiceReference$''tab$''sgc_ToneProt.newUpperRegister''tab$''sgc_ToneProt.newToneRange''tab$''speedFactor''tab$''sgc_ToneProt.registerUsed$''tab$''rangeUsed$'"
315 if sgc_ToneProt.currentTestWord$ = sgc_ToneProt.toneChoiceReference$
316 result$ = "Correct:"+result$
318 result$ = "Wrong:"+result$
321 # Initialize result texts
322 .recognitionText$ = "'sgc_ToneProt.currentTestWord$': "
323 .choiceText$ = replace_regex$(sgc_ToneProt.choiceReference$, "6", "\?", 0)
324 .feedbackText$ = "----"
326 # Separate tone from pronunciation errors
327 currentToneWord$ = replace_regex$(sgc_ToneProt.currentTestWord$, "[a-z]+", "\*", 0)
328 choiceToneReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[a-z]+", "\*", 0)
330 # Determine what should be told to the student
331 if sgc_ToneProt.registerUsed$ = "Low"
332 .recognitionText$ = .recognitionText$ + "???"
333 for i from 1 to numberOfFeedbackRows
334 select Table ToneFeedback
335 .toneOne$ = Get value... 'i' T1
336 .toneTwo$ = Get value... 'i' T2
337 .toneText$ = Get value... 'i' Feedback
340 .feedbackText$ = .toneText$
344 elsif rangeUsed$ = "Narrow"
345 .recognitionText$ = .recognitionText$ + "???"
346 for i from 1 to numberOfFeedbackRows
347 select Table ToneFeedback
348 .toneOne$ = Get value... 'i' T1
349 .toneTwo$ = Get value... 'i' T2
350 .toneText$ = Get value... 'i' Feedback
352 if .toneOne$ = "Narrow"
353 .feedbackText$ = .toneText$
357 elsif sgc_ToneProt.registerUsed$ = "High"
358 .recognitionText$ = .recognitionText$ + .choiceText$
359 for i from 1 to numberOfFeedbackRows
360 select Table ToneFeedback
361 .toneOne$ = Get value... 'i' T1
362 .toneTwo$ = Get value... 'i' T2
363 .toneText$ = Get value... 'i' Feedback
365 if .toneOne$ = "High"
366 .feedbackText$ = .toneText$
370 elsif rangeUsed$ = "Wide"
371 .recognitionText$ = .recognitionText$ + .choiceText$
372 for i from 1 to numberOfFeedbackRows
373 select Table ToneFeedback
374 .toneOne$ = Get value... 'i' T1
375 .toneTwo$ = Get value... 'i' T2
376 .toneText$ = Get value... 'i' Feedback
378 if .toneOne$ = "Wide"
379 .feedbackText$ = .toneText$
383 # Bad tones, first handle first syllable
384 elsif rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+6") > 0
385 .recognitionText$ = .recognitionText$ + .choiceText$
387 for i from 1 to numberOfFeedbackRows
388 select Table ToneFeedback
389 .toneOne$ = Get value... 'i' T1
390 .toneTwo$ = Get value... 'i' T2
391 .toneText$ = Get value... 'i' Feedback
396 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
398 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'") > 0 and .toneTwo$ = "-"
399 .feedbackText$ = .feedbackText$ + .toneText$ + " "
402 # Bad tones, then handle second syllable
403 elsif rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+6$") > 0
404 .recognitionText$ = .recognitionText$ + .choiceText$
406 for i from 1 to numberOfFeedbackRows
407 select Table ToneFeedback
408 .toneOne$ = Get value... 'i' T1
409 .toneTwo$ = Get value... 'i' T2
410 .toneText$ = Get value... 'i' Feedback
415 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
417 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
418 .feedbackText$ = .feedbackText$ + .toneText$ + " "
421 # Just plain wrong tones
422 elsif currentToneWord$ <> choiceToneReference$
423 .recognitionText$ = .recognitionText$ + .choiceText$
424 for i from 1 to numberOfFeedbackRows
425 select Table ToneFeedback
426 .toneOne$ = Get value... 'i' T1
427 .toneTwo$ = Get value... 'i' T2
428 .toneText$ = Get value... 'i' Feedback
430 if rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
431 .feedbackText$ = .toneText$
432 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'[a-zA-Z]+'.toneTwo$'$") > 0
433 .feedbackText$ = .toneText$
434 elsif .toneOne$ = "Wrong"
435 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
441 .recognitionText$ = .recognitionText$ + .choiceText$
442 for i from 1 to numberOfFeedbackRows
443 select Table ToneFeedback
444 .toneOne$ = Get value... 'i' T1
445 .toneTwo$ = Get value... 'i' T2
446 .toneText$ = Get value... 'i' Feedback
448 if .toneOne$ = "Correct"
449 .feedbackText$ = .toneText$
458 Create Table with column names... Feedback 3 Text
459 Set string value... 1 Text '.recognitionText$'
460 Set string value... 2 Text '.feedbackText$'
461 Set string value... 3 Text '.label$'
464 select Table ToneFeedback
468 freqTop = 1.5 * sgc_ToneProt.upperRegisterInput
470 # Replace recorded sound with new sound
471 if not fileReadable(sgc_ToneProt.currentSound$)
472 select Sound 'sgc_ToneProt.currentSound$'
475 Copy... 'sgc_ToneProt.currentSound$'
481 plus Pitch 'sgc_ToneProt.currentTestWord$'