3 # SpeakGoodChinese: SGC_ToneRecognizer.praat processes student utterances
4 # and generates a report on their tone production
6 # Copyright (C) 2007-2010 R.J.J.H. van Son
7 # The SpeakGoodChinese team are:
8 # Guangqin Chen, Zhonyan Chen, Stefan de Koning, Eveline van Hagen,
9 # Rob van Son, Dennis Vierkant, David Weenink
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
26 # include ToneRecognition.praat
27 # include ToneScript.praat
30 procedure sgc_ToneProt sgc_ToneProt.currentSound$ sgc_ToneProt.pinyin$ sgc_ToneProt.register sgc_ToneProt.proficiency sgc_ToneProt.language$
31 # Remove if included in main program!
32 sgc_ToneProt.viewportMargin = 5
34 sgc_ToneProt.precision = 3
35 if sgc_ToneProt.proficiency
36 sgc_ToneProt.precision = 1.5
38 # Stick to the raw recognition results or not
39 sgc_ToneProt.ultraStrict = sgc_ToneProt.proficiency
42 # Read and select the feedback text
43 call loadTable ToneFeedback_'sgc_ToneProt.language$'
44 Rename... ToneFeedback
45 numberOfFeedbackRows = Get number of rows
48 if sgc_ToneProt.pinyin$ <> ""
49 sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "^\s*(.+)\s*$", "\1", 1)
50 sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "5", "0", 0)
51 # Missing neutral tones
52 call add_missing_neutral_tones 'sgc_ToneProt.pinyin$'
53 sgc_ToneProt.pinyin$ = add_missing_neutral_tones.pinyin$
56 # Reduction (lower sgc_ToneProt.register and narrow range) means errors
57 # The opposite mostly not. Asymmetry allows more room upward
58 # than downward (asymmetry = 2 => highBoundaryFactor ^ 2)
61 # Kill octave jumps: DANGEROUS
65 sgc_ToneProt.minimumPitch = 50
66 sgc_ToneProt.maximumPitch = 500
67 if sgc_ToneProt.register > 400
68 sgc_ToneProt.minimumPitch = 60
69 sgc_ToneProt.maximumPitch = 600
70 elsif sgc_ToneProt.register > 250
71 sgc_ToneProt.minimumPitch = 50
72 sgc_ToneProt.maximumPitch = 500
74 sgc_ToneProt.minimumPitch = 40
75 sgc_ToneProt.maximumPitch = 400
78 sgc_ToneProt.currentTestWord$ = sgc_ToneProt.pinyin$
80 sgc_ToneProt.precisionFactor = 2^(sgc_ToneProt.precision/12)
81 highBoundaryFactor = sgc_ToneProt.precisionFactor ^ asymmetry
82 lowBoundaryFactor = 1/sgc_ToneProt.precisionFactor
84 # Generate reference example
85 # Start with a range of 1 octave and a speed factor of 1
88 sgc_ToneProt.upperRegisterInput = sgc_ToneProt.register
89 call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 1 1 CorrectPitch
91 select Pitch 'sgc_ToneProt.currentTestWord$'
92 sgc_ToneProt.durationModel = Get total duration
93 maximumModelFzero = Get quantile... 0 0 0.95 Hertz
94 minimumModelFzero = Get quantile... 0 0 0.05 Hertz
95 if maximumModelFzero = undefined
98 if minimumModelFzero = undefined
101 sgc_ToneProt.modelPitchRange = 2
102 if minimumModelFzero > 0
103 sgc_ToneProt.modelPitchRange = maximumModelFzero / minimumModelFzero
105 sgc_ToneProt.modelPitchRange = 0
109 if fileReadable(sgc_ToneProt.currentSound$)
110 Read from file... 'sgc_ToneProt.currentSound$'
113 select Sound 'sgc_ToneProt.currentSound$'
119 durationSource = Get total duration
120 call convert2Pitch 'sgc_ToneProt.minimumPitch' 'sgc_ToneProt.maximumPitch'
121 te.recordedPitch = convert2Pitch.object
122 Rename... SourcePitch
124 # It is rather dangerous to kill Octave errors, so be careful
125 if killOctaveJumps > 0
128 Rename... SourcePitch
129 te.recordedPitch = selected("Pitch")
130 select Pitch OldSource
134 # Remove all pitch points outside a band around the upper sgc_ToneProt.register
135 select te.recordedPitch
136 upperCutOff = 1.7*sgc_ToneProt.upperRegisterInput
137 lowerCutOff = sgc_ToneProt.upperRegisterInput/4
138 Formula... if self > 'upperCutOff' then -1 else self endif
139 Formula... if self < 'lowerCutOff' then -1 else self endif
142 select te.recordedPitch
143 maximumRecFzero = Get quantile... 0 0 0.95 Hertz
144 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
145 minimumRecFzero = Get quantile... 0 0 0.05 Hertz
146 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
147 if maximumRecFzero = undefined
148 # Determine what should be told to the student
149 .recognitionText$ = "'sgc_ToneProt.currentTestWord$': ???"
150 for i from 1 to numberOfFeedbackRows
151 select Table ToneFeedback
152 .toneOne$ = Get value... 'i' T1
153 .toneTwo$ = Get value... 'i' T2
154 .toneText$ = Get value... 'i' Feedback
157 if .toneOne$ = "NoSound"
158 .feedbackText$ = .toneText$
162 #exit Error, nothing recorded
166 if minimumRecFzero > 0
167 recPitchRange = maximumRecFzero / minimumRecFzero
169 sgc_ToneProt.newUpperRegister = maximumRecFzero / maximumModelFzero * sgc_ToneProt.upperRegisterInput
170 sgc_ToneProt.newToneRange = recPitchRange / sgc_ToneProt.modelPitchRange
171 if sgc_ToneProt.newUpperRegister = undefined
172 sgc_ToneProt.newUpperRegister = sgc_ToneProt.upperRegisterInput
174 if sgc_ToneProt.newToneRange = undefined
175 sgc_ToneProt.newToneRange = 1
178 sgc_ToneProt.registerUsed$ = "OK"
180 # Advanced speakers must not speak too High, or too "Dramatic"
181 # Beginning speakers must also not speak too Low or with too Narrow a range
182 if sgc_ToneProt.newUpperRegister > highBoundaryFactor * sgc_ToneProt.upperRegisterInput
183 sgc_ToneProt.newUpperRegister = highBoundaryFactor * sgc_ToneProt.upperRegisterInput
184 sgc_ToneProt.registerUsed$ = "High"
185 elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newUpperRegister < lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
186 sgc_ToneProt.newUpperRegister = lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
187 sgc_ToneProt.registerUsed$ = "Low"
190 if sgc_ToneProt.newToneRange > highBoundaryFactor
191 sgc_ToneProt.newToneRange = highBoundaryFactor
193 elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newToneRange < lowBoundaryFactor and not sgc_ToneProt.proficiency
194 # Don't do this for advanced speakers
195 sgc_ToneProt.newToneRange = lowBoundaryFactor
196 rangeUsed$ = "Narrow"
200 if sgc_ToneProt.durationModel > spacing
201 speedFactor = (durationSource - spacing) / (sgc_ToneProt.durationModel - spacing)
205 sgc_ToneProt.newUpperRegister = round(sgc_ToneProt.newUpperRegister)
207 # Remove all pitch points outside a band around the upper sgc_ToneProt.register
208 select te.recordedPitch
209 upperCutOff = 1.5*sgc_ToneProt.newUpperRegister
210 lowerCutOff = sgc_ToneProt.newUpperRegister/3
211 Formula... if self > 'upperCutOff' then -1 else self endif
212 Formula... if self < 'lowerCutOff' then -1 else self endif
214 if killOctaveJumps > 0
215 Rename... OldSourcePitch
217 Rename... SourcePitch
218 te.recordedPitch = selected("Pitch")
219 select Pitch OldSourcePitch
223 # It is good to have the lowest and highest pitch frequencies
224 select te.recordedPitch
225 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
226 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
228 # Clean up the old example pitch
229 select Pitch 'sgc_ToneProt.currentTestWord$'
232 # Do the tone recognition
233 call FreeToneRecognition 'sgc_ToneProt.currentTestWord$' "REUSEPITCH" "" 'sgc_ToneProt.newUpperRegister' 'sgc_ToneProt.newToneRange' 'speedFactor'
234 call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 'sgc_ToneProt.newToneRange' 'speedFactor' CorrectPitch
237 originalRecognizedWord$ = sgc_ToneProt.choiceReference$
238 if sgc_ToneProt.ultraStrict = 0
239 # First syllable: 2<->3 (6) exchanges (incl 6)
240 if rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+2[a-zA-Z]+[0-4]$") > 0
241 if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+[36][a-zA-Z]+[0-4]$") > 0
242 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[36]([a-zA-Z]+[0-4])$", "2\1", 0)
244 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+3[a-zA-Z]+[0-4]$") > 0
245 if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+[26][a-zA-Z]+[0-4]$") > 0
246 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[26]([a-zA-Z]+[0-4])$", "3\1", 0)
248 # A single second tone is often misidentified as a neutral tone,
249 # A real neutral tone would be too low or too narrow and be discarded
250 # Leaves us with erroneous tone 4
251 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+2$") > 0
252 if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+0$") > 0 and timeMinimum < timeMaximum
253 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "2", 0)
255 # A single fourth tone is often misidentified as a neutral tone,
256 # A real neutral tone would be too low or too narrow and be discarded
257 # Leaves us with erroneous tones 2 and 3
258 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+4$") > 0
259 if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+0$") > 0 and timeMaximum < timeMinimum
260 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "4", 0)
264 # Second (last) syllable, 0<->6 exchanges and 2<->3
265 # A recognized 0 after a 4 can be a 2: 4-0 => 4-2
266 if rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[4][a-zA-Z]+2$") > 0
267 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[4][a-zA-Z]+[0]$") > 0
268 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[0]$", "2", 0)
271 # A final 6 after a valid tone is often a recognition error
272 # A final 6 can be a 0
273 if rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[0-9][a-zA-Z]+0$") > 0
274 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[0-4][a-zA-Z]+6$") > 0
275 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "6$", "0", 0)
277 # Second (last) syllable, 2<->3 exchanges after [23] tones
278 # A recognized 6 (or 3) after a valid tone [1-4] is mostly wrong, can be a 2
279 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[1-4][a-zA-Z]+2$") > 0
280 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[1-4][a-zA-Z]+[36]$") > 0
281 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[36]$", "2", 0)
283 # A recognized 6 after a [23] is mostly wrong, can be a 3
284 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[23][a-zA-Z]+3$") > 0
285 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[23][a-zA-Z]+[26]$") > 0
286 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[26]$", "3", 0)
288 # A recognized 6 after a [3] is mostly wrong, can be a 1
289 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[3][a-zA-Z]+1$") > 0
290 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[3][a-zA-Z]+[6]$") > 0
291 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[6]$", "1", 0)
295 # Clean up odd things constructed with special cases
296 # Target is 3-3, but recognized is 2-3, which is CORRECT. Change it into 3-3
297 if rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[3][a-zA-Z]+[3]$") > 0
298 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[2][a-zA-Z]+[3]$") > 0
299 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[2]([a-zA-Z]+[3])$", "3\1", 0)
304 # If wrong, then undo all changes
305 if sgc_ToneProt.currentTestWord$ != sgc_ToneProt.choiceReference$
306 sgc_ToneProt.choiceReference$ = originalRecognizedWord$
309 sgc_ToneProt.toneChoiceReference$ = sgc_ToneProt.choiceReference$
311 ###############################################
315 ###############################################
316 result$ = "'tab$''sgc_ToneProt.currentTestWord$''tab$''sgc_ToneProt.choiceReference$''tab$''sgc_ToneProt.newUpperRegister''tab$''sgc_ToneProt.newToneRange''tab$''speedFactor''tab$''sgc_ToneProt.registerUsed$''tab$''rangeUsed$'"
317 if sgc_ToneProt.currentTestWord$ = sgc_ToneProt.toneChoiceReference$
318 result$ = "Correct:"+result$
320 result$ = "Wrong:"+result$
323 # Initialize result texts
324 .recognitionText$ = "'sgc_ToneProt.currentTestWord$': "
325 .choiceText$ = replace_regex$(sgc_ToneProt.choiceReference$, "6", "\?", 0)
326 .feedbackText$ = "----"
328 # Separate tone from pronunciation errors
329 currentToneWord$ = replace_regex$(sgc_ToneProt.currentTestWord$, "[a-z]+", "\*", 0)
330 choiceToneReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[a-z]+", "\*", 0)
332 # Determine what should be told to the student
333 if sgc_ToneProt.registerUsed$ = "Low"
334 .recognitionText$ = .recognitionText$ + "???"
335 for i from 1 to numberOfFeedbackRows
336 select Table ToneFeedback
337 .toneOne$ = Get value... 'i' T1
338 .toneTwo$ = Get value... 'i' T2
339 .toneText$ = Get value... 'i' Feedback
342 .feedbackText$ = .toneText$
346 elsif rangeUsed$ = "Narrow"
347 .recognitionText$ = .recognitionText$ + "???"
348 for i from 1 to numberOfFeedbackRows
349 select Table ToneFeedback
350 .toneOne$ = Get value... 'i' T1
351 .toneTwo$ = Get value... 'i' T2
352 .toneText$ = Get value... 'i' Feedback
354 if .toneOne$ = "Narrow"
355 .feedbackText$ = .toneText$
359 elsif sgc_ToneProt.registerUsed$ = "High"
360 .recognitionText$ = .recognitionText$ + .choiceText$
361 for i from 1 to numberOfFeedbackRows
362 select Table ToneFeedback
363 .toneOne$ = Get value... 'i' T1
364 .toneTwo$ = Get value... 'i' T2
365 .toneText$ = Get value... 'i' Feedback
367 if .toneOne$ = "High"
368 .feedbackText$ = .toneText$
372 elsif rangeUsed$ = "Wide"
373 .recognitionText$ = .recognitionText$ + .choiceText$
374 for i from 1 to numberOfFeedbackRows
375 select Table ToneFeedback
376 .toneOne$ = Get value... 'i' T1
377 .toneTwo$ = Get value... 'i' T2
378 .toneText$ = Get value... 'i' Feedback
380 if .toneOne$ = "Wide"
381 .feedbackText$ = .toneText$
385 # Bad tones, first handle first syllable
386 elsif rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+6") > 0
387 .recognitionText$ = .recognitionText$ + .choiceText$
389 for i from 1 to numberOfFeedbackRows
390 select Table ToneFeedback
391 .toneOne$ = Get value... 'i' T1
392 .toneTwo$ = Get value... 'i' T2
393 .toneText$ = Get value... 'i' Feedback
398 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
400 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'") > 0 and .toneTwo$ = "-"
401 .feedbackText$ = .feedbackText$ + .toneText$ + " "
404 # Bad tones, then handle second syllable
405 elsif rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+6$") > 0
406 .recognitionText$ = .recognitionText$ + .choiceText$
408 for i from 1 to numberOfFeedbackRows
409 select Table ToneFeedback
410 .toneOne$ = Get value... 'i' T1
411 .toneTwo$ = Get value... 'i' T2
412 .toneText$ = Get value... 'i' Feedback
417 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
419 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
420 .feedbackText$ = .feedbackText$ + .toneText$ + " "
423 # Just plain wrong tones
424 elsif currentToneWord$ <> choiceToneReference$
425 .recognitionText$ = .recognitionText$ + .choiceText$
426 for i from 1 to numberOfFeedbackRows
427 select Table ToneFeedback
428 .toneOne$ = Get value... 'i' T1
429 .toneTwo$ = Get value... 'i' T2
430 .toneText$ = Get value... 'i' Feedback
432 if rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
433 .feedbackText$ = .toneText$
434 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'[a-zA-Z]+'.toneTwo$'$") > 0
435 .feedbackText$ = .toneText$
436 elsif .toneOne$ = "Wrong"
437 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
443 .recognitionText$ = .recognitionText$ + .choiceText$
444 for i from 1 to numberOfFeedbackRows
445 select Table ToneFeedback
446 .toneOne$ = Get value... 'i' T1
447 .toneTwo$ = Get value... 'i' T2
448 .toneText$ = Get value... 'i' Feedback
450 if .toneOne$ = "Correct"
451 .feedbackText$ = .toneText$
457 if toneScript.syllableCount > 2
458 for .i from 1 to numberOfFeedbackRows
459 select Table ToneFeedback
460 .toneOne$ = Get value... '.i' T1
461 .toneTwo$ = Get value... '.i' T2
462 .toneText$ = Get value... '.i' Feedback
464 if .toneOne$ = "Long"
465 .feedbackText$ = .feedbackText$ + " " + .toneText$
473 Create Table with column names... Feedback 3 Text
474 Set string value... 1 Text '.recognitionText$'
475 Set string value... 2 Text '.feedbackText$'
476 Set string value... 3 Text '.label$'
479 select Table ToneFeedback
483 freqTop = 1.5 * sgc_ToneProt.upperRegisterInput
485 # Replace recorded sound with new sound
486 if not fileReadable(sgc_ToneProt.currentSound$)
487 select Sound 'sgc_ToneProt.currentSound$'
490 Copy... 'sgc_ToneProt.currentSound$'
496 plus Pitch 'sgc_ToneProt.currentTestWord$'