3 # SpeakGoodChinese: SGC_ToneRecognizer.praat processes student utterances
4 # and generates a report on their tone production
6 # Copyright (C) 2007-2010 R.J.J.H. van Son
7 # The SpeakGoodChinese team are:
8 # Guangqin Chen, Zhonyan Chen, Stefan de Koning, Eveline van Hagen,
9 # Rob van Son, Dennis Vierkant, David Weenink
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
26 # include ToneRecognition.praat
27 # include ToneScript.praat
30 procedure sgc_ToneProt sgc_ToneProt.currentSound$ sgc_ToneProt.pinyin$ sgc_ToneProt.register sgc_ToneProt.proficiency sgc_ToneProt.language$
31 # Remove if included in main program!
32 sgc_ToneProt.viewportMargin = 5
34 sgc_ToneProt.precision = 3
35 if sgc_ToneProt.proficiency
36 sgc_ToneProt.precision = 1.5
38 # Stick to the raw recognition results or not
39 sgc_ToneProt.ultraStrict = sgc_ToneProt.proficiency
42 # Read and select the feedback text
43 call loadTable ToneFeedback_'sgc_ToneProt.language$'
44 Rename... ToneFeedback
45 numberOfFeedbackRows = Get number of rows
48 if sgc_ToneProt.pinyin$ <> ""
49 if index_regex(sgc_ToneProt.pinyin$, "[0-9]") <=0
50 sgc_ToneProt.pinyin$ = sgc_ToneProt.pinyin$+"0"
53 sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "^\s*(.+)\s*$", "\1", 1)
54 sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "5", "0", 0)
57 # Reduction (lower sgc_ToneProt.register and narrow range) means errors.
58 # The opposite mostly does not. Asymmetry allows more room upward
59 # than downward (asymmetry = 2 => highBoundaryFactor = precisionFactor ^ 2)
62 # Kill octave jumps: DANGEROUS
66 sgc_ToneProt.minimumPitch = 50
67 sgc_ToneProt.maximumPitch = 500
68 if sgc_ToneProt.register > 400
69 sgc_ToneProt.minimumPitch = 60
70 sgc_ToneProt.maximumPitch = 600
71 elsif sgc_ToneProt.register > 250
72 sgc_ToneProt.minimumPitch = 50
73 sgc_ToneProt.maximumPitch = 500
75 sgc_ToneProt.minimumPitch = 40
76 sgc_ToneProt.maximumPitch = 400
79 sgc_ToneProt.currentTestWord$ = sgc_ToneProt.pinyin$
81 sgc_ToneProt.precisionFactor = 2^(sgc_ToneProt.precision/12)
82 highBoundaryFactor = sgc_ToneProt.precisionFactor ^ asymmetry
83 lowBoundaryFactor = 1/sgc_ToneProt.precisionFactor
85 # Generate reference example
86 # Start with a range of 1 octave and a speed factor of 1
89 sgc_ToneProt.upperRegisterInput = sgc_ToneProt.register
90 call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 1 1 CorrectPitch
92 select Pitch 'sgc_ToneProt.currentTestWord$'
93 sgc_ToneProt.durationModel = Get total duration
94 maximumModelFzero = Get quantile... 0 0 0.95 Hertz
95 minimumModelFzero = Get quantile... 0 0 0.05 Hertz
96 if maximumModelFzero = undefined
99 if minimumModelFzero = undefined
100 minimumModelFzero = 0
102 sgc_ToneProt.modelPitchRange = 2
103 if minimumModelFzero > 0
104 sgc_ToneProt.modelPitchRange = maximumModelFzero / minimumModelFzero
106 sgc_ToneProt.modelPitchRange = 0
110 if fileReadable(sgc_ToneProt.currentSound$)
111 Read from file... 'sgc_ToneProt.currentSound$'
114 select Sound 'sgc_ToneProt.currentSound$'
120 durationSource = Get total duration
121 call convert2Pitch 'sgc_ToneProt.minimumPitch' 'sgc_ToneProt.maximumPitch'
122 te.recordedPitch = convert2Pitch.object
123 Rename... SourcePitch
125 # It is rather dangerous to kill Octave errors, so be careful
126 if killOctaveJumps > 0
129 Rename... SourcePitch
130 te.recordedPitch = selected("Pitch")
131 select Pitch OldSource
135 # Remove all pitch points outside a band around the upper sgc_ToneProt.register
136 select te.recordedPitch
137 upperCutOff = 1.7*sgc_ToneProt.upperRegisterInput
138 lowerCutOff = sgc_ToneProt.upperRegisterInput/4
139 Formula... if self > 'upperCutOff' then -1 else self endif
140 Formula... if self < 'lowerCutOff' then -1 else self endif
143 select te.recordedPitch
144 maximumRecFzero = Get quantile... 0 0 0.95 Hertz
145 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
146 minimumRecFzero = Get quantile... 0 0 0.05 Hertz
147 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
148 if maximumRecFzero = undefined
149 # Determine what should be told to the student
150 .recognitionText$ = "'sgc_ToneProt.currentTestWord$': ???"
151 for i from 1 to numberOfFeedbackRows
152 select Table ToneFeedback
153 .toneOne$ = Get value... 'i' T1
154 .toneTwo$ = Get value... 'i' T2
155 .toneText$ = Get value... 'i' Feedback
158 if .toneOne$ = "NoSound"
159 .feedbackText$ = .toneText$
163 #exit Error, nothing recorded
167 if minimumRecFzero > 0
168 recPitchRange = maximumRecFzero / minimumRecFzero
170 sgc_ToneProt.newUpperRegister = maximumRecFzero / maximumModelFzero * sgc_ToneProt.upperRegisterInput
171 sgc_ToneProt.newToneRange = recPitchRange / sgc_ToneProt.modelPitchRange
172 if sgc_ToneProt.newUpperRegister = undefined
173 sgc_ToneProt.newUpperRegister = sgc_ToneProt.upperRegisterInput
175 if sgc_ToneProt.newToneRange = undefined
176 sgc_ToneProt.newToneRange = 1
179 sgc_ToneProt.registerUsed$ = "OK"
181 # Advanced speakers must not speak too High, or too "Dramatic" (too wide a range).
182 # Beginning speakers must also not speak too Low or with too Narrow a range.
183 if sgc_ToneProt.newUpperRegister > highBoundaryFactor * sgc_ToneProt.upperRegisterInput
184 sgc_ToneProt.newUpperRegister = highBoundaryFactor * sgc_ToneProt.upperRegisterInput
185 sgc_ToneProt.registerUsed$ = "High"
186 elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newUpperRegister < lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
187 sgc_ToneProt.newUpperRegister = lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
188 sgc_ToneProt.registerUsed$ = "Low"
191 if sgc_ToneProt.newToneRange > highBoundaryFactor
192 sgc_ToneProt.newToneRange = highBoundaryFactor
194 elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newToneRange < lowBoundaryFactor and not sgc_ToneProt.proficiency
195 # Don't do this for advanced speakers
196 sgc_ToneProt.newToneRange = lowBoundaryFactor
197 rangeUsed$ = "Narrow"
201 if sgc_ToneProt.durationModel > spacing
202 speedFactor = (durationSource - spacing) / (sgc_ToneProt.durationModel - spacing)
206 sgc_ToneProt.newUpperRegister = round(sgc_ToneProt.newUpperRegister)
208 # Remove all pitch points outside a band around the upper sgc_ToneProt.register
209 select te.recordedPitch
210 upperCutOff = 1.5*sgc_ToneProt.newUpperRegister
211 lowerCutOff = sgc_ToneProt.newUpperRegister/3
212 Formula... if self > 'upperCutOff' then -1 else self endif
213 Formula... if self < 'lowerCutOff' then -1 else self endif
215 if killOctaveJumps > 0
216 Rename... OldSourcePitch
218 Rename... SourcePitch
219 te.recordedPitch = selected("Pitch")
220 select Pitch OldSourcePitch
224 # It is good to have the lowest and highest pitch frequencies
225 select te.recordedPitch
226 timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
227 timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
229 # Clean up the old example pitch
230 select Pitch 'sgc_ToneProt.currentTestWord$'
233 # Do the tone recognition
234 call FreeToneRecognition 'sgc_ToneProt.currentTestWord$' "REUSEPITCH" "" 'sgc_ToneProt.newUpperRegister' 'sgc_ToneProt.newToneRange' 'speedFactor'
235 call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 'sgc_ToneProt.newToneRange' 'speedFactor' CorrectPitch
238 originalRecognizedWord$ = sgc_ToneProt.choiceReference$
239 if sgc_ToneProt.ultraStrict = 0
240 # First syllable: 2<->3 (6) exchanges (incl 6)
241 if rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+2[a-zA-Z]+[0-4]$") > 0
242 if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+[36][a-zA-Z]+[0-4]$") > 0
243 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[36]([a-zA-Z]+[0-4])$", "2\1", 0)
245 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+3[a-zA-Z]+[0-4]$") > 0
246 if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+[26][a-zA-Z]+[0-4]$") > 0
247 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[26]([a-zA-Z]+[0-4])$", "3\1", 0)
249 # A single second tone is often misidentified as a neutral tone,
250 # A real neutral tone would be too low or too narrow and be discarded
251 # Leaves us with erroneous tone 4
252 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+2$") > 0
253 if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+0$") > 0 and timeMinimum < timeMaximum
254 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "2", 0)
256 # A single fourth tone is often misidentified as a neutral tone,
257 # A real neutral tone would be too low or too narrow and be discarded
258 # Leaves us with erroneous tones 2 and 3
259 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+4$") > 0
260 if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+0$") > 0 and timeMaximum < timeMinimum
261 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "4", 0)
265 # Second (last) syllable, 0<->6 exchanges and 2<->3
266 # A recognized 0 after a 4 can be a 2: 4-0 => 4-2
267 if rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[4][a-zA-Z]+2$") > 0
268 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[4][a-zA-Z]+[0]$") > 0
269 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[0]$", "2", 0)
272 # A final 6 after a valid tone is often a recognition error
273 # A final 6 can be a 0
274 if rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[0-9][a-zA-Z]+0$") > 0
275 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[0-4][a-zA-Z]+6$") > 0
276 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "6$", "0", 0)
278 # Second (last) syllable, 2<->3 exchanges after [23] tones
279 # A recognized 6 (or 3) after a valid tone [1-4] is mostly wrong, can be a 2
280 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[1-4][a-zA-Z]+2$") > 0
281 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[1-4][a-zA-Z]+[36]$") > 0
282 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[36]$", "2", 0)
284 # A recognized 6 after a [23] is mostly wrong, can be a 3
285 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[23][a-zA-Z]+3$") > 0
286 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[23][a-zA-Z]+[26]$") > 0
287 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[26]$", "3", 0)
289 # A recognized 6 after a [3] is mostly wrong, can be a 1
290 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[3][a-zA-Z]+1$") > 0
291 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[3][a-zA-Z]+[6]$") > 0
292 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[6]$", "1", 0)
296 # Clean up odd things constructed with special cases
297 # Target is 3-3, but recognized is 2-3, which is CORRECT. Change it into 3-3
298 if rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[3][a-zA-Z]+[3]$") > 0
299 if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[2][a-zA-Z]+[3]$") > 0
300 sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[2]([a-zA-Z]+[3])$", "3\1", 0)
305 # If wrong, then undo all changes
306 if sgc_ToneProt.currentTestWord$ != sgc_ToneProt.choiceReference$
307 sgc_ToneProt.choiceReference$ = originalRecognizedWord$
310 sgc_ToneProt.toneChoiceReference$ = sgc_ToneProt.choiceReference$
312 ###############################################
316 ###############################################
317 result$ = "'tab$''sgc_ToneProt.currentTestWord$''tab$''sgc_ToneProt.choiceReference$''tab$''sgc_ToneProt.newUpperRegister''tab$''sgc_ToneProt.newToneRange''tab$''speedFactor''tab$''sgc_ToneProt.registerUsed$''tab$''rangeUsed$'"
318 if sgc_ToneProt.currentTestWord$ = sgc_ToneProt.toneChoiceReference$
319 result$ = "Correct:"+result$
321 result$ = "Wrong:"+result$
324 # Initialize result texts
325 .recognitionText$ = "'sgc_ToneProt.currentTestWord$': "
326 .choiceText$ = replace_regex$(sgc_ToneProt.choiceReference$, "6", "\?", 0)
327 .feedbackText$ = "----"
329 # Separate tone from pronunciation errors
330 currentToneWord$ = replace_regex$(sgc_ToneProt.currentTestWord$, "[a-z]+", "\*", 0)
331 choiceToneReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[a-z]+", "\*", 0)
333 # Determine what should be told to the student
334 if sgc_ToneProt.registerUsed$ = "Low"
335 .recognitionText$ = .recognitionText$ + "???"
336 for i from 1 to numberOfFeedbackRows
337 select Table ToneFeedback
338 .toneOne$ = Get value... 'i' T1
339 .toneTwo$ = Get value... 'i' T2
340 .toneText$ = Get value... 'i' Feedback
343 .feedbackText$ = .toneText$
347 elsif rangeUsed$ = "Narrow"
348 .recognitionText$ = .recognitionText$ + "???"
349 for i from 1 to numberOfFeedbackRows
350 select Table ToneFeedback
351 .toneOne$ = Get value... 'i' T1
352 .toneTwo$ = Get value... 'i' T2
353 .toneText$ = Get value... 'i' Feedback
355 if .toneOne$ = "Narrow"
356 .feedbackText$ = .toneText$
360 elsif sgc_ToneProt.registerUsed$ = "High"
361 .recognitionText$ = .recognitionText$ + .choiceText$
362 for i from 1 to numberOfFeedbackRows
363 select Table ToneFeedback
364 .toneOne$ = Get value... 'i' T1
365 .toneTwo$ = Get value... 'i' T2
366 .toneText$ = Get value... 'i' Feedback
368 if .toneOne$ = "High"
369 .feedbackText$ = .toneText$
373 elsif rangeUsed$ = "Wide"
374 .recognitionText$ = .recognitionText$ + .choiceText$
375 for i from 1 to numberOfFeedbackRows
376 select Table ToneFeedback
377 .toneOne$ = Get value... 'i' T1
378 .toneTwo$ = Get value... 'i' T2
379 .toneText$ = Get value... 'i' Feedback
381 if .toneOne$ = "Wide"
382 .feedbackText$ = .toneText$
386 # Bad tones, first handle first syllable
387 elsif rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+6") > 0
388 .recognitionText$ = .recognitionText$ + .choiceText$
390 for i from 1 to numberOfFeedbackRows
391 select Table ToneFeedback
392 .toneOne$ = Get value... 'i' T1
393 .toneTwo$ = Get value... 'i' T2
394 .toneText$ = Get value... 'i' Feedback
399 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
401 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'") > 0 and .toneTwo$ = "-"
402 .feedbackText$ = .feedbackText$ + .toneText$ + " "
405 # Bad tones, then handle second syllable
406 elsif rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+6$") > 0
407 .recognitionText$ = .recognitionText$ + .choiceText$
409 for i from 1 to numberOfFeedbackRows
410 select Table ToneFeedback
411 .toneOne$ = Get value... 'i' T1
412 .toneTwo$ = Get value... 'i' T2
413 .toneText$ = Get value... 'i' Feedback
418 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
420 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
421 .feedbackText$ = .feedbackText$ + .toneText$ + " "
424 # Just plain wrong tones
425 elsif currentToneWord$ <> choiceToneReference$
426 .recognitionText$ = .recognitionText$ + .choiceText$
427 for i from 1 to numberOfFeedbackRows
428 select Table ToneFeedback
429 .toneOne$ = Get value... 'i' T1
430 .toneTwo$ = Get value... 'i' T2
431 .toneText$ = Get value... 'i' Feedback
433 if rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
434 .feedbackText$ = .toneText$
435 elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'[a-zA-Z]+'.toneTwo$'$") > 0
436 .feedbackText$ = .toneText$
437 elsif .toneOne$ = "Wrong"
438 .recognitionText$ = .recognitionText$ + " ('.toneText$')"
444 .recognitionText$ = .recognitionText$ + .choiceText$
445 for i from 1 to numberOfFeedbackRows
446 select Table ToneFeedback
447 .toneOne$ = Get value... 'i' T1
448 .toneTwo$ = Get value... 'i' T2
449 .toneText$ = Get value... 'i' Feedback
451 if .toneOne$ = "Correct"
452 .feedbackText$ = .toneText$
461 Create Table with column names... Feedback 3 Text
462 Set string value... 1 Text '.recognitionText$'
463 Set string value... 2 Text '.feedbackText$'
464 Set string value... 3 Text '.label$'
467 select Table ToneFeedback
471 freqTop = 1.5 * sgc_ToneProt.upperRegisterInput
473 # Replace recorded sound with new sound
474 if not fileReadable(sgc_ToneProt.currentSound$)
475 select Sound 'sgc_ToneProt.currentSound$'
478 Copy... 'sgc_ToneProt.currentSound$'
484 plus Pitch 'sgc_ToneProt.currentTestWord$'