Handle broken wordlists and audio files
[sgc2.git] / ToneProt / SGC_ToneProt.praat
blob97584e10cba6f2ce72e21a65bfa99e805bbc3548
#! praat
#
#     SpeakGoodChinese: SGC_ToneRecognizer.praat processes student utterances 
#     and generates a report on their tone production
#     
#     Copyright (C) 2007-2010  R.J.J.H. van Son
#     The SpeakGoodChinese team are:
#     Guangqin Chen, Zhonyan Chen, Stefan de Koning, Eveline van Hagen, 
#     Rob van Son, Dennis Vierkant, David Weenink
#
#     This program is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program; if not, write to the Free Software
#     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
#
# Needs:
# include ToneRecognition.praat
# include ToneScript.praat
# procedure loadTable
# sgc_ToneProt: compare a recorded utterance with the expected tones of a
# pinyin word and produce feedback for the student.
#
# Arguments:
#   sgc_ToneProt.currentSound$  Path of a readable sound file, or the name of
#                               an existing Sound object.
#   sgc_ToneProt.pinyin$        Target word in pinyin with tone digits
#                               (a "5" is rewritten to "0").
#   sgc_ToneProt.register       Upper register of the speaker; used to pick
#                               the pitch analysis limits and passed on to
#                               toneScript (presumably in Hz; confirm).
#   sgc_ToneProt.proficiency    Non-zero selects the tighter precision (1.5
#                               instead of 3) and switches off the
#                               special-case corrections (ultraStrict).
#   sgc_ToneProt.language$      Suffix of the feedback table to load
#                               (ToneFeedback_<language>).
#
# Result:
#   A Table "Feedback" with one column "Text" and three rows:
#   1 = recognition text, 2 = feedback text, 3 = label.
#
# Side effects:
#   Non-dotted variables (spacing, speedFactor, toneRange, result$, i, ...)
#   are script-global and are kept under their original names because the
#   included scripts may read them. When currentSound$ names a Sound object
#   (not a file), that object is replaced by a copy of the analysed sound.
#
# Requires the procedures loadTable, toneScript, convert2Pitch and
# FreeToneRecognition. sgc_ToneProt.choiceReference$ is read below without
# being assigned here; presumably FreeToneRecognition sets it (confirm).
procedure sgc_ToneProt sgc_ToneProt.currentSound$ sgc_ToneProt.pinyin$ sgc_ToneProt.register sgc_ToneProt.proficiency sgc_ToneProt.language$
	# Remove if included in main program!
	sgc_ToneProt.viewportMargin = 5

	sgc_ToneProt.precision = 3
	if sgc_ToneProt.proficiency
		sgc_ToneProt.precision = 1.5
	endif
	# Stick to the raw recognition results or not
	sgc_ToneProt.ultraStrict = sgc_ToneProt.proficiency

	# Read and select the feedback text
	call loadTable ToneFeedback_'sgc_ToneProt.language$'
	Rename... ToneFeedback
	numberOfFeedbackRows = Get number of rows

	# Dotted variables survive between calls. Give the output texts a defined
	# value so that a feedback table lacking an expected row cannot leak the
	# texts of a previous call into this result.
	.feedbackText$ = "----"
	.label$ = "Unknown"

	# Clean up input: strip leading and trailing white space, map tone 5 to 0.
	# Two anchored replacements are used because a single greedy "(.+)" would
	# swallow the trailing white space it is supposed to remove.
	if sgc_ToneProt.pinyin$ <> ""
		sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "^\s+", "", 1)
		sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "\s+$", "", 1)
		sgc_ToneProt.pinyin$ = replace_regex$(sgc_ToneProt.pinyin$, "5", "0", 0)
	endif

	# Reduction (lower register and narrow range) means errors.
	# The opposite mostly not. Asymmetry allows more room upward
	# than downward (asymmetry = 2 => highBoundaryFactor ^ 2)
	asymmetry = 2

	# Kill octave jumps: DANGEROUS
	killOctaveJumps = 0

	# Limit pitch range
	sgc_ToneProt.minimumPitch = 50
	sgc_ToneProt.maximumPitch = 500
	if sgc_ToneProt.register > 400
		sgc_ToneProt.minimumPitch = 60
		sgc_ToneProt.maximumPitch = 600
	elsif sgc_ToneProt.register > 250
		sgc_ToneProt.minimumPitch = 50
		sgc_ToneProt.maximumPitch = 500
	else
		sgc_ToneProt.minimumPitch = 40
		sgc_ToneProt.maximumPitch = 400
	endif

	sgc_ToneProt.currentTestWord$ = sgc_ToneProt.pinyin$
	spacing = 0.5
	# Precision is expressed in semitones: 2^(precision/12) is a frequency ratio
	sgc_ToneProt.precisionFactor = 2^(sgc_ToneProt.precision/12)
	highBoundaryFactor = sgc_ToneProt.precisionFactor ^ asymmetry
	lowBoundaryFactor = 1/sgc_ToneProt.precisionFactor

	# Generate reference example
	# Start with a range of 1 octave and a speed factor of 1
	toneRange = 1.0
	speedFactor = 1.0
	sgc_ToneProt.upperRegisterInput = sgc_ToneProt.register
	call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 1 1 CorrectPitch
	# Get range and top of the model
	select Pitch 'sgc_ToneProt.currentTestWord$'
	sgc_ToneProt.durationModel = Get total duration
	maximumModelFzero = Get quantile... 0 0 0.95 Hertz
	minimumModelFzero = Get quantile... 0 0 0.05 Hertz
	sgc_ToneProt.modelPitchRange = 2
	if minimumModelFzero > 0
		sgc_ToneProt.modelPitchRange = maximumModelFzero / minimumModelFzero
	endif

	# Get the sound: either from file or from an existing Sound object.
	# Either way the working copy is called "Source".
	if fileReadable(sgc_ToneProt.currentSound$)
		Read from file... 'sgc_ToneProt.currentSound$'
		Rename... Source
	else
		select Sound 'sgc_ToneProt.currentSound$'
		Copy... Source
	endif

	# Calculate pitch
	select Sound Source
	durationSource = Get total duration
	call convert2Pitch 'sgc_ToneProt.minimumPitch' 'sgc_ToneProt.maximumPitch'
	te.recordedPitch = convert2Pitch.object
	Rename... SourcePitch

	# It is rather dangerous to kill Octave errors, so be careful
	if killOctaveJumps > 0
		Rename... OldSource
		Kill octave jumps
		Rename... SourcePitch
		te.recordedPitch = selected("Pitch")
		select Pitch OldSource
		Remove
	endif

	# Remove all pitch points outside a band around the upper register
	select te.recordedPitch
	upperCutOff = 1.7*sgc_ToneProt.upperRegisterInput
	lowerCutOff = sgc_ToneProt.upperRegisterInput/4
	Formula... if self > 'upperCutOff' then -1 else self endif
	Formula... if self < 'lowerCutOff' then -1 else self endif

	# Get range and top of the recording
	select te.recordedPitch
	maximumRecFzero = Get quantile... 0 0 0.95 Hertz
	timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
	minimumRecFzero = Get quantile... 0 0 0.05 Hertz
	timeMinimum = Get time of minimum... 0 0 Hertz Parabolic
	if maximumRecFzero = undefined
		# No usable pitch was found: report "no sound" and skip recognition
		.recognitionText$ =  "'sgc_ToneProt.currentTestWord$': ???"
		.label$ = "Unknown"
		select Table ToneFeedback
		for i from 1 to numberOfFeedbackRows
			.toneOne$ = Get value... 'i' T1
			.toneTwo$ = Get value... 'i' T2
			.toneText$ = Get value... 'i' Feedback
			if .toneOne$ = "NoSound"
				.feedbackText$ = .toneText$
			endif
		endfor

		# NOTE(review): the recorded Pitch object (te.recordedPitch) is not
		# removed on this path; confirm whether the caller cleans it up.
		goto END
	endif
	recPitchRange = 2
	if minimumRecFzero > 0
		recPitchRange = maximumRecFzero / minimumRecFzero
	endif
	sgc_ToneProt.newUpperRegister = maximumRecFzero / maximumModelFzero * sgc_ToneProt.upperRegisterInput
	sgc_ToneProt.newToneRange = recPitchRange / sgc_ToneProt.modelPitchRange

	sgc_ToneProt.registerUsed$ = "OK"
	rangeUsed$ = "OK"
	# Advanced speakers must not speak too High, or too "Dramatic"
	# Beginning speakers also not too Low or too Narrow ranges
	if sgc_ToneProt.newUpperRegister > highBoundaryFactor * sgc_ToneProt.upperRegisterInput
		sgc_ToneProt.newUpperRegister = highBoundaryFactor * sgc_ToneProt.upperRegisterInput
		sgc_ToneProt.registerUsed$ = "High"
	elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newUpperRegister < lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
		sgc_ToneProt.newUpperRegister = lowBoundaryFactor * sgc_ToneProt.upperRegisterInput
		sgc_ToneProt.registerUsed$ = "Low"
	endif

	if sgc_ToneProt.newToneRange > highBoundaryFactor
		sgc_ToneProt.newToneRange = highBoundaryFactor
		rangeUsed$ = "Wide"
	elsif not sgc_ToneProt.proficiency and sgc_ToneProt.newToneRange < lowBoundaryFactor
		# Don't do this for advanced speakers
		sgc_ToneProt.newToneRange = lowBoundaryFactor
		rangeUsed$ = "Narrow"
	endif

	# Duration
	# Only rescale when both durations exceed the spacing, otherwise the
	# speed factor would become zero or negative; it then stays at 1.
	if sgc_ToneProt.durationModel > spacing and durationSource > spacing
		speedFactor = (durationSource - spacing) / (sgc_ToneProt.durationModel - spacing)
	endif

	# Round values
	sgc_ToneProt.newUpperRegister = round(sgc_ToneProt.newUpperRegister)

	# Remove all pitch points outside a band around the new upper register
	select te.recordedPitch
	upperCutOff = 1.5*sgc_ToneProt.newUpperRegister
	lowerCutOff = sgc_ToneProt.newUpperRegister/3
	Formula... if self > 'upperCutOff' then -1 else self endif
	Formula... if self < 'lowerCutOff' then -1 else self endif

	if killOctaveJumps > 0
		Rename... OldSourcePitch
		Kill octave jumps
		Rename... SourcePitch
		te.recordedPitch = selected("Pitch")
		select Pitch OldSourcePitch
		Remove
	endif

	# It is good to have the lowest and highest pitch frequencies
	select te.recordedPitch
	timeMaximum = Get time of maximum... 0 0 Hertz Parabolic
	timeMinimum = Get time of minimum... 0 0 Hertz Parabolic

	# Clean up the old example pitch
	select Pitch 'sgc_ToneProt.currentTestWord$'
	Remove

	# Do the tone recognition
	call FreeToneRecognition 'sgc_ToneProt.currentTestWord$' "REUSEPITCH" "" 'sgc_ToneProt.newUpperRegister' 'sgc_ToneProt.newToneRange' 'speedFactor'
	call toneScript 'sgc_ToneProt.currentTestWord$' 'sgc_ToneProt.upperRegisterInput' 'sgc_ToneProt.newToneRange' 'speedFactor' CorrectPitch

	# Special cases
	# Tone "6" stands for an unrecognizable tone (it is shown as "?" below).
	originalRecognizedWord$ = sgc_ToneProt.choiceReference$
	if sgc_ToneProt.ultraStrict = 0
		# First syllable: 2<->3 (6) exchanges (incl 6)
		if rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+2[a-zA-Z]+[0-4]$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+[36][a-zA-Z]+[0-4]$") > 0
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[36]([a-zA-Z]+[0-4])$", "2\1", 0)
			endif
		elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+3[a-zA-Z]+[0-4]$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+[26][a-zA-Z]+[0-4]$") > 0
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[26]([a-zA-Z]+[0-4])$", "3\1", 0)
			endif
		# A single second tone is often misidentified as a neutral tone,
		# A real neutral tone would be too low or too narrow and be discarded
		# Leaves us with erroneous tone 4
		elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+2$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+0$") > 0 and timeMinimum < timeMaximum
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "2", 0)
			endif
		# A single fourth tone is often misidentified as a neutral tone,
		# A real neutral tone would be too low or too narrow and be discarded
		# Leaves us with erroneous tones 2 and 3
		elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+4$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+0$") > 0 and timeMaximum < timeMinimum
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "0", "4", 0)
			endif
		endif

		# Second (last) syllable, 0<->6 exchanges and 2<->3
		# A recognized 0 after a 4 can be a 2: 4-0 => 4-2
		if rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[4][a-zA-Z]+2$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[4][a-zA-Z]+[0]$") > 0
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[0]$", "2", 0)
			endif
		endif
		# A final 6 after a valid tone is often a recognition error
		# A final 6 can be a 0
		if rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[0-9][a-zA-Z]+0$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[0-4][a-zA-Z]+6$") > 0
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "6$", "0", 0)
			endif
		# Second (last) syllable, 2<->3 exchanges after [23] tones
		# A recognized 6 (or 3) after a valid tone [1-4] is mostly wrong, can be a 2
		elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[1-4][a-zA-Z]+2$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[1-4][a-zA-Z]+[36]$") > 0
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[36]$", "2", 0)
			endif
		# A recognized 6 after a [23] is mostly wrong, can be a 3
		elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[23][a-zA-Z]+3$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[23][a-zA-Z]+[26]$") > 0
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[26]$", "3", 0)
			endif
		# A recognized 6 after a [3] is mostly wrong, can be a 1
		elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[3][a-zA-Z]+1$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[3][a-zA-Z]+[6]$") > 0
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[6]$", "1", 0)
			endif
		endif

		# Clean up odd things constructed with special cases
		# Target is 3-3, but recognized is 2-3, which is CORRECT. Change it into 3-3
		if rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+[3][a-zA-Z]+[3]$") > 0
			if rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+[2][a-zA-Z]+[3]$") > 0
				sgc_ToneProt.choiceReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[2]([a-zA-Z]+[3])$", "3\1", 0)
			endif
		endif
	endif

	# If still wrong after the corrections, then undo all changes so that
	# the student sees what was actually recognized
	if sgc_ToneProt.currentTestWord$ != sgc_ToneProt.choiceReference$
		sgc_ToneProt.choiceReference$ = originalRecognizedWord$
	endif

	sgc_ToneProt.toneChoiceReference$ = sgc_ToneProt.choiceReference$

	###############################################
	#
	# Report
	#
	###############################################
	result$ = "'tab$''sgc_ToneProt.currentTestWord$''tab$''sgc_ToneProt.choiceReference$''tab$''sgc_ToneProt.newUpperRegister''tab$''sgc_ToneProt.newToneRange''tab$''speedFactor''tab$''sgc_ToneProt.registerUsed$''tab$''rangeUsed$'"
	if sgc_ToneProt.currentTestWord$ = sgc_ToneProt.toneChoiceReference$
		result$ = "Correct:"+result$
	else
		result$ = "Wrong:"+result$
	endif

	# Initialize result texts
	.recognitionText$ =  "'sgc_ToneProt.currentTestWord$': "
	.choiceText$ = replace_regex$(sgc_ToneProt.choiceReference$, "6", "\?", 0)
	.feedbackText$ = "----"

	# Separate tone from pronunciation errors
	currentToneWord$ = replace_regex$(sgc_ToneProt.currentTestWord$, "[a-z]+", "\*", 0)
	choiceToneReference$ = replace_regex$(sgc_ToneProt.choiceReference$, "[a-z]+", "\*", 0)

	# Determine what should be told to the student.
	# Every branch scans the whole feedback table; when several rows match,
	# the last matching row wins (except where texts are accumulated).
	select Table ToneFeedback
	if sgc_ToneProt.registerUsed$ = "Low"
		.recognitionText$ = .recognitionText$ + "???"
		for i from 1 to numberOfFeedbackRows
			.toneOne$ = Get value... 'i' T1
			.toneTwo$ = Get value... 'i' T2
			.toneText$ = Get value... 'i' Feedback
			if .toneOne$ = "Low"
				.feedbackText$ = .toneText$
				.label$ = .toneOne$
			endif
		endfor
	elsif rangeUsed$ = "Narrow"
		.recognitionText$ = .recognitionText$ + "???"
		for i from 1 to numberOfFeedbackRows
			.toneOne$ = Get value... 'i' T1
			.toneTwo$ = Get value... 'i' T2
			.toneText$ = Get value... 'i' Feedback
			if .toneOne$ = "Narrow"
				.feedbackText$ = .toneText$
				.label$ = .toneOne$
			endif
		endfor
	elsif sgc_ToneProt.registerUsed$ = "High"
		.recognitionText$ = .recognitionText$ + .choiceText$
		for i from 1 to numberOfFeedbackRows
			.toneOne$ = Get value... 'i' T1
			.toneTwo$ = Get value... 'i' T2
			.toneText$ = Get value... 'i' Feedback
			if .toneOne$ = "High"
				.feedbackText$ = .toneText$
				.label$ = .toneOne$
			endif
		endfor
	elsif rangeUsed$ = "Wide"
		.recognitionText$ = .recognitionText$ + .choiceText$
		for i from 1 to numberOfFeedbackRows
			.toneOne$ = Get value... 'i' T1
			.toneTwo$ = Get value... 'i' T2
			.toneText$ = Get value... 'i' Feedback
			if .toneOne$ = "Wide"
				.feedbackText$ = .toneText$
				.label$ = .toneOne$
			endif
		endfor
	# Bad tones, first handle first syllable
	elsif rindex_regex(sgc_ToneProt.choiceReference$, "^[a-zA-Z]+6") > 0
		.recognitionText$ = .recognitionText$ + .choiceText$
		# Start from an empty text ONCE, before the loop, so the texts of
		# all matching rows are accumulated instead of being wiped per row
		.feedbackText$ = ""
		for i from 1 to numberOfFeedbackRows
			.toneOne$ = Get value... 'i' T1
			.toneTwo$ = Get value... 'i' T2
			.toneText$ = Get value... 'i' Feedback
			if .toneOne$ = "6"
				.recognitionText$ = .recognitionText$ + " ('.toneText$')"
				.label$ = .toneOne$
			elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'") > 0 and .toneTwo$ = "-"
				.feedbackText$ = .feedbackText$ + .toneText$ + " "
			endif
		endfor
	# Bad tones, then handle second (last) syllable
	elsif rindex_regex(sgc_ToneProt.choiceReference$, "[a-zA-Z]+6$") > 0
		.recognitionText$ = .recognitionText$ + .choiceText$
		# See above: reset once, then accumulate
		.feedbackText$ = ""
		for i from 1 to numberOfFeedbackRows
			.toneOne$ = Get value... 'i' T1
			.toneTwo$ = Get value... 'i' T2
			.toneText$ = Get value... 'i' Feedback
			if .toneOne$ = "6"
				.recognitionText$ = .recognitionText$ + " ('.toneText$')"
				.label$ = .toneOne$
			elsif rindex_regex(sgc_ToneProt.currentTestWord$, "[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
				.feedbackText$ = .feedbackText$ + .toneText$ + " "
			endif
		endfor
	# Just plain wrong tones
	elsif currentToneWord$ <> choiceToneReference$
		.recognitionText$ = .recognitionText$ + .choiceText$
		for i from 1 to numberOfFeedbackRows
			.toneOne$ = Get value... 'i' T1
			.toneTwo$ = Get value... 'i' T2
			.toneText$ = Get value... 'i' Feedback
			if rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'$") > 0 and .toneTwo$ = "-"
				.feedbackText$ = .toneText$
			elsif rindex_regex(sgc_ToneProt.currentTestWord$, "^[a-zA-Z]+'.toneOne$'[a-zA-Z]+'.toneTwo$'$") > 0
				.feedbackText$ = .toneText$
			elsif .toneOne$ = "Wrong"
				.recognitionText$ = .recognitionText$ + " ('.toneText$')"
				.label$ = .toneOne$
			endif
		endfor
	# Correct
	else
		.recognitionText$ = .recognitionText$ + .choiceText$
		for i from 1 to numberOfFeedbackRows
			.toneOne$ = Get value... 'i' T1
			.toneTwo$ = Get value... 'i' T2
			.toneText$ = Get value... 'i' Feedback
			if .toneOne$ = "Correct"
				.feedbackText$ = .toneText$
				.label$ = .toneOne$
			endif
		endfor
	endif

	label END

	# Write out result
	Create Table with column names... Feedback 3 Text
	Set string value... 1 Text '.recognitionText$'
	Set string value... 2 Text '.feedbackText$'
	Set string value... 3 Text '.label$'

	# Clean up
	select Table ToneFeedback
	Remove

	# Show pitch tracks
	freqTop = 1.5 * sgc_ToneProt.upperRegisterInput

	# Replace recorded sound with new sound
	if not fileReadable(sgc_ToneProt.currentSound$)
		select Sound 'sgc_ToneProt.currentSound$'
		Remove
		select Sound Source
		Copy... 'sgc_ToneProt.currentSound$'
	endif

	# Clean up
	select Sound Source
	plus Pitch 'sgc_ToneProt.currentTestWord$'
	Remove
endproc