3 # Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
5 # This work is licensed under the terms of the GNU GPL, version 2 or
6 # later. See the COPYING file in the top-level directory.
11 from avocado
.utils
import process
12 from avocado
.utils
.path
import find_command
, CmdNotFoundError
def tesseract_available(expected_version):
    """Tell whether a usable tesseract binary is installed.

    The binary is looked up with find_command(); when found,
    ``tesseract --version`` is run and the major number of the reported
    version is compared against *expected_version*.

    :param expected_version: minimum major version required (integer)
    :returns: True if tesseract is found and its major version is greater
              than or equal to *expected_version*; False if the binary is
              missing or no version number can be extracted from its output
    """
    try:
        find_command('tesseract')
    except CmdNotFoundError:
        return False

    res = process.run('tesseract --version')

    # The version banner is looked for on stdout first, then on stderr.
    # The second whitespace-separated field is taken as the version string
    # (NOTE(review): assumes a "tesseract <version>" banner - confirm).
    version = None
    for stream_name in ('stdout_text', 'stderr_text'):
        fields = getattr(res, stream_name).split()
        if len(fields) > 1:
            version = fields[1]
            break
    if version is None:
        # Neither stream carried a version field: report "not available"
        # instead of letting an IndexError escape to the caller.
        return False

    # Tolerate a leading 'v' (e.g. 'v5.0.0') before the major number.
    major = version.lstrip('v').split('.')[0]
    try:
        return int(major) >= expected_version
    except ValueError:
        # Unparseable version string: treat as not available.
        return False
33 def tesseract_ocr(image_path
, tesseract_args
='', tesseract_version
=3):
34 console_logger
= logging
.getLogger('tesseract')
35 console_logger
.debug(image_path
)
36 if tesseract_version
== 4:
37 tesseract_args
+= ' --oem 1'
38 proc
= process
.run("tesseract {} {} stdout".format(tesseract_args
,
41 for line
in proc
.stdout_text
.split('\n'):
44 console_logger
.debug(sline
)