1*576fffbcSThomas Huth# ...
2*576fffbcSThomas Huth#
3*576fffbcSThomas Huth# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
4*576fffbcSThomas Huth#
5*576fffbcSThomas Huth# This work is licensed under the terms of the GNU GPL, version 2 or
6*576fffbcSThomas Huth# later. See the COPYING file in the top-level directory.
7*576fffbcSThomas Huth
8*576fffbcSThomas Huthimport re
9*576fffbcSThomas Huthimport logging
10*576fffbcSThomas Huth
11*576fffbcSThomas Huthfrom . import has_cmd, run_cmd
12*576fffbcSThomas Huth
def tesseract_available(expected_version):
    """Check whether a sufficiently recent tesseract binary can be used.

    :param expected_version: minimum acceptable major version, compared
                             numerically against the reported one
    :returns: True only if ``has_cmd('tesseract')`` is truthy,
              ``tesseract --version`` finishes with a zero return code and
              the major version found in its output is at least
              ``expected_version``; False in every other case, including
              when no version number can be found in the output
    """
    if not has_cmd('tesseract'):
        return False
    (stdout, stderr, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False
    # The version is the second whitespace-separated token of the banner,
    # e.g. "tesseract 4.1.1"; an optional leading "v" ("v5.0.0") is
    # tolerated. Only the leading digits (the major number) matter here.
    # NOTE(review): stdout is tried first; stderr is a fallback for builds
    # that print the banner there - confirm against the supported versions.
    for text in (stdout, stderr):
        if not isinstance(text, str):
            continue
        found = re.match(r'\s*\S+\s+v?(\d+)', text)
        if found:
            return int(found.group(1)) >= expected_version
    # Empty or unrecognised output: report "not available" instead of
    # raising IndexError/ValueError at the caller.
    return False
21*576fffbcSThomas Huth
def tesseract_ocr(image_path, tesseract_args=''):
    """Run tesseract on an image and return the recognised text lines.

    :param image_path: path of the image handed to tesseract
    :param tesseract_args: extra command line options for tesseract, either
                           as a single shell-style string (split with
                           ``shlex``) or as a sequence of arguments; they
                           are appended after the ``stdout`` output base
    :returns: list of non-empty lines with surrounding whitespace removed,
              or None if the command finished with a non-zero return code
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import shlex

    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)

    if isinstance(tesseract_args, str):
        extra_args = shlex.split(tesseract_args)
    else:
        extra_args = list(tesseract_args or ())

    (stdout, stderr, ret) = run_cmd(['tesseract', image_path, 'stdout']
                                    + extra_args)
    if ret:
        return None

    stripped = (line.strip() for line in stdout.split('\n'))
    lines = [sline for sline in stripped if sline]
    for sline in lines:
        console_logger.debug(sline)
    return lines
36