1576fffbcSThomas Huth# ...
2576fffbcSThomas Huth#
3576fffbcSThomas Huth# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
4576fffbcSThomas Huth#
5576fffbcSThomas Huth# This work is licensed under the terms of the GNU GPL, version 2 or
6576fffbcSThomas Huth# later. See the COPYING file in the top-level directory.
7576fffbcSThomas Huth
8576fffbcSThomas Huthimport re
9576fffbcSThomas Huthimport logging
10576fffbcSThomas Huth
11576fffbcSThomas Huthfrom . import has_cmd, run_cmd
12576fffbcSThomas Huth
def tesseract_available(expected_version):
    """Report whether a recent enough ``tesseract`` binary can be run.

    The check has three stages: ``has_cmd`` must report the command as
    present, ``tesseract --version`` must finish with a falsy status, and
    the major version parsed from its standard output must be at least
    *expected_version*.

    :param expected_version: minimum acceptable major version (compared
                             against an ``int``)
    :returns: ``True`` if all three stages succeed, ``False`` otherwise --
              including when the version output cannot be parsed
    """
    (has_tesseract, _) = has_cmd('tesseract')
    if not has_tesseract:
        return False
    (stdout, _, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False
    # The version is the second whitespace-separated token of the output
    # (the first one is presumably the program name -- TODO confirm).
    tokens = stdout.split()
    if len(tokens) < 2:
        # Empty or truncated output: treat as "not available" instead of
        # letting an IndexError escape from an availability probe.
        return False
    # Only the leading integer matters.  Tolerating an optional "v" prefix
    # and any suffix ("v5.3.0", "5-alpha", ...) avoids a ValueError on
    # version strings that are not plain dotted numbers.
    major = re.match(r'v?(\d+)', tokens[1])
    if major is None:
        return False
    return int(major.group(1)) >= expected_version
22576fffbcSThomas Huth
def tesseract_ocr(image_path, tesseract_args=''):
    """Run ``tesseract`` on an image and return the text lines it prints.

    The image path and every recognised line are written to the 'console'
    logger at debug level.

    :param image_path: path of the image handed to ``tesseract``
    :param tesseract_args: accepted for interface compatibility; this
                           function does not currently pass it on to the
                           command
    :returns: list of the non-empty, whitespace-stripped output lines in
              their original order, or ``None`` if ``run_cmd`` reports a
              truthy status for the command
    """
    log = logging.getLogger('console')
    log.debug(image_path)

    # 'stdout' as the output base makes tesseract print the recognised
    # text instead of writing it to a file.
    command = ['tesseract', image_path, 'stdout']
    (output, _, status) = run_cmd(command)
    if status:
        return None

    # Strip each line and drop the ones that end up empty.
    stripped = (raw.strip() for raw in output.split('\n'))
    recognised = [text for text in stripped if text]
    for text in recognised:
        log.debug(text)
    return recognised
37