1# ...
2#
3# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
4#
5# This work is licensed under the terms of the GNU GPL, version 2 or
6# later. See the COPYING file in the top-level directory.
7
8import re
9import logging
10
11from . import has_cmd, run_cmd
12
def tesseract_available(expected_version):
    """
    Check whether a suitable 'tesseract' binary can be used.

    :param expected_version: minimum acceptable major version (integer)
    :returns: True when 'tesseract --version' exits successfully and the
              major version it reports is >= expected_version; False when
              the command is missing, fails, or prints a version that
              cannot be parsed
    """
    (has_tesseract, _) = has_cmd('tesseract')
    if not has_tesseract:
        return False
    (stdout, _, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False
    # The version is taken from the second whitespace-separated word of
    # the output (e.g. "tesseract 4.1.1").  Guard against short/empty
    # output instead of raising IndexError.
    words = stdout.split()
    if len(words) < 2:
        return False
    # Only the leading digits matter (the major version).  Tolerate an
    # optional 'v' prefix and non-numeric suffixes such as "-rc1", which
    # would make a plain int() conversion raise ValueError.
    match = re.match(r'v?(\d+)', words[1])
    if match is None:
        return False
    return int(match.group(1)) >= expected_version
22
def tesseract_ocr(image_path, tesseract_args=''):
    """
    Run tesseract on an image and return the recognized text lines.

    :param image_path: path of the image file handed to tesseract
    :param tesseract_args: optional string of extra command line options
                           for tesseract; it is split shell-style and
                           appended after the 'stdout' output base
    :returns: list of the non-empty output lines with surrounding
              whitespace removed, or None if tesseract exits with a
              non-zero status
    """
    # Function-scope import keeps this block self-contained.
    import shlex

    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)
    cmd = ['tesseract', image_path, 'stdout']
    # Previously tesseract_args was accepted but silently dropped; forward
    # it so callers' options actually take effect.  An empty value leaves
    # the command line exactly as before.
    if tesseract_args:
        cmd += shlex.split(tesseract_args)
    (stdout, _, ret) = run_cmd(cmd)
    if ret:
        return None
    # Keep only lines that still have content once stripped.
    stripped = (line.strip() for line in stdout.split('\n'))
    lines = [sline for sline in stripped if sline]
    for sline in lines:
        console_logger.debug(sline)
    return lines
37