1# ...
2#
3# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
4#
5# This work is licensed under the terms of the GNU GPL, version 2 or
6# later. See the COPYING file in the top-level directory.
7
8import re
9import logging
10
11from . import has_cmd, run_cmd
12
def tesseract_available(expected_version):
    """Report whether a sufficiently recent ``tesseract`` can be run.

    Runs ``tesseract --version`` and compares the major version number
    found in its output against *expected_version*.

    :param expected_version: minimum acceptable major version; compared
                             with ``>=`` against the parsed ``int``
    :returns: ``True`` when the detected major version is at least
              *expected_version*.  ``False`` when ``has_cmd('tesseract')``
              is falsy, when the command exits with a non-zero status, or
              when no major version can be parsed from its output.
    """
    if not has_cmd('tesseract'):
        return False
    (stdout, stderr, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False
    # Fall back to stderr when nothing was written to stdout.
    # NOTE(review): older tesseract releases reportedly print the version
    # banner on stderr -- confirm against the versions that matter here.
    fields = (stdout or stderr or '').split()
    if len(fields) < 2:
        # Unexpected/empty banner: treat as "not available" rather than
        # letting an IndexError escape to the caller.
        return False
    # The second whitespace-separated token is taken as the version
    # (e.g. "4.1.1").  Only its leading digits are used, so suffixes such
    # as "-rc1" and an optional "v" prefix do not break the parsing.
    match = re.match(r'v?(\d+)', fields[1])
    if match is None:
        return False
    return int(match.group(1)) >= expected_version
21
def tesseract_ocr(image_path, tesseract_args=''):
    """Run ``tesseract`` on an image and return the recognised text lines.

    The command is invoked with ``stdout`` as its output base, and the
    text it prints is split into lines.

    :param image_path: path of the image handed to ``tesseract``; also
                       logged at debug level on the ``console`` logger
    :param tesseract_args: extra command-line options given as a single
                           string.  It is split using shell-like quoting
                           rules and appended after the output base.  An
                           empty string (the default) adds nothing.
    :returns: list of the non-empty output lines with surrounding
              whitespace stripped, or ``None`` when the command exits
              with a non-zero status
    """
    # Function-scope import: keeps this block self-contained without
    # touching the module's import section.
    import shlex

    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)

    command = ['tesseract', image_path, 'stdout']
    if tesseract_args:
        # Previously this parameter was accepted but never forwarded.
        command += shlex.split(tesseract_args)

    (stdout, stderr, ret) = run_cmd(command)
    if ret:
        return None

    # Keep only lines that still have content once whitespace is removed.
    lines = [text
             for text in (raw.strip() for raw in stdout.split('\n'))
             if text]
    for text in lines:
        console_logger.debug(text)
    return lines
36