# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.

import logging
import re
import shlex

from . import has_cmd, run_cmd

# Leading major-version digits of a version token such as "4.1.1" or
# "v5.0.0-alpha"; the optional "v" prefix is tolerated.
_MAJOR_VERSION_RE = re.compile(r'v?(\d+)')


def _major_version(version_output):
    """Extract the major version from ``tesseract --version`` output.

    The version is taken from the second whitespace-separated token of
    the output (e.g. ``tesseract 4.1.1`` -> ``4``).

    Returns the major version as an int, or None when the output is not
    text, has fewer than two tokens, or the second token does not start
    with a number.
    """
    if not isinstance(version_output, str):
        return None
    fields = version_output.split()
    if len(fields) < 2:
        return None
    match = _MAJOR_VERSION_RE.match(fields[1])
    if match is None:
        return None
    return int(match.group(1))


def tesseract_available(expected_version):
    """Tell whether tesseract is usable at a given minimum major version.

    :param expected_version: minimum acceptable major version (int)
    :returns: True when the major version reported by
              ``tesseract --version`` is >= ``expected_version``.
              False when ``has_cmd`` does not report the command as
              available, when the version query exits with a non-zero
              status, or when no version number can be parsed.
    """
    (has_tesseract, _) = has_cmd('tesseract')
    if not has_tesseract:
        return False
    (stdout, stderr, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False
    # stdout is tried first. NOTE(review): older tesseract releases
    # reportedly print the version banner on stderr, hence the fallback
    # -- confirm against the versions that must be supported.
    for output in (stdout, stderr):
        major = _major_version(output)
        if major is not None:
            return major >= expected_version
    # Unparseable banner: report "not available" instead of raising.
    return False


def tesseract_ocr(image_path, tesseract_args=''):
    """Run tesseract on an image and return the recognised text lines.

    The image path and every non-empty output line are logged at debug
    level on the 'console' logger.

    :param image_path: path of the image handed to tesseract
    :param tesseract_args: extra command-line options as a single
                           shell-style string; they are split with
                           shlex and appended after the output base.
                           An empty value adds nothing.
    :returns: list of output lines with surrounding whitespace stripped
              and empty lines dropped, or None when tesseract exits
              with a non-zero status.
    """
    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)
    # Guard the empty case explicitly: shlex.split() must not be handed
    # a None value.
    extra_args = shlex.split(tesseract_args) if tesseract_args else []
    (stdout, stderr, ret) = run_cmd(['tesseract', image_path, 'stdout']
                                    + extra_args)
    if ret:
        return None
    lines = []
    for line in stdout.split('\n'):
        sline = line.strip()
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines