# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.

import re
import logging

from . import has_cmd, run_cmd


def tesseract_available(expected_version):
    """Return True if a tesseract binary of at least the given major
    version can be run.

    :param expected_version: minimum acceptable major version (integer)
    :returns: False when the 'tesseract' command is missing, when
              'tesseract --version' exits with a non-zero status, when no
              version number can be parsed from its output, or when the
              parsed major version is lower than expected_version;
              True otherwise.
    """
    if not has_cmd('tesseract'):
        return False
    (stdout, stderr, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False
    # The version is the second whitespace-separated token of the banner
    # ("tesseract 4.1.1").  Look at stdout first and fall back to stderr
    # in case the banner was printed there instead.
    for banner in (stdout, stderr):
        tokens = banner.split() if banner else []
        if len(tokens) < 2:
            continue
        # Tolerate a leading 'v' ("v5.0.0") and anything after the major
        # number instead of raising ValueError on int().
        major = re.match(r'v?(\d+)', tokens[1])
        if major:
            return int(major.group(1)) >= expected_version
    # No parsable version: report tesseract as unavailable rather than
    # crashing the caller with IndexError/ValueError.
    return False


def tesseract_ocr(image_path, tesseract_args=''):
    """Run tesseract on an image and return the recognized text lines.

    The image path and every non-empty recognized line are logged at debug
    level on the 'console' logger.

    :param image_path: path of the image file handed to tesseract
    :param tesseract_args: extra command line arguments for tesseract,
                           either a whitespace-separated string or a
                           sequence of strings; they are appended after
                           the 'stdout' output base.  The default adds
                           nothing.
    :returns: list of stripped, non-empty output lines, or None if
              tesseract exited with a non-zero status.
    """
    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)

    # Previously this parameter was accepted but never forwarded.
    if not tesseract_args:
        extra_args = []
    elif isinstance(tesseract_args, str):
        extra_args = tesseract_args.split()
    else:
        extra_args = list(tesseract_args)

    (stdout, stderr, ret) = run_cmd(['tesseract', image_path,
                                     'stdout'] + extra_args)
    if ret:
        return None

    lines = []
    for line in stdout.split('\n'):
        sline = line.strip()
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines