【Python】画像から文字列を認識する(OCR)【コード】

PythonでTesseract-OCRを使って画像から文字認識をします。

まず、Tesseract-OCRをインストールします。(リンクは後日更新)

私はWindowsを使用しておりますので公式サイトからインストーラーをダウンロードしてインストールしました。

ここからはVisual Studio 2022での作業になります。

pyocrをPython環境にインストールして(例: pip install pyocr)、インポートします。

import pyocr

Tesseract-OCRのパスを設定し、pyocrから利用できるOCRツールと対応言語を取得するコードを書きます。

import os

# Add the Tesseract-OCR install directory to the PATH environment variable.
# Not needed when it is already registered in the Windows environment variables.
path = 'C:\\Program Files\\Tesseract-OCR\\'
# Join with os.pathsep: plain string concatenation would glue the new
# directory onto the last existing PATH entry and corrupt both.
os.environ['PATH'] = os.pathsep.join([os.environ['PATH'], path])

# Tell pyocr explicitly which Tesseract executable to run.
pyocr.tesseract.TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
tools = pyocr.get_available_tools()
if not tools:
    print("No OCR tool found")
    # SystemExit works even where the interactive `exit` helper from the
    # `site` module is unavailable (e.g. `python -S` or frozen executables).
    raise SystemExit(1)
# List every OCR backend pyocr detected.
for available_tool in tools:
    print(available_tool.get_name())
# Use the first detected backend for all subsequent OCR calls.
tool = tools[0]
langs = tool.get_available_languages()
print(langs)

OpenCV形式(NumPy配列)の画像とPIL(Pillow)形式の画像を相互に変換する関数を用意しておきます。

import cv2
import numpy as np
from PIL import Image

class Converter(object):
    """Convert images between OpenCV (NumPy array) and PIL representations.

    Both helpers are static methods, so they can be called either on the
    class (``Converter.cv2pil(img)``) or on an instance.
    """

    @staticmethod
    def cv2pil(image):
        """Convert an OpenCV image to a PIL image.

        Args:
            image: NumPy array as used by OpenCV. A 2-D array is treated as
                grayscale, a 3-channel array as BGR and a 4-channel array
                as BGRA.

        Returns:
            The ``PIL.Image`` built from the (channel-reordered) array.
            The input array itself is not modified.
        """
        # Work on a copy so the caller's array is never touched.
        new_image = image.copy()
        if new_image.ndim == 2:  # grayscale: no channel reordering needed
            pass
        elif new_image.shape[2] == 3:  # color
            new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
        elif new_image.shape[2] == 4:  # color with alpha channel
            new_image = cv2.cvtColor(new_image, cv2.COLOR_BGRA2RGBA)
        return Image.fromarray(new_image)

    @staticmethod
    def pil2cv(image):
        """Convert a PIL image to an OpenCV image.

        Args:
            image: PIL image (or any object ``numpy.array`` accepts).

        Returns:
            A ``uint8`` NumPy array. 3-channel data is reordered RGB -> BGR,
            4-channel data RGBA -> BGRA; 2-D (grayscale) data is returned
            as converted, without reordering.
        """
        new_image = np.array(image, dtype=np.uint8)
        if new_image.ndim == 2:  # grayscale: no channel reordering needed
            pass
        elif new_image.shape[2] == 3:  # color
            new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)
        elif new_image.shape[2] == 4:  # color with alpha channel
            new_image = cv2.cvtColor(new_image, cv2.COLOR_RGBA2BGRA)
        return new_image

文字を認識するコードを書いてみました。(imageはOpenCV形式の入力画像、resultは認識結果を連結していく文字列変数、toolは上で取得したOCRツールで、いずれも事前に用意しておきます。)

    # Extract the text contained in the image.
    # NOTE(review): this fragment relies on `image`, `result` and `tool`
    # being defined by surrounding code that is not shown here.
    # Alternative input: load a PIL image straight from a file.
    #temp_image = Image.open("target.png")
    # pyocr is handed a PIL image here, so convert the OpenCV image first.
    temp_image = Converter.cv2pil(image)
    # Alternative builders tried earlier (kept for reference):
    #builder = pyocr.builders.TextBuilder(tesseract_layout=6)
    #text = tool.image_to_string(temp_image, lang="jpn", builder=builder)
    #builder = pyocr.builders.WordBoxBuilder(tesseract_layout=6)
    # LineBoxBuilder: the result is iterated below as items exposing
    # `.content` and `.position` (presumably one item per recognized line).
    builder = pyocr.builders.LineBoxBuilder(tesseract_layout=6)
    wordbox = tool.image_to_string(temp_image, lang="jpn", builder=builder)
    print("-------------------------------\n")
    #print(text)
    for word in wordbox:
        #print(word.content)
        # Strip the spaces from the recognized text before printing
        # (presumably spaces the OCR inserts between Japanese characters).
        print(word.content.replace(" ", ""))
        print(word.position)
        #result += word.content
        # Accumulate the space-stripped text, one CRLF-terminated entry per item.
        result += word.content.replace(" ", "")
        result += "\r\n"
    print("-------------------------------\n")

    # Debug view: disabled; change the condition to True to display the boxes.
    if False:
        for word in wordbox:
            # position[0] / position[1] are used as the two rectangle corners.
            cv2.rectangle(image, word.position[0], word.position[1], (0, 0, 255), 2)         # NOTE: draws directly onto `image` itself (mutates it in place)!
        # Show the annotated image, waiting up to 5000 ms for a key press.
        cv2.imshow("Image", image)
        cv2.waitKey(5000)
        cv2.destroyAllWindows()

以上！