Text Spotting and Reading

For text spotting and reading you need two models: a detection model that locates the parts of an image containing the text you want to read, and an optical character recognition (OCR) model that reads it. For the latter you can either use our pretrained OCR model or create your own in the [DENK Vision AI Hub].

Result

import denk_sdk
import cv2


# Locations of the model files for text spotting and OCR.
model_path_spotting = "models/spotting.denk"
model_path_ocr = "models/ocr.denk"
device = denk_sdk.Device.CPU # or GPU0, ...
# presumably the DENK access token; left empty here -- TODO confirm what is required
token = ""
image_path = "test_image.jpg"
example_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an obscure error further down.
if example_image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")


# spotting network: a dedicated inference pipeline with the spotting model added to it
spotting_pipeline = denk_sdk.InferencePipeline(token=token, device=device)
spotting = spotting_pipeline.add_model(model_path_spotting)
# presumably discards detections with confidence below 0.5 -- TODO confirm against the SDK docs
spotting.filter_by_confidence(0.5)

# ocr network: a second, separate pipeline with the OCR model added to it
# NOTE(review): `ocr` / `ocr_pipeline` are not referenced again in the visible script --
# confirm whether the OCR model still has to be run on the spotted regions.
ocr_pipeline = denk_sdk.InferencePipeline(token=token, device=device)
ocr = ocr_pipeline.add_model(model_path_ocr)


# find valid texts for ocr: run the spotting model on the image and fetch its results
results = spotting.run(example_image).get_results()


# Draw each sufficiently confident detection (box + "text: confidence%") onto the
# image and write the annotated image to disk.
print()
for m in results.object_detection_models:
    for cl in m.classes:
        # OpenCV drawing functions take colors in BGR order for images read via imread
        color = [cl.class_color.blue, cl.class_color.green, cl.class_color.red]
        # NOTE(review): objects are paired positionally with the entries of
        # results.optical_character_recognition_models; zip stops at the shorter
        # sequence. No OCR inference is triggered here -- confirm this pairing
        # matches what the SDK returns.
        for obj, ocrtext in zip(cl.objects, results.optical_character_recognition_models):
            if obj.confidence < 0.5:
                continue

            text = f"{ocrtext.text}: {obj.confidence*100:.1f}%"
            # fixed: the original read the undefined name `ftr` here
            box = obj.bounding_box

            x1 = box.top_left.x
            y1 = box.top_left.y
            x2 = box.bottom_right.x
            y2 = box.bottom_right.y

            # fixed: the original drew on the undefined name `image`
            cv2.rectangle(example_image, (x1, y1), (x2, y2), color, 4)
            cv2.putText(
                example_image,
                text,
                (x1, y1 - 10),  # place the label slightly above the box
                cv2.FONT_HERSHEY_SIMPLEX,
                1,
                color,
                4,
                cv2.LINE_AA,
            )

cv2.imwrite("output_image.jpg", example_image)