Skip to main content

Text Spotting and Reading

For text spotting and reading, you need two models: a detection model that locates the parts of an image containing the text you want to read, and an optical character recognition (OCR) model that reads that text. For the latter, you can either use our pretrained OCR model or create your own in the [DENK Vision AI Hub].

Result

import denk_sdk
import cv2


# Paths to the exported model files and to the image that should be analyzed.
model_path_spotting = "models/spotting.denk"
model_path_ocr = "models/ocr.denk"
image_path = "test_image.jpg"
device = denk_sdk.Device.CPU  # or GPU0, ...
# NOTE(review): presumably your DENK account/license token - fill in before running.
token = ""

# cv2.imread does not raise when the file is missing or unreadable; it returns
# None, which would only surface later as an unrelated-looking error.
# Fail early with a clear message instead.
example_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
if example_image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")


def _load_pipeline(model_path):
    """Create an inference pipeline and add the model stored at *model_path*.

    Returns the pipeline together with the model handle returned by
    ``add_model``.
    """
    pipeline = denk_sdk.InferencePipeline(token=token, device=device)
    return pipeline, pipeline.add_model(model_path)


# Text spotting: keep only detections with a confidence of at least 0.5
# (NOTE(review): exact threshold semantics are defined by the SDK - confirm).
spotting_pipeline, spotting_model = _load_pipeline(model_path_spotting)
spotting_model.post.filter_by_confidence(0.5)

# Text reading (OCR).
ocr_pipeline, ocr_model = _load_pipeline(model_path_ocr)


# Run the spotting pipeline on the full image to locate the text regions.
spotting_run = spotting_pipeline.run(example_image)
od_pipeline_results = spotting_run.get_results()

# Annotations are drawn on a copy so that the crops handed to the OCR model
# are taken from the unmodified input image.
example_image_with_results = example_image.copy()

# Run OCR on every detected bounding box and annotate the output image with
# the box and the recognized text.
for od_model_results in od_pipeline_results.object_detection_models:
    for cl in od_model_results.classes:
        # Channels are passed in blue, green, red order, which is the channel
        # order OpenCV uses for color images.
        color = (cl.class_color.blue, cl.class_color.green, cl.class_color.red)

        for obj in cl.objects:
            box = obj.bounding_box
            x1, y1 = box.top_left.x, box.top_left.y
            x2, y2 = box.bottom_right.x, box.bottom_right.y

            # Crop the detected region from the untouched input image.
            # Negative coordinates are clamped to 0 because a negative slice
            # index would wrap around to the opposite image border instead of
            # stopping at the edge. In-bounds boxes are unaffected.
            image_cutout = example_image[
                max(y1, 0):max(y2, 0),
                max(x1, 0):max(x2, 0),
            ]

            # A degenerate or fully out-of-image box yields an empty crop;
            # there is nothing to read in it.
            if image_cutout.size == 0:
                continue

            ocr_pipeline_results = ocr_pipeline.run(image_cutout).get_results()
            ocr_models = ocr_pipeline_results.optical_character_recognition_models

            # No OCR result for this crop: skip it without drawing anything.
            if not ocr_models:
                continue

            text = ocr_models[0].text

            print(f"Found text: {text}")

            example_image_with_results = cv2.rectangle(
                img=example_image_with_results,
                pt1=(x1, y1),
                pt2=(x2, y2),
                color=color,
                thickness=2,
            )

            # The text origin is the top-left corner of the box.
            example_image_with_results = cv2.putText(
                img=example_image_with_results,
                text=text,
                org=(x1, y1),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=1,
                color=color,
                thickness=2,
            )

# Written once, after all boxes have been processed. cv2.imwrite signals a
# failed write through its return value, so check it rather than finishing
# silently without an output file.
if not cv2.imwrite("output_image.jpg", example_image_with_results):
    raise OSError("Could not write output_image.jpg")