Text Spotting and Reading
For text spotting and reading, you need two models: a detection model that locates the parts of an image containing the text you want to read, and an optical character recognition (OCR) model that reads that text. For the latter, you can either use our pretrained OCR model or create your own in the [DENK Vision AI Hub].
import denk_sdk
import cv2
# Model files: one for locating text regions, one for reading them.
model_path_spotting = "models/spotting.denk"
model_path_ocr = "models/ocr.denk"

# Inference device.
device = denk_sdk.Device.CPU  # or GPU0, ...

# Token handed to the inference pipelines; empty in this example.
token = ""

# Load the input image as a 3-channel color image.
image_path = "test_image.jpg"
example_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
if example_image is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None.
    # Fail here with a clear message instead of deep inside the inference call.
    raise FileNotFoundError(f"Could not read image: {image_path!r}")
# Each model gets its own pipeline. Both pipelines run on the `device`
# configured at the top of the script, so switching to a GPU only requires
# changing that one variable.

# spotting network: locates the image regions that contain text
spotting_pipeline = denk_sdk.InferencePipeline(token=token, device=device)
spotting = spotting_pipeline.add_model(model_path_spotting)
# NOTE(review): presumably discards detections below 50 % confidence - confirm
# against the SDK documentation.
spotting.set_confidence_filter(0.5)

# ocr network: reads the text inside a region
ocr_pipeline = denk_sdk.InferencePipeline(token=token, device=device)
ocr = ocr_pipeline.add_model(model_path_ocr)
# Step 1: find the regions that contain text.
results = spotting.run(example_image).get_results()

# Emit a blank line on stdout (kept from the original script).
print()

# Crops are taken from an untouched copy so that boxes and labels drawn for
# earlier detections never end up inside the input of a later OCR run.
source_image = example_image.copy()
img_height, img_width = source_image.shape[:2]

# Step 2: run OCR on every detected box and annotate the image.
for m in results.object_detection_models:
    for cl in m.classes:
        # OpenCV images loaded with cv2.imread are BGR, hence blue first.
        color = [cl.class_color.blue, cl.class_color.green, cl.class_color.red]
        for obj in cl.objects:
            if obj.confidence < 0.5:
                continue

            box = obj.bounding_box
            # Array slicing and the OpenCV drawing calls need integer pixel
            # coordinates; clamping keeps a box that overlaps the border from
            # producing negative (wrap-around) slice indices.
            x1 = min(max(int(box.top_left.x), 0), img_width)
            y1 = min(max(int(box.top_left.y), 0), img_height)
            x2 = min(max(int(box.bottom_right.x), 0), img_width)
            y2 = min(max(int(box.bottom_right.y), 0), img_height)

            crop = source_image[y1:y2, x1:x2]
            if crop.size == 0:
                # Degenerate box: nothing to read or draw.
                continue

            # NOTE(review): `results` comes from the spotting model only, so
            # pairing detections with `results.optical_character_recognition_models`
            # (as the script did before) never used the OCR model at all.
            # Running `ocr` on the crop mirrors the spotting call above;
            # confirm against the DENK SDK docs that the OCR output is
            # exposed through the same result structure.
            ocr_results = ocr.run(crop).get_results()
            recognized = " ".join(
                ocr_model.text
                for ocr_model in ocr_results.optical_character_recognition_models
            )

            text = f"{recognized}: {obj.confidence*100:.1f}%"
            cv2.rectangle(example_image, (x1, y1), (x2, y2), color, 4)
            cv2.putText(
                example_image,
                text,
                (x1, y1 - 10),  # label sits just above the box
                cv2.FONT_HERSHEY_SIMPLEX,
                1,
                color,
                4,
                cv2.LINE_AA,
            )

# cv2.imwrite reports failure through its return value rather than raising.
if not cv2.imwrite("output_image.jpg", example_image):
    raise OSError("Could not write output_image.jpg")