"""Detect speech bubbles in one image with a YOLOv8 segmentation model.

Reads ``IMAGE_PATH``, runs the model on it, and writes three artifacts:

* ``OUTPUT_METADATA`` - JSON list with one entry per detected box
  (top-left position, size, confidence).
* ``OUTPUT_IMAGE`` - the input image with every detected box drawn on it.
* ``OUTPUT_MASK`` - single-channel mask (0 / 255) that is the union of all
  predicted instance masks.

Raises:
    FileNotFoundError: if the input image cannot be read.
    OSError: if one of the output images cannot be written.
"""

import json

import cv2
import numpy as np
from ultralytics import YOLO

IMAGE_PATH = 'data/test.jpg'
OUTPUT_METADATA = 'data/overlays_metadata.json'
OUTPUT_IMAGE = 'data/overlays_visualized.jpg'
OUTPUT_MASK = 'data/mask.png'

# Load image. cv2.imread signals failure by returning None, not by raising.
image = cv2.imread(IMAGE_PATH)
if image is None:
    raise FileNotFoundError(f"Không tìm thấy ảnh: {IMAGE_PATH}")

# Image size, used for the mask canvas.
h, w = image.shape[:2]

# Accumulator for the union of all instance masks (0 = background).
mask = np.zeros((h, w), dtype=np.uint8)

# Load YOLOv8 model
model = YOLO('model/yolo8_seg-speed-bubble.pt')

# Run detection. retina_masks=True asks Ultralytics for masks at the original
# image resolution; the default masks are at the letterboxed inference size,
# so stretching them straight to (w, h) would shift them by the padding.
results = model(IMAGE_PATH, retina_masks=True)

overlays = []

for result in results:
    # Detected speech bubbles: record metadata and draw each box.
    for box in result.boxes:
        x1, y1, x2, y2 = (int(coordinate) for coordinate in box.xyxy[0])
        conf = float(box.conf[0])

        # save metadata
        overlays.append({
            "type": "textbox",
            "position": [x1, y1],
            "size": [x2 - x1, y2 - y1],
            "confidence": conf,
        })

        # draw bounding box (BGR: 255,0,0)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)

    # Merge this result's instance masks into the combined mask.
    instance_masks = getattr(result, 'masks', None)
    if instance_masks is None:
        continue

    for m in instance_masks.data:
        m = m.cpu().numpy().astype(np.uint8) * 255
        # Fallback only: with retina_masks the shapes should already match.
        if m.shape[:2] != (h, w):
            m = cv2.resize(m, (w, h), interpolation=cv2.INTER_NEAREST)
        mask = cv2.bitwise_or(mask, m)

# metadata
with open(OUTPUT_METADATA, "w", encoding="utf-8") as f:
    json.dump(overlays, f, indent=2)

# Save images. cv2.imwrite reports failure through its boolean return value,
# so check it instead of silently producing no file.
for out_path, out_image in ((OUTPUT_IMAGE, image), (OUTPUT_MASK, mask)):
    if not cv2.imwrite(out_path, out_image):
        raise OSError(f"Failed to write image: {out_path}")