-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_to_timecode.py
136 lines (117 loc) · 5.11 KB
/
text_to_timecode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
import cv2
import pytesseract
from PIL import Image
import numpy as np
from datetime import timedelta
import argparse
import sys
import cv_helper
import typer
from icecream import ic
import os
from pathlib import Path
# Typer application object; CLI commands in this file register on it
# through the @app.command() decorator.
app = typer.Typer()
def timestamp_to_string(timestamp_ms):
    """Convert a millisecond offset to an ``HH:MM:SS`` string.

    Args:
        timestamp_ms: Offset in milliseconds (int or float). Fractions of a
            second are truncated, not rounded.

    Returns:
        The offset formatted as zero-padded ``HH:MM:SS``. The hour field is
        not capped at 23, so 25 hours renders as ``25:00:00``. Negative
        offsets are clamped to ``00:00:00``.
    """
    td = timedelta(milliseconds=timestamp_ms)
    # timedelta normalises into (days, seconds, microseconds); reading only
    # td.seconds would silently wrap every 24 hours, so fold the days back in.
    total_seconds = max(0, td.days * 86400 + td.seconds)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
class TextDetector:
    """Extract on-screen text from sampled video frames with timestamps.

    An instance is handed to ``cv_helper.process_video`` together with an
    opened video. NOTE(review): ``cv_helper`` is not visible here; presumably
    it calls ``create`` once, ``frame`` for every decoded frame and finally
    ``destroy`` -- confirm against that module.

    Each time the recognised text changes, one line of the form
    ``HH:MM:SS -> text`` is written to the output file, or to stdout when no
    output file was configured.
    """

    # Homebrew-style install path of the tesseract binary; only applied when
    # it actually exists so other hosts keep pytesseract's default lookup.
    TESSERACT_CMD = '/opt/homebrew/bin/tesseract'
    # A word is kept only if its OCR confidence is strictly above this value.
    MIN_CONFIDENCE = 25
    # A word is kept only if it contains at least this many alphabetic chars.
    MIN_ALPHA_CHARS = 4

    def __init__(self, frames_per_second=1, output_file=None):
        """Store the sampling configuration.

        Args:
            frames_per_second: How many frames per second of video to OCR.
            output_file: Path of the text file to write results to, or a
                falsy value to print results to stdout instead.
        """
        self.last_text = None
        self.frames_per_second = frames_per_second
        self.output_file = output_file
        self.output_fp = None
        # Configure pytesseract path if needed
        if os.path.exists(self.TESSERACT_CMD):
            pytesseract.pytesseract.tesseract_cmd = self.TESSERACT_CMD

    @staticmethod
    def _compute_frame_skip(video_fps, frames_per_second):
        """Return the sampling stride in frames, never smaller than 1.

        Args:
            video_fps: Native frame rate reported for the video.
            frames_per_second: Requested number of frames to process per
                second of video.

        Returns:
            ``int(video_fps / frames_per_second)`` clamped to at least 1, so
            it is always safe as the right-hand side of ``%``. If the video
            frame rate is zero, negative or NaN, every frame is processed.

        Raises:
            ValueError: If ``frames_per_second`` is not positive.
        """
        if frames_per_second <= 0:
            raise ValueError(
                f"frames_per_second must be positive, got {frames_per_second}"
            )
        # `not x > 0` (rather than `x <= 0`) also catches NaN.
        if not video_fps > 0:
            return 1
        return max(1, int(video_fps / frames_per_second))

    @classmethod
    def _extract_text(cls, data):
        """Join the confidently recognised words of one OCR result.

        Args:
            data: Mapping with parallel ``'conf'`` and ``'text'`` sequences,
                as produced by ``pytesseract.image_to_data`` with
                ``Output.DICT``.

        Returns:
            The kept words joined by single spaces; ``''`` if none qualify.
            A word is kept when its confidence parses as a number above
            ``MIN_CONFIDENCE`` and it has at least ``MIN_ALPHA_CHARS``
            alphabetic characters.
        """
        words = []
        for conf, raw_word in zip(data['conf'], data['text']):
            try:
                confidence = float(conf)
            except (TypeError, ValueError):
                continue  # Skip invalid confidence values
            if not confidence > cls.MIN_CONFIDENCE:
                continue
            word = raw_word.strip()
            if sum(1 for c in word if c.isalpha()) >= cls.MIN_ALPHA_CHARS:
                words.append(word)
        return ' '.join(words)

    def create(self, input_video):
        """Initialize the processor with video properties.

        Reads the frame rate from ``input_video``, derives the sampling
        stride and, when an output file is configured, creates its parent
        directory and opens it for writing (truncating existing content).

        Args:
            input_video: Object exposing ``get(cv2.CAP_PROP_FPS)``.

        Raises:
            ValueError: If ``frames_per_second`` is not positive.
        """
        self.fps = input_video.get(cv2.CAP_PROP_FPS)
        self.frame_count = 0
        # Calculate how many frames to skip to achieve desired frames_per_second
        self.frame_skip = self._compute_frame_skip(self.fps, self.frames_per_second)
        ic(f"Video FPS: {self.fps}, Processing {self.frames_per_second} FPS, Skipping every {self.frame_skip} frames")
        if self.output_file:
            # Do not leak a handle left over from an earlier create() call.
            if self.output_fp:
                self.output_fp.close()
                self.output_fp = None
            # A bare filename has an empty dirname, which os.makedirs rejects.
            output_dir = os.path.dirname(self.output_file)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            # OCR output is frequently non-ASCII; do not depend on the locale.
            self.output_fp = open(self.output_file, 'w', encoding='utf-8')

    def destroy(self):
        """Cleanup resources: close OpenCV windows and the output file."""
        try:
            cv2.destroyAllWindows()
        finally:
            # Close the file even if the OpenCV call above raises.
            if self.output_fp:
                self.output_fp.close()
                self.output_fp = None

    def normalize_text(self, text):
        """Normalize text for comparison by removing extra spaces and lowercasing"""
        return ' '.join(text.lower().split())

    def frame(self, idx, frame):
        """Process a single frame.

        Only frames whose index is a multiple of the sampling stride are
        OCR'd. A result line is emitted when text was found and it differs,
        after normalisation, from the previously emitted text.

        Args:
            idx: Index of the frame within the video.
            frame: Image in a format ``cv2.cvtColor`` accepts with
                ``COLOR_BGR2GRAY``.

        Errors raised during OCR or output are reported on stderr and do not
        propagate, so one bad frame does not abort the whole run.
        """
        self.frame_count = idx
        # Process frames based on desired frames_per_second
        if idx % self.frame_skip != 0:
            return

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Inverted adaptive threshold (block size 11, constant 2); the
        # original author's note says this targets white text on a black
        # background.
        thresh = cv2.adaptiveThreshold(
            gray,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )
        # Convert OpenCV image to PIL Image for Tesseract
        pil_image = Image.fromarray(thresh)

        try:
            data = pytesseract.image_to_data(pil_image, output_type=pytesseract.Output.DICT)
            text = self._extract_text(data)
            if not text:
                return
            # Only output if the text differs from the last emitted text.
            if self.last_text and self.normalize_text(text) == self.normalize_text(self.last_text):
                return

            timestamp_ms = (idx / self.fps) * 1000
            output_line = f"{timestamp_to_string(timestamp_ms)} -> {text}\n"
            if self.output_fp:
                self.output_fp.write(output_line)
                # Flush per line so the file can be followed during a run.
                self.output_fp.flush()
            else:
                print(output_line, end='')
            self.last_text = text
        except Exception as e:
            print(f"Error processing frame {idx}: {e}", file=sys.stderr)
@app.command()
def process_video(
    video_path: str = typer.Argument(
        "input.mp4",
        help="Path to the video file to process",
    ),
    fps: float = typer.Option(
        1/3,
        help="Number of frames to process per second",
    ),
    output_file: str = typer.Option(
        os.path.expanduser("~/tmp/timecode.txt"),
        help="Path to output file (default: ~/tmp/timecode.txt)",
    ),
):
    """
    Process a video file and extract text with timestamps.
    Outputs timestamps and detected text in the format: HH:MM:SS -> text
    """
    # Trace the effective inputs before any work starts.
    ic(f"Processing video for text: {video_path}")
    ic(f"Output file: {output_file}")

    # Open the video first, then build the detector and hand both to the
    # frame-processing helper.
    capture = cv_helper.cv2_video(video_path)
    cv_helper.process_video(
        capture,
        TextDetector(frames_per_second=fps, output_file=output_file),
    )
# Script entry point: run the Typer application only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    app()