-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathpredict_stream.py
31 lines (24 loc) · 1.02 KB
/
predict_stream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from tensorflow.keras.models import load_model
import json
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import pandas as pd
import app.algorithms.process_predictions as pp
# NOTE(review): MAXWORDS is not referenced anywhere in this script; presumably
# it mirrors the model's input window size -- confirm before removing.
MAXWORDS = 3000

# Load the trained Keras model from disk.
model = load_model("./data/models/nb_stream_fasttext_10k.h5")

# Rebuild the tokenizer from its saved JSON file.
with open("./data/tokenizer_stream_10k.json") as tokenizer_file:
    tokenizer = tokenizer_from_json(json.load(tokenizer_file))
# Video to analyse; `channel` is used for output file naming only.
vid = "cnpUNEWP1i8"
channel = "MentourPilot"

# Fetch the transcript for the video and run the model over its full text.
transcript, full_text, captionCount = pp.processVideo(vid)
predictions = pp.getPredictions(model, tokenizer, full_text)

words = full_text.split(" ")

# Build the "text" column so it always has exactly one entry per prediction
# row. When there are more predictions than words the tail is padded with
# "N/A" (as before). When there are more words than predictions the column is
# clipped; previously the negative multiplier produced no padding and the
# column assignment raised ValueError because of the length mismatch.
n_rows = len(predictions)
text_column = words[:n_rows] + ["N/A"] * (n_rows - len(words))

# Dump predictions next to their words for manual inspection.
df = pd.DataFrame(predictions)
df["text"] = text_column
df.to_csv(f"./examples/{channel}_{vid}.csv", index=False)

# The timestamp extraction still receives the full, unclipped word list.
sponsorTimestamps = pp.getTimestamps(transcript, captionCount, predictions, words)
print(sponsorTimestamps)
# Persist the detected timestamps as plain text: a header line followed by
# one timestamp per line.
with open(f"./examples/{channel}_{vid}.txt", 'w') as out:
    out.write("Timestamps:\n")
    out.writelines(str(stamp) + "\n" for stamp in sponsorTimestamps)