chore: renamed intro script, added reverse text-to-speech model

ptmrio · Jul 1, 2024 · 52f2ce4 · 52f2ce4
1 parent cccb3f3
commit 52f2ce4
Show file tree

Hide file tree

Showing 7 changed files with 93 additions and 2 deletions.
diff --git a/dist/text-to-speech.exe b/dist/text-to-speech.exe
diff --git a/dist/video-subtitler.exe b/dist/video-subtitler.exe
diff --git a/dist/whisper-transcribe.exe b/dist/whisper-transcribe.exe
diff --git a/text-to-speech.py b/text-to-speech.py
@@ -0,0 +1,53 @@
+import os
+import argparse
+from openai import OpenAI
+from dotenv import load_dotenv
+
+def generate_speech(client, text, model="tts-1", voice="alloy", response_format="mp3", speed=1.0):
+    """Request speech for ``text`` from the client and return the response content.
+
+    Args:
+        client: Object exposing ``audio.speech.create`` (``main`` passes an
+            ``OpenAI`` instance).
+        text: Sent to the endpoint as the ``input`` field.
+        model: Forwarded unchanged as ``model``.
+        voice: Forwarded unchanged as ``voice``.
+        response_format: Forwarded unchanged as ``response_format``.
+        speed: Forwarded unchanged as ``speed``.
+
+    Returns:
+        The ``content`` attribute of the object returned by ``create``
+        (``main`` hands it to ``save_audio_file``, which writes it in
+        binary mode).
+    """
+    # Collect the request fields first; note that ``text`` travels as "input".
+    request_fields = {
+        "model": model,
+        "input": text,
+        "voice": voice,
+        "response_format": response_format,
+        "speed": speed,
+    }
+    speech_response = client.audio.speech.create(**request_fields)
+    return speech_response.content
+
+def save_audio_file(audio_content, output_path):
+    """Write ``audio_content`` to ``output_path`` in binary mode.
+
+    Missing parent directories are created first, so a path such as
+    ``out/clips/hello.mp3`` works even when ``out/clips`` does not exist yet.
+
+    Args:
+        audio_content: Bytes-like data to write.
+        output_path: Destination file path; an existing file is overwritten.
+
+    Raises:
+        OSError: If a directory cannot be created or the file cannot be
+            opened or written.
+    """
+    parent_dir = os.path.dirname(output_path)
+    # dirname is "" for a bare filename, and makedirs("") raises, so only
+    # create directories when the path actually names one.
+    if parent_dir:
+        os.makedirs(parent_dir, exist_ok=True)
+    with open(output_path, "wb") as audio_file:
+        audio_file.write(audio_content)
+
+def main():
+    """Command-line entry point: parse arguments, generate speech, save the audio.
+
+    ``--text`` and ``--output`` are required; ``--model``, ``--voice``,
+    ``--response_format`` and ``--speed`` default to the same values as
+    ``generate_speech``. Environment variables are loaded from ``.env``
+    before the API key is read.
+
+    Exits through ``parser.error`` when ``OPENAI_API_KEY`` is unset or empty
+    after ``.env`` has been loaded, instead of passing ``None`` (or an empty
+    string) on to the client.
+    """
+    parser = argparse.ArgumentParser(description="Generate speech using OpenAI's TTS model")
+    parser.add_argument('--text', type=str, required=True, help="Text to generate audio for")
+    parser.add_argument('--model', type=str, default="tts-1", help="Model to use for speech generation")
+    parser.add_argument('--voice', type=str, default="alloy", help="Voice to use for speech generation")
+    parser.add_argument('--response_format', type=str, default="mp3", help="Format of the generated audio")
+    parser.add_argument('--speed', type=float, default=1.0, help="Speed of the generated audio")
+    parser.add_argument('--output', type=str, required=True, help="Path to save the generated audio file")
+
+    args = parser.parse_args()
+
+    # Load environment variables from .env file
+    load_dotenv()
+
+    # Fail fast with a usage-style message when no key is configured;
+    # os.getenv returns None for an unset variable.
+    api_key = os.getenv('OPENAI_API_KEY')
+    if not api_key:
+        parser.error("OPENAI_API_KEY is not set; export it or add it to a .env file")
+
+    # Initialize OpenAI client
+    client = OpenAI(api_key=api_key)
+
+    # Generate speech
+    audio_content = generate_speech(
+        client=client,
+        text=args.text,
+        model=args.model,
+        voice=args.voice,
+        response_format=args.response_format,
+        speed=args.speed
+    )
+
+    # Save the audio file
+    save_audio_file(audio_content, args.output)
+
+    print(f"Audio saved to {args.output}")
+
+# Run the CLI only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
diff --git a/whisper-transcribe.spec → text-to-speech.spec b/whisper-transcribe.spec → text-to-speech.spec
@@ -2,7 +2,7 @@
 
 
 a = Analysis(
-    ['whisper-transcribe.py'],
+    ['text-to-speech.py'],
     pathex=[],
     binaries=[],
     datas=[],
@@ -22,7 +22,7 @@ exe = EXE(
     a.binaries,
     a.datas,
     [],
-    name='whisper-transcribe',
+    name='text-to-speech',
     debug=False,
     bootloader_ignore_signals=False,
     strip=False,

diff --git a/whisper-transcribe.py → video-subtitler.py b/whisper-transcribe.py → video-subtitler.py
diff --git a/video-subtitler.spec b/video-subtitler.spec
@@ -0,0 +1,38 @@
+# -*- mode: python ; coding: utf-8 -*-
+
+
+# Build description for the 'video-subtitler' executable.
+# NOTE(review): Analysis, PYZ and EXE are neither defined nor imported in this
+# file; presumably PyInstaller provides them when it executes the spec.
+# Confirm against the PyInstaller spec-file documentation.
+
+# video-subtitler.py is the only script listed; every list/dict option is empty.
+a = Analysis(
+    ['video-subtitler.py'],
+    pathex=[],
+    binaries=[],
+    datas=[],
+    hiddenimports=[],
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    noarchive=False,
+    optimize=0,
+)
+# Built from the analysis result's `pure` attribute.
+pyz = PYZ(a.pure)
+
+# Combines pyz with the scripts, binaries and datas of the analysis above.
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.datas,
+    [],
+    name='video-subtitler',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    # NOTE(review): presumably enables UPX compression of the output, with nothing excluded. Confirm.
+    upx=True,
+    upx_exclude=[],
+    runtime_tmpdir=None,
+    # NOTE(review): presumably builds a console (not windowed) program. Confirm.
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)