from dotenv import load_dotenv
from openai import OpenAI
import base64
import cv2
import os

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Read the API key from the environment and stop early when it is missing
# or empty, so the script never builds a client without credentials.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("API Key ist nicht gesetzt. Pruefe deine .env Datei oder die Umgebungsvariablen.")

# Client object used for the OpenAI request further down in this script.
client = OpenAI(api_key=api_key)

# Source clip and the maximum number of leading frames to read from it.
VIDEO_PATH = "uploads/KI-in-der-Lehre-3.mp4"
MAX_FRAMES = 5

# Open the video file or read from URL
video = cv2.VideoCapture(VIDEO_PATH)
if not video.isOpened():
    # Fail loudly instead of silently continuing with an empty frame list.
    video.release()
    raise OSError(f"Video konnte nicht geoeffnet werden: {VIDEO_PATH}")

# Extract up to MAX_FRAMES leading frames, JPEG-encode each one and keep it
# as a base64 text string.
base64Frames = []
try:
    for _ in range(MAX_FRAMES):
        success, frame = video.read()
        if not success:
            # End of stream or decode failure: stop with what we have.
            break
        encoded_ok, buffer = cv2.imencode(".jpg", frame)
        if not encoded_ok:
            # Skip a frame that could not be encoded rather than storing
            # an unusable payload.
            continue
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
finally:
    # Always free the capture handle, even if decoding/encoding raised.
    video.release()

# Build the image payload for the request: every third extracted frame,
# starting with the third one (indices 2, 5, 8, ...), each passed as a
# base64 string together with resize=768.
images = [{"image": frame, "resize": 768} for frame in base64Frames[2::3]]

# Without at least one frame there is nothing to transcribe; stop here
# instead of spending an API call on a text-only prompt.
if not images:
    raise ValueError("Keine Frames zum Senden vorhanden. Pruefe die Videodatei.")

# Call the OpenAI chat endpoint with a vision-capable model.
# NOTE(review): "gpt-4-vision-preview" is a preview model name - confirm it
# is still served, or switch to the current vision-capable replacement.
prompt = "Dies sind die Frames aus einem Video. Gib mir die Texte aus den Frames wieder."
response = client.chat.completions.create(
    model="gpt-4-vision-preview",
    max_tokens=200,
    messages=[{"role": "user", "content": [prompt, *images]}],
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)