ad4r5hgs committed on
Commit
36c0282
·
verified ·
1 Parent(s): 51a59d6

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +88 -0
  2. requirements.txt +0 -0
main.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from clarifai.client.model import Model
3
+ import base64
4
+ from dotenv import load_dotenv
5
+ from PIL import Image
6
+ from io import BytesIO
7
+
8
import os

# Load variables from a local .env file (if any) into the process environment
# before the credentials below are read.
load_dotenv()

# Credentials come from the environment; each is None when its variable is unset.
clarifai_pat = os.environ.get("CLARIFAI_PAT")
cohere_api_key = os.environ.get("COHERE")
13
+
14
def generate_image(user_description, api_key, output_path="generated_image.png"):
    """Generate a comic-style image with DALL-E 3 through Clarifai and save it.

    Args:
        user_description: Free-text description of the story or scene to draw;
            it is appended to a fixed "comic artist" instruction.
        api_key: Clarifai personal access token used to authenticate the call.
        output_path: File the returned image data is written to. Defaults to
            "generated_image.png", the path this function has always used.

    Returns:
        The path of the written image file, or None when the model response
        contains no image data.
    """
    prompt = f"You are a professional comic artist. Based on the below user's description and content, create a proper story comic: {user_description}"
    inference_params = dict(quality="standard", size="1024x1024")

    # Hand the token to the client through `pat` rather than appending it to
    # the model URL: the URL is parsed for the user/app/model ids, so a query
    # string would end up inside the model id, and the secret would leak into
    # anything that logs the URL.
    model = Model(
        "https://clarifai.com/openai/dall-e/models/dall-e-3",
        pat=api_key,
    )
    model_prediction = model.predict_by_bytes(
        prompt.encode(), input_type="text", inference_params=inference_params
    )

    # Report failure with None instead of raising IndexError so callers that
    # check the return value can show their own error message.
    if not model_prediction.outputs:
        return None

    # The field is named `base64`, but its value is written verbatim in binary
    # mode (presumably raw image bytes - confirm against the SDK).
    image_data = model_prediction.outputs[0].data.image.base64
    if not image_data:
        return None

    with open(output_path, "wb") as f:
        f.write(image_data)
    return output_path
26
+
27
def understand_image(base64_image, api_key):
    """Ask the GPT-4 Vision model on Clarifai to write a story about an image.

    Args:
        base64_image: The image, base64-encoded as text; forwarded to the
            model as the ``image_base64`` inference parameter.
        api_key: Key forwarded to the model as the ``api_key`` inference
            parameter.

    Returns:
        The raw text of the first output in the model response.
    """
    story_prompt = "Analyze the content of this image and write a creative, engaging story that brings the scene to life. Describe the characters, setting, and actions in a way that would captivate a young audience:"
    params = {
        "temperature": 0.2,
        "image_base64": base64_image,
        "api_key": api_key,
    }

    vision_model = Model(
        "https://clarifai.com/openai/chat-completion/models/gpt-4-vision"
    )
    response = vision_model.predict_by_bytes(
        story_prompt.encode(),
        input_type="text",
        inference_params=params,
    )

    first_output = response.outputs[0]
    return first_output.data.text.raw
36
+
37
def encode_image(image_path):
    """Read a file in binary mode and return its contents as base64 text.

    Args:
        image_path: Path of the file to read.

    Returns:
        The file's bytes, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
40
+
41
def text_to_speech(input_text, api_key):
    """Convert text to speech with the OpenAI TTS model hosted on Clarifai.

    Args:
        input_text: The text to be spoken.
        api_key: Key forwarded to the model as the ``api_key`` inference
            parameter.

    Returns:
        The ``audio.base64`` field of the first output in the model response.
    """
    params = {"voice": "alloy", "speed": 1.0, "api_key": api_key}

    tts_model = Model("https://clarifai.com/openai/tts/models/openai-tts-1")
    response = tts_model.predict_by_bytes(
        input_text.encode(),
        input_type="text",
        inference_params=params,
    )

    return response.outputs[0].data.audio.base64
50
+
51
def main():
    """Render the Streamlit page and run the image -> story -> audio pipeline.

    The sidebar collects a description and a "Generate Image" button. When the
    button is pressed with a non-blank description, the left column generates
    and shows the comic image, and the right column turns that image into a
    story, shows the story text, and plays it back as audio.
    """
    st.set_page_config(page_title="Interactive Media Creator", layout="wide")
    st.title("Interactive Media Creator")

    with st.sidebar:
        st.header("Controls")
        image_description = st.text_area("Description for Image Generation", height=100)
        generate_image_btn = st.button("Generate Image")

    # Evaluate the trigger once so both columns agree on whether work was
    # requested; a blank description is treated the same as no description.
    has_description = bool(image_description and image_description.strip())
    should_generate = generate_image_btn and has_description
    if generate_image_btn and not has_description:
        # Give feedback instead of silently ignoring the click.
        st.sidebar.warning("Please enter a description before generating an image.")

    # Stays None unless generation succeeds, so the story column never runs
    # against a missing image.
    image_path = None

    col1, col2 = st.columns(2)

    with col1:
        st.header("Comic Art")
        if should_generate:
            with st.spinner("Generating image..."):
                image_path = generate_image(image_description, clarifai_pat)
            if image_path:
                st.image(
                    image_path,
                    caption="Generated Comic Image",
                    use_column_width=True,
                )
                st.success("Image generated!")
            else:
                st.error("Failed to generate image.")

    with col2:
        st.header("Story")
        if image_path:
            with st.spinner("Creating a story..."):
                base64_image = encode_image(image_path)
                # NOTE(review): the value read from the COHERE environment
                # variable is passed to OpenAI-backed models here - confirm
                # this is the intended key.
                understood_text = understand_image(base64_image, cohere_api_key)
                audio_base64 = text_to_speech(understood_text, cohere_api_key)
            # Show the story itself, not only its audio rendition.
            st.write(understood_text)
            st.audio(audio_base64, format="audio/mp3")
            st.success("Audio generated from image understanding!")


if __name__ == "__main__":
    main()
requirements.txt ADDED
Binary file (2.12 kB). View file