Spaces:

ad4r5hgs
/

image-audio-gen

Sleeping

App Files Files Community

ad4r5hgs committed on Jun 25, 2024

Commit

36c0282

verified ·

1 Parent(s): 51a59d6

Upload 2 files

Browse files

Files changed (2) hide show

main.py +88 -0
requirements.txt +0 -0

main.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import streamlit as st
+from clarifai.client.model import Model
+import base64
+from dotenv import load_dotenv
+from PIL import Image
+from io import BytesIO
import os

# Run python-dotenv's loader before the lookups below so that values from a
# local .env file (if one exists) are visible in the process environment.
load_dotenv()

# Credentials handed to the helper functions in this module. Each lookup
# yields None when the variable is not set.
clarifai_pat = os.environ.get("CLARIFAI_PAT")
cohere_api_key = os.environ.get("COHERE")
def generate_image(user_description, api_key):
    """Generate a comic image from a text description and save it to disk.

    The description is wrapped in a fixed "comic artist" prompt and sent to
    the DALL-E 3 model hosted on Clarifai.

    Args:
        user_description: Free-text description supplied by the user.
        api_key: Clarifai personal access token used to authenticate the
            request. When None, the client falls back to its own default
            credential lookup.

    Returns:
        The path of the file the image was written to
        ("generated_image.png" in the current working directory).
    """
    prompt = f"You are a professional comic artist. Based on the below user's description and content, create a proper story comic: {user_description}"
    inference_params = dict(quality="standard", size="1024x1024")
    # Authenticate through the client's `pat` argument. Appending the token
    # to the model URL as a query string is not how the SDK reads
    # credentials and needlessly places a secret inside a URL.
    model = Model(
        "https://clarifai.com/openai/dall-e/models/dall-e-3", pat=api_key
    )
    model_prediction = model.predict_by_bytes(
        prompt.encode(), input_type="text", inference_params=inference_params
    )
    # Despite the field name, the payload is written out verbatim in binary
    # mode, i.e. it is treated as the raw image bytes.
    image_bytes = model_prediction.outputs[0].data.image.base64
    output_path = "generated_image.png"
    with open(output_path, "wb") as f:
        f.write(image_bytes)
    return output_path
def understand_image(base64_image, api_key):
    """Ask a hosted vision model to write a story about an image.

    Args:
        base64_image: The image, base64-encoded as text; forwarded to the
            model as the ``image_base64`` inference parameter.
        api_key: Forwarded to the model as the ``api_key`` inference
            parameter (presumably the upstream provider key; confirm).

    Returns:
        The raw text of the first output in the model's response.
    """
    model_url = "https://clarifai.com/openai/chat-completion/models/gpt-4-vision"
    story_prompt = "Analyze the content of this image and write a creative, engaging story that brings the scene to life. Describe the characters, setting, and actions in a way that would captivate a young audience:"
    params = {
        "temperature": 0.2,
        "image_base64": base64_image,
        "api_key": api_key,
    }
    vision_model = Model(model_url)
    response = vision_model.predict_by_bytes(
        story_prompt.encode(), input_type="text", inference_params=params
    )
    first_output = response.outputs[0]
    return first_output.data.text.raw
def encode_image(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    The file is read in binary mode, base64-encoded, and the result is
    decoded to a ``str`` using UTF-8.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")
def text_to_speech(input_text, api_key):
    """Convert text to speech with a hosted TTS model.

    Args:
        input_text: The text to be spoken.
        api_key: Forwarded to the model as the ``api_key`` inference
            parameter (presumably the upstream provider key; confirm).

    Returns:
        The ``audio.base64`` payload of the first output in the response.
    """
    model_url = "https://clarifai.com/openai/tts/models/openai-tts-1"
    params = {"voice": "alloy", "speed": 1.0, "api_key": api_key}
    tts_model = Model(model_url)
    response = tts_model.predict_by_bytes(
        input_text.encode(), input_type="text", inference_params=params
    )
    return response.outputs[0].data.audio.base64
def main():
    """Render the Streamlit page and run the image -> story -> audio flow.

    The sidebar collects a description and a "Generate Image" button. When
    the button is pressed with a non-empty description, the left column
    generates and shows the image; the right column then encodes that image,
    asks for a story about it, converts the story to speech and plays it.

    Failures in either stage are reported on the page instead of aborting
    the script, and the story stage only runs when an image was produced.
    """
    st.set_page_config(page_title="Interactive Media Creator", layout="wide")
    st.title("Interactive Media Creator")
    with st.sidebar:
        st.header("Controls")
        image_description = st.text_area("Description for Image Generation", height=100)
        generate_image_btn = st.button("Generate Image")

    should_generate = bool(generate_image_btn and image_description)
    # Bound up front so the story column never depends on a name that only
    # exists if the image column's branch happened to run successfully.
    image_path = None

    col1, col2 = st.columns(2)
    with col1:
        st.header("Comic Art")
        if should_generate:
            with st.spinner("Generating image..."):
                try:
                    image_path = generate_image(image_description, clarifai_pat)
                except Exception as exc:
                    # UI boundary: surface the failure on the page rather
                    # than letting the whole script run abort.
                    image_path = None
                    st.exception(exc)
                if image_path:
                    st.image(
                        image_path,
                        caption="Generated Comic Image",
                        use_column_width=True,
                    )
                    st.success("Image generated!")
                else:
                    st.error("Failed to generate image.")
    with col2:
        st.header("Story")
        # Only attempt the story/audio stage when there is an image to read.
        if image_path:
            with st.spinner("Creating a story..."):
                try:
                    base64_image = encode_image(image_path)
                    understood_text = understand_image(base64_image, cohere_api_key)
                    audio_base64 = text_to_speech(understood_text, cohere_api_key)
                except Exception as exc:
                    st.exception(exc)
                    st.error("Failed to create the story audio.")
                else:
                    st.audio(audio_base64, format="audio/mp3")
                    st.success("Audio generated from image understanding!")


if __name__ == "__main__":
    main()

requirements.txt ADDED Viewed

Binary file (2.12 kB). View file