import io
import re
import subprocess
import sys

import streamlit as st
from transformers import pipeline
from gtts import gTTS
from PIL import Image
# Install PyTorch at runtime if it is missing
try:
    import torch
except ImportError:
    st.warning("PyTorch is not installed. Installing PyTorch...")
    subprocess.run([sys.executable, "-m", "pip", "install", "torch"], check=True)
    st.success("PyTorch has been successfully installed!")
    import torch
# Install the Optimum library at runtime if it is missing
# (required for loading GPTQ-quantized models with transformers)
try:
    import optimum
except ImportError:
    st.warning("The Optimum library is not installed. Installing optimum...")
    subprocess.run([sys.executable, "-m", "pip", "install", "optimum"], check=True)
    st.success("The Optimum library has been successfully installed!")
    import optimum
# Load the image-captioning and story-generation pipelines once and cache
# them, so Streamlit does not reload the models on every rerun
@st.cache_resource
def load_models():
    caption = pipeline("image-to-text", model="unography/blip-large-long-cap")
    story = pipeline("text-generation", model="TheBloke/storytime-13B-GPTQ")
    return caption, story

caption_model, story_generator = load_models()
def generate_caption(image):
    # Generate a caption for the uploaded image
    caption = caption_model(image)[0]["generated_text"]
    return caption
def generate_story(caption):
    # Generate a story from the caption using the storytime-13B model;
    # return_full_text=False strips the prompt from the pipeline output
    prompt = f"Write a short, simple children's story inspired by the image of {caption}. Here's the story:\n\n"
    story = story_generator(prompt, max_length=500, num_return_sequences=1,
                            return_full_text=False)[0]["generated_text"]
    story = story.strip()

    # Post-process the story (example: remove inappropriate words).
    # Whole-word matching avoids mangling harmless words such as "begun"
    inappropriate_words = ["violence", "horror", "scary", "adult", "death", "gun", "shoot"]
    for word in inappropriate_words:
        story = re.sub(rf"\b{re.escape(word)}\b", "", story, flags=re.IGNORECASE)
    story = re.sub(r" {2,}", " ", story)

    # Limit the story to approximately 100 words
    words = story.split()
    if len(words) > 100:
        story = " ".join(words[:100]) + "..."
    return story
def convert_to_audio(story):
    # Convert the story to speech with gTTS and return it as an in-memory MP3
    tts = gTTS(text=story, lang="en")
    audio_bytes = io.BytesIO()
    tts.write_to_fp(audio_bytes)
    audio_bytes.seek(0)
    return audio_bytes
def main():
    st.title("Storytelling Application")

    # File uploader for the image (restricted to JPG)
    uploaded_image = st.file_uploader("Upload an image", type=["jpg"])

    if uploaded_image is not None:
        # Convert the uploaded file to a PIL image
        image = Image.open(uploaded_image)

        # Display the uploaded image
        st.image(image, caption="Uploaded Image", use_container_width=True)

        # Generate the caption for the image
        caption = generate_caption(image)
        st.subheader("Generated Caption:")
        st.write(caption)
        # Generate the story based on the caption
        story = generate_story(caption)
        st.subheader("Generated Story:")
        st.write(story)

        # Convert the story to audio and display an audio player
        audio_bytes = convert_to_audio(story)
        st.audio(audio_bytes, format="audio/mp3")


if __name__ == "__main__":
    main()
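
# Usage note (a sketch; assumes this script is saved as app.py and that
# streamlit, transformers, gtts, and Pillow are already installed):
#   streamlit run app.py
# On a Streamlit-type Hugging Face Space, app.py is launched automatically.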