Spaces:

aakash0563
/

Gemini-vision

Running

App Files Community

aakash0563 committed on Feb 18, 2024

Commit

8b4a847

verified ·

1 Parent(s): a5372c2

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -74

app.py CHANGED Viewed

@@ -1,97 +1,97 @@
-# import google.generativeai as genai
-# from PIL import Image
-# import gradio as gr
-# import numpy as np
-# import os
-# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-# # Now you can use hugging_face_api_key in your code
-# genai.configure(api_key=GOOGLE_API_KEY)
-# model = genai.GenerativeModel('gemini-pro-vision')
-# def process_image_and_text(image, text):
-#   # Assuming image is the input from Gradio
-#   if text:
-#     image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
-#     image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
-#     response = model.generate_content([text, image])
-#     return response.text
-#   else:
-#     image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
-#     image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
-#     response = model.generate_content(["Tell me about this image in bulletin format", image])
-#     return response.text
-# iface = gr.Interface(
-#     process_image_and_text,
-#     inputs=["image", "textbox"],  # Specify image and text inputs
-#     outputs="textbox",          # Specify text output
-#     title="Image and Text Processor",  # Set the app title
-# )
-# iface.launch(debug=True, share=True)  # Launch the Gradio app
-import google.generativeai as genai
-import os
-import os
-from pdf2image import convert_from_path
-from PIL import Image
-import pdf2image
-import numpy as np
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-# Now you can use hugging_face_api_key in your code
-genai.configure(api_key=GOOGLE_API_KEY)
-import gradio as gr
-# print(llm.predict("Who is the PM of India?"))
-model = genai.GenerativeModel('gemini-pro-vision')
-def process_image_and_text(images):
-    response = {}
-    for i,image in enumerate(images):
-        # # Assuming image is the input from Gradio
-        # image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
-        # image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
-        response = model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
-        response[i] = response.text
-    return response
-def input_pdf_setup(uploaded_pdf):
-    # Convert PDF pages to images
-    images = convert_from_path(uploaded_pdf, dpi=200)
-    return images
-def extract_answer(uploaded_pdf):
-    """Retrieves answers from processed images and presents them clearly."""
-    images = input_pdf_setup(uploaded_pdf)
-    responses = process_image_and_text(images=images)
-    # Present results in a user-friendly format
-    answers = []
-    for i, response in enumerate(responses.values()):
-        answers.append(f"Answer for question {i+1}:\n {response}")
-    return "\n".join(answers)
-# Create Gradio interface
-iface = gr.Interface(
-    fn=extract_answer,
-    inputs="file",
-    outputs="text",
-    title="Question-Answering with Gemstone.ai",
-    description="Upload a PDF containing questions, and get step-by-step answers!",
-    allow_flagging=True,
-)
-# Launch the Gradio application
-iface.launch(share=True, debug=True)

+import google.generativeai as genai
+from PIL import Image
+import gradio as gr
+import numpy as np
+import os
+# Read the Google API key from the environment; os.getenv yields None when it is unset.
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+# Configure the google.generativeai client with that key.
+genai.configure(api_key=GOOGLE_API_KEY)
+# Module-level 'gemini-pro-vision' model handle used by process_image_and_text.
+model = genai.GenerativeModel('gemini-pro-vision')
+def process_image_and_text(image, text):
+  # Assuming image is the input from Gradio
+  if text:
+    image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
+    image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
+    response = model.generate_content([text, image])
+    return response.text
+  else:
+    image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
+    image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
+    response = model.generate_content(["Tell me about this image in bulletin format", image])
+    return response.text
+# Wire the handler into a Gradio UI: an image and a textbox in, a textbox out.
+iface = gr.Interface(
+    process_image_and_text,
+    inputs=["image", "textbox"],  # Handed to the handler as (image, text)
+    outputs="textbox",          # Specify text output
+    title="Image and Text Processor",  # Set the app title
+)
+# NOTE(review): share=True requests a public share link; presumably redundant
+# when the app is hosted as a Hugging Face Space -- confirm before relying on it.
+iface.launch(debug=True, share=True)  # Launch the Gradio app
+# import google.generativeai as genai
+# import os
+# import os
+# from pdf2image import convert_from_path
+# from PIL import Image
+# import pdf2image
+# import numpy as np
+# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+# # Now you can use hugging_face_api_key in your code
+# genai.configure(api_key=GOOGLE_API_KEY)
+# import gradio as gr
+# # print(llm.predict("Who is the PM of India?"))
+# model = genai.GenerativeModel('gemini-pro-vision')
+# def process_image_and_text(images):
+#     response = {}
+#     for i,image in enumerate(images):
+#         # # Assuming image is the input from Gradio
+#         # image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
+#         # image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
+#         response = model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
+#         response[i] = response.text
+#     return response
+# def input_pdf_setup(uploaded_pdf):
+#     # Convert PDF pages to images
+#     images = convert_from_path(uploaded_pdf, dpi=200)
+#     return images
+# def extract_answer(uploaded_pdf):
+#     """Retrieves answers from processed images and presents them clearly."""
+#     images = input_pdf_setup(uploaded_pdf)
+#     responses = process_image_and_text(images=images)
+#     # Present results in a user-friendly format
+#     answers = []
+#     for i, response in enumerate(responses.values()):
+#         answers.append(f"Answer for question {i+1}:\n {response}")
+#     return "\n".join(answers)
+# # Create Gradio interface
+# iface = gr.Interface(
+#     fn=extract_answer,
+#     inputs="file",
+#     outputs="text",
+#     title="Question-Answering with Gemstone.ai",
+#     description="Upload a PDF containing questions, and get step-by-step answers!",
+#     allow_flagging=True,
+# )
+# # Launch the Gradio application
+# iface.launch(share=True, debug=True)