# NOTE: the lines below are file-viewer page residue, not part of the program.
# MariaK's picture
# Create app.py
# 641bde8 verified
# raw
# history blame
# 2.97 kB
import gradio as gr
import json
import os
# Load the connector catalogs from the JSON files that sit next to this script.
# Each catalog is keyed by connector name; the entries are read further down
# for their 'imports', 'configs' and 'docs' fields.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform's
# locale default (JSON text is UTF-8).
with open('source_connectors.json', 'r', encoding='utf-8') as f:
    source_connectors = json.load(f)
with open('destination_connectors.json', 'r', encoding='utf-8') as f:
    destination_connectors = json.load(f)
def generate_documentation_link(source, destination):
    """Return one Markdown line linking to both connectors' documentation.

    Args:
        source: Source connector entry; its ``'source_connector'`` and
            ``'docs'`` keys are read.
        destination: Destination connector entry; its
            ``'destination_connector'`` and ``'docs'`` keys are read.

    Returns:
        Two Markdown links (source first) separated by ``" | "``.
    """
    labelled_urls = (
        (source['source_connector'], source['docs']),
        (destination['destination_connector'], destination['docs']),
    )
    return " | ".join(
        f"[{name} documentation]({url})" for name, url in labelled_urls
    )
def _indent_config_lines(configs, prefix=' ' * 8):
    """Strip ``configs`` and put ``prefix`` in front of every one of its lines.

    The default prefix matches the nesting depth of the keyword arguments
    inside ``Pipeline.from_configs(`` in the generated script.
    """
    return '\n'.join(prefix + line for line in configs.strip().split('\n'))


def generate_code(source, destination, chunking, embedding):
    """Build an unstructured-ingest pipeline script for the chosen connectors.

    Args:
        source: Key into the module-level ``source_connectors`` mapping.
        destination: Key into the module-level ``destination_connectors``
            mapping.
        chunking: Truthy to emit a ``ChunkerConfig`` argument; otherwise a
            placeholder comment is emitted in its place.
        embedding: Embedding provider name interpolated into
            ``EmbedderConfig``; falsy to emit a placeholder comment instead.

    Returns:
        A ``(code, doc_link)`` tuple: the generated script text and the
        Markdown returned by ``generate_documentation_link``.

    Raises:
        KeyError: If ``source`` or ``destination`` is not a key of the
            corresponding connector mapping.
    """
    source_connector = source_connectors[source]
    destination_connector = destination_connectors[destination]

    # Connector snippets are spliced in as keyword arguments of
    # Pipeline.from_configs(...), so they must sit at that call's depth.
    # NOTE(review): assumes each 'configs' snippet ends with a trailing comma,
    # since further keyword arguments follow it - confirm against the JSON.
    indented_source_configs = _indent_config_lines(source_connector['configs'])
    indented_destination_configs = _indent_config_lines(
        destination_connector['configs'])

    if chunking:
        chunker_argument = 'chunker_config=ChunkerConfig(chunking_strategy="by_title"),'
    else:
        chunker_argument = '# Chunking is disabled'

    if embedding:
        # The trailing comma is required: the destination keyword arguments
        # follow this line, and without it the generated script is a
        # syntax error.
        embedder_argument = (
            'embedder_config=EmbedderConfig(embedding_provider="'
            + embedding + '"),')
    else:
        embedder_argument = '# Embedding is disabled'

    # The template is written flush-left on purpose: its text is the generated
    # script verbatim, so the indentation below is the script's indentation.
    code = f'''
import os
from unstructured_ingest.v2.pipeline.pipeline import Pipeline
from unstructured_ingest.v2.interfaces import ProcessorConfig
from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
{source_connector['imports']}
{destination_connector['imports']}
from unstructured_ingest.v2.processes.chunker import ChunkerConfig
from unstructured_ingest.v2.processes.embedder import EmbedderConfig
if __name__ == "__main__":
    Pipeline.from_configs(
        context=ProcessorConfig(),
{indented_source_configs}
        partitioner_config=PartitionerConfig(
            partition_by_api=True,
            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
            partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
            strategy="hi_res",
        ),
        {chunker_argument}
        {embedder_argument}
{indented_destination_configs}
    ).run()
'''
    doc_link = generate_documentation_link(source_connector, destination_connector)
    return code, doc_link
# Input widgets, listed in the positional order of generate_code's parameters:
# source, destination, chunking, embedding.
_input_widgets = [
    gr.Dropdown(
        choices=list(source_connectors),
        label="Get unstructured documents from:",
    ),
    gr.Dropdown(
        choices=list(destination_connectors),
        label="Upload RAG-ready documents to:",
    ),
    gr.Checkbox(label="Check to enable chunking"),
    gr.Dropdown(
        choices=["langchain-openai", "langchain-huggingface"],
        label="Embedding provider:",
    ),
]

# Output widgets, matching generate_code's (code, doc_link) return value.
_output_widgets = [
    gr.Code(language="python", label="Generated Code"),
    gr.Markdown(label="Documentation Links"),
]

demo = gr.Interface(
    fn=generate_code,
    inputs=_input_widgets,
    outputs=_output_widgets,
    title="Unstructured-Ingest Code Generator",
    description="Generate code for the unstructured-ingest library based on your inputs.",
)

# Start the Gradio app as soon as the module is executed.
demo.launch()