|
import json
import os
import textwrap

import gradio as gr
|
|
|
|
|
# Connector catalogs, keyed by the connector name offered in the UI dropdowns.
# generate_code reads each entry's 'imports' and 'configs' snippets and
# generate_documentation_link reads its 'docs' URL.
# JSON is UTF-8 by specification, so decode explicitly rather than relying on
# the platform's locale-dependent default encoding.
with open('source_connectors.json', 'r', encoding='utf-8') as f:
    source_connectors = json.load(f)

with open('destination_connectors.json', 'r', encoding='utf-8') as f:
    destination_connectors = json.load(f)
|
|
|
def generate_documentation_link(source, destination):
    """Return Markdown links to both connectors' documentation.

    Args:
        source: Source connector entry; its 'source_connector' (display
            name) and 'docs' (URL) keys are read.
        destination: Destination connector entry; its
            'destination_connector' and 'docs' keys are read.

    Returns:
        Two Markdown links, source first, separated by " | ".
    """
    link_template = "[{} documentation]({})"
    source_link = link_template.format(
        source['source_connector'], source['docs'])
    destination_link = link_template.format(
        destination['destination_connector'], destination['docs'])
    return " | ".join((source_link, destination_link))
|
|
|
def generate_code(source, destination, chunking, embedding):
    """Build an unstructured-ingest pipeline script for the chosen connectors.

    Args:
        source: Key into ``source_connectors`` selecting the source connector.
        destination: Key into ``destination_connectors`` selecting the
            destination connector.
        chunking: Truthy to emit a ``ChunkerConfig`` argument (``by_title``
            strategy); otherwise a placeholder comment is emitted.
        embedding: Embedding provider name to emit in an ``EmbedderConfig``
            argument; falsy to emit a placeholder comment instead.

    Returns:
        A ``(code, doc_link)`` tuple: the generated script text and the
        Markdown documentation links built by ``generate_documentation_link``.

    Raises:
        KeyError: If ``source`` or ``destination`` is not a key of its
            catalog, or the selected entry lacks ``imports`` or ``configs``.
    """
    source_connector = source_connectors[source]
    destination_connector = destination_connectors[destination]

    # The connector config snippets become keyword arguments of
    # Pipeline.from_configs(...), which sits two indentation levels deep in
    # the generated script. The placeholders below are placed at column 0 so
    # the first snippet line is not indented twice.
    # NOTE(review): assumes each 'configs' snippet is a run of keyword
    # arguments and that the source snippet ends with a comma - confirm
    # against the connector JSON files.
    argument_indent = ' ' * 8
    indented_source_configs = textwrap.indent(
        source_connector['configs'].strip(), argument_indent)
    indented_destination_configs = textwrap.indent(
        destination_connector['configs'].strip(), argument_indent)

    if chunking:
        chunker_line = 'chunker_config=ChunkerConfig(chunking_strategy="by_title"),'
    else:
        chunker_line = '# Chunking is disabled'

    if embedding:
        # The trailing comma is required: the destination configs follow as
        # further keyword arguments of the same call, so omitting it makes
        # the generated script a syntax error.
        embedder_line = (
            f'embedder_config=EmbedderConfig(embedding_provider="{embedding}"),')
    else:
        embedder_line = '# Embedding is disabled'

    code = f'''
import os
from unstructured_ingest.v2.pipeline.pipeline import Pipeline
from unstructured_ingest.v2.interfaces import ProcessorConfig
from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
{source_connector['imports']}
{destination_connector['imports']}
from unstructured_ingest.v2.processes.chunker import ChunkerConfig
from unstructured_ingest.v2.processes.embedder import EmbedderConfig

if __name__ == "__main__":
    Pipeline.from_configs(
        context=ProcessorConfig(),
{indented_source_configs}
        partitioner_config=PartitionerConfig(
            partition_by_api=True,
            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
            partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
            strategy="hi_res",
        ),
        {chunker_line}
        {embedder_line}
{indented_destination_configs}
    ).run()
'''
    doc_link = generate_documentation_link(source_connector, destination_connector)
    return code, doc_link
|
|
|
# Input widgets, listed in the order of generate_code's parameters:
# source, destination, chunking, embedding.
_demo_inputs = [
    gr.Dropdown(
        choices=list(source_connectors.keys()),
        label="Get unstructured documents from:",
    ),
    gr.Dropdown(
        choices=list(destination_connectors.keys()),
        label="Upload RAG-ready documents to:",
    ),
    gr.Checkbox(label="Check to enable chunking"),
    gr.Dropdown(
        choices=["langchain-openai", "langchain-huggingface"],
        label="Embedding provider:",
    ),
]

# Output widgets, matching generate_code's (code, doc_link) return value.
_demo_outputs = [
    gr.Code(language="python", label="Generated Code"),
    gr.Markdown(label="Documentation Links"),
]

demo = gr.Interface(
    fn=generate_code,
    inputs=_demo_inputs,
    outputs=_demo_outputs,
    title="Unstructured-Ingest Code Generator",
    description="Generate code for the unstructured-ingest library based on your inputs.",
)

# Start the web UI as soon as the script is executed.
demo.launch()
|
|