File size: 2,683 Bytes
cc1cc95
a90affe
cc1cc95
a90affe
 
cc1cc95
a90affe
 
 
cc1cc95
 
 
f2a9e61
cc1cc95
 
 
 
a90affe
cc1cc95
 
 
 
a90affe
cc1cc95
 
 
 
 
 
 
 
 
 
 
 
 
a90affe
 
cc1cc95
 
 
 
 
 
 
 
a90affe
 
 
cc1cc95
 
 
 
a90affe
cc1cc95
 
 
a90affe
cc1cc95
 
 
 
 
 
481b009
cc1cc95
 
 
 
 
 
 
 
 
 
481b009
cc1cc95
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import streamlit as st
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers.utils import logging

# Set up logging
logging.set_verbosity_info()
logger = logging.get_logger("transformers")

# Model names
original_model_name = 't5-small'
fine_tuned_model_name = 'daljeetsingh/sql_ft_t5small_kag'


@st.cache_resource
def _load_models():
    """
    Load the tokenizer and both seq2seq models and place the models on the device.

    Streamlit re-executes this script on every widget interaction. Caching the
    loaded objects as a shared resource means the checkpoints are read and
    moved to the device once, instead of on every rerun.

    Returns:
        A ``(tokenizer, original_model, fine_tuned_model, device)`` tuple, where
        ``device`` is ``'cuda'`` when CUDA is available and ``'cpu'`` otherwise.
    """
    # Use the GPU when one is available, otherwise fall back to the CPU
    target_device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Both models share the tokenizer of the original checkpoint
    shared_tokenizer = AutoTokenizer.from_pretrained(original_model_name)
    base_model = AutoModelForSeq2SeqLM.from_pretrained(original_model_name, torch_dtype=torch.bfloat16)
    tuned_model = AutoModelForSeq2SeqLM.from_pretrained(fine_tuned_model_name, torch_dtype=torch.bfloat16)

    base_model.to(target_device)
    tuned_model.to(target_device)
    return shared_tokenizer, base_model, tuned_model, target_device


# Load models and tokenizer (cached across Streamlit reruns)
tokenizer, original_model, fine_tuned_model, device = _load_models()

def generate_sql_query(prompt):
    """
    Generate SQL queries using both the original and fine-tuned models.

    Args:
        prompt: Text prompt that is tokenized and fed to both models.

    Returns:
        A ``(original_sql, fine_tuned_sql)`` tuple with the decoded output of
        the original and the fine-tuned model, in that order. If tokenization
        or generation fails, returns ``("Error: <message>", None)`` instead of
        raising, so the second element being ``None`` signals a failure.
    """
    try:
        # Tokenize inside the try so tokenizer/device-transfer failures are
        # reported through the same error path as generation failures.
        inputs = tokenizer(prompt, return_tensors='pt').to(device)

        decoded_outputs = []
        for model in (original_model, fine_tuned_model):
            # Unpack the whole encoding so the attention mask produced by the
            # tokenizer is passed along with the input ids.
            output_ids = model.generate(
                **inputs,
                max_new_tokens=200,
            )
            decoded_outputs.append(
                tokenizer.decode(output_ids[0], skip_special_tokens=True)
            )

        original_sql, fine_tuned_sql = decoded_outputs
        return original_sql, fine_tuned_sql
    except Exception as e:
        # logger.exception records the traceback in addition to the message
        logger.exception("Error: %s", e)
        return f"Error: {str(e)}", None

# Streamlit App Interface
st.title("SQL Query Generation")
st.markdown("This application generates SQL queries based on your input prompt.")

# Input prompt
prompt = st.text_area(
    "Enter your prompt here...",
    value="Find all employees who joined after 2020.",
    height=150
)

# Generate button
if st.button("Generate"):
    # Treat whitespace-only input the same as an empty prompt
    cleaned_prompt = prompt.strip() if prompt else ""
    if cleaned_prompt:
        with st.spinner("Generating SQL queries..."):
            original_sql, fine_tuned_sql = generate_sql_query(cleaned_prompt)

        if fine_tuned_sql is None:
            # generate_sql_query signals failure with (error message, None);
            # show it as an error instead of rendering it as a SQL result.
            st.error(original_sql)
        else:
            st.subheader("Original Model Output")
            st.text_area("Original SQL Query", value=original_sql, height=200)
            st.subheader("Fine-Tuned Model Output")
            st.text_area("Fine-Tuned SQL Query", value=fine_tuned_sql, height=200)
    else:
        st.warning("Please enter a prompt to generate SQL queries.")

# Examples
st.sidebar.title("Examples")
st.sidebar.markdown("""
- **Example 1**: Find all employees who joined after 2020.
- **Example 2**: Retrieve the names of customers who purchased product X in the last month.
""")