Spaces:

atifsial123
/

Engineer

Sleeping

App Files Files Community

atifsial123 committed on Sep 3, 2024

Commit

c68cde2

verified ·

1 Parent(s): 5e0fc5d

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -82

app.py CHANGED Viewed

@@ -1,100 +1,55 @@
 import os
-import subprocess
-# Function to install a package if it is not already installed
-def install(package):
-    subprocess.check_call([os.sys.executable, "-m", "pip", "install", package])
-# Ensure the necessary packages are installed
-install("transformers")
-install("torch")
-install("pandas")
-install("scikit-learn")
-install("gradio")
-import os
 import pandas as pd
 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 import torch
-from sklearn.model_selection import train_test_split
-# Function to convert a list to a DataFrame
-def list_to_dataframe(data_list):
-    df = pd.DataFrame(data_list)
-    return df
-# Load your dataset from a file
-def load_dataset(file_path=None):
-    if file_path is None:
-        file_path = '/content/Valid-part-2.xlsx'  # Default path if the file is uploaded manually to Colab
-    # Check if the file exists
-    if file_path and not os.path.exists(file_path):
-        print(f"File not found at '{file_path}', using default list data...")
-        # Fallback to a default list if file is not found
-        default_data = [
-            {'text': 'Example sentence 1', 'label': 'label1'},
-            {'text': 'Example sentence 2', 'label': 'label2'},
-        ]
-        return list_to_dataframe(default_data)
-    try:
-        df = pd.read_excel(file_path)
-        print("Columns in the dataset:", df.columns.tolist())
-        return df
-    except Exception as e:
-        print(f"Error loading dataset: {e}")
-        return None
-# Preprocess the data
-def preprocess_data(df):
-    # Add your preprocessing steps here
     return df
-# Train your model
-def train_model(df):
-    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
-    # Load your pre-trained model and tokenizer from Hugging Face
-    tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
-    model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
-    # Training code placeholder
-    return model
-# Define the Gradio interface function
-def predict(input_text):
-    tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
-    model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
-    inputs = tokenizer(input_text, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
-    return outputs.last_hidden_state
-# Build the Gradio interface
-def build_interface(file_path=None):
-    df = load_dataset(file_path)
-    if df is None:
-        return None
-    df = preprocess_data(df)
-    model = train_model(df)
-    iface = gr.Interface(
-        fn=predict,
-        inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
-        outputs="text"
-    )
-    return iface
 # Run the Gradio interface
 if __name__ == "__main__":
-    file_path = None  # Change this to your specific file path if needed
-    iface = build_interface(file_path=file_path)
-    if iface:
-        iface.launch()
-    else:
-        print("Failed to build the Gradio interface. Please check the dataset and model.")

 import os
 import pandas as pd
 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 import torch
+# Load the dataset containing PEC numbers and names
+def load_dataset(file_path='PEC_Numbers_and_Names.xlsx'):
+    """Read the Excel workbook at *file_path* and return it as a DataFrame.
+
+    The default path is relative, so it is resolved against the current
+    working directory of the process. No error handling is done here: any
+    exception raised by ``pd.read_excel`` propagates to the caller.
+    """
+    df = pd.read_excel(file_path)
     return df
+# Load the model and tokenizer from Hugging Face
+# Both calls run at module level, so the load happens as soon as this file is
+# imported or executed; the resulting objects are reused by process_with_model().
+# NOTE(review): trust_remote_code=True allows Python code shipped in the model
+# repository to run locally - confirm the repository is trusted.
+tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
+model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
+# Define the function to get the name based on the PEC number
+def get_name(pec_number, df):
+    """Return the 'Name' value of the first row of *df* whose 'PEC No.' equals *pec_number*.
+
+    Returns the string "PEC Number not found." when no row matches.
+
+    NOTE(review): the match is a plain ``==`` with no normalisation. The
+    caller in this file passes the raw text of a Gradio Textbox, so if the
+    'PEC No.' column was read from Excel as numbers, or the cells carry
+    extra whitespace, no row will match - confirm the column's dtype.
+    """
+    # Boolean-mask filter: keeps only the rows where the column equals the input.
+    result = df[df['PEC No.'] == pec_number]
+    if not result.empty:
+        # Several rows may match; only the first one is reported.
+        return result.iloc[0]['Name']
+    else:
+        return "PEC Number not found."
+# Function to process the PEC number using the Hugging Face model
+def process_with_model(pec_number):
+    """Run *pec_number* through the module-level tokenizer and model.
+
+    Returns ``outputs.last_hidden_state`` averaged over dimension 1,
+    squeezed, and converted to plain Python values with ``tolist()``.
+    """
+    inputs = tokenizer(pec_number, return_tensors="pt")
+    # Inference only: no gradients are needed for the forward pass.
     with torch.no_grad():
         outputs = model(**inputs)
+    # Average the last hidden state over dim 1 (presumably the token axis - confirm),
+    # drop size-1 dimensions and return Python numbers rather than a tensor.
+    # The value is not a string here; the caller formats it into text.
+    # In a real application, you might want to use this in a more meaningful way
+    return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
+# Combine both functions to create a prediction
+def predict(pec_number):
+    """Gradio callback: look up the name for *pec_number* and attach the model output.
+
+    Reads the module-level ``df``. Returns a single two-line string: a
+    "Name: ..." line followed by a "Model Output: ..." line.
+    """
+    name = get_name(pec_number, df)
+    # The model is run regardless of whether the lookup found a row.
+    model_output = process_with_model(pec_number)
+    return f"Name: {name}\nModel Output: {model_output}"
+# Load the dataset
+# Read once, at module level, with the default workbook path; predict() reads
+# this module-level name on every request.
+df = load_dataset()
+# Build the Gradio interface
+# One single-line text input, one text output, with predict() as the callback.
+iface = gr.Interface(
+    fn=predict,
+    inputs=gr.Textbox(lines=1, placeholder="Enter PEC Number..."),
+    outputs="text",
+    title="PEC Number Lookup with Model Integration",
+    description="Enter a PEC number to retrieve the corresponding name and process it with a Hugging Face model."
+)
 # Run the Gradio interface
+# The interface object above is built unconditionally; only the launch is
+# restricted to running this file as a script.
 if __name__ == "__main__":
+    iface.launch()