atifsial123 committed on
Commit
c68cde2
·
verified ·
1 Parent(s): 5e0fc5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -82
app.py CHANGED
@@ -1,100 +1,55 @@
1
  import os
2
- import subprocess
3
-
4
- # Function to install a package if it is not already installed
5
- def install(package):
6
- subprocess.check_call([os.sys.executable, "-m", "pip", "install", package])
7
-
8
- # Ensure the necessary packages are installed
9
- install("transformers")
10
- install("torch")
11
- install("pandas")
12
- install("scikit-learn")
13
- install("gradio")
14
- import os
15
  import pandas as pd
16
  import gradio as gr
17
  from transformers import AutoModel, AutoTokenizer
18
  import torch
19
- from sklearn.model_selection import train_test_split
20
-
21
- # Function to convert a list to a DataFrame
22
- def list_to_dataframe(data_list):
23
- df = pd.DataFrame(data_list)
24
- return df
25
-
26
- # Load your dataset from a file
27
- def load_dataset(file_path=None):
28
- if file_path is None:
29
- file_path = '/content/Valid-part-2.xlsx' # Default path if the file is uploaded manually to Colab
30
-
31
- # Check if the file exists
32
- if file_path and not os.path.exists(file_path):
33
- print(f"File not found at '{file_path}', using default list data...")
34
- # Fallback to a default list if file is not found
35
- default_data = [
36
- {'text': 'Example sentence 1', 'label': 'label1'},
37
- {'text': 'Example sentence 2', 'label': 'label2'},
38
- ]
39
- return list_to_dataframe(default_data)
40
-
41
- try:
42
- df = pd.read_excel(file_path)
43
- print("Columns in the dataset:", df.columns.tolist())
44
- return df
45
- except Exception as e:
46
- print(f"Error loading dataset: {e}")
47
- return None
48
 
49
- # Preprocess the data
50
- def preprocess_data(df):
51
- # Add your preprocessing steps here
52
  return df
53
 
54
- # Train your model
55
- def train_model(df):
56
- train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
57
-
58
- # Load your pre-trained model and tokenizer from Hugging Face
59
- tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
60
- model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
61
 
62
- # Training code placeholder
63
- return model
 
 
 
 
 
64
 
65
- # Define the Gradio interface function
66
- def predict(input_text):
67
- tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
68
- model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
69
-
70
- inputs = tokenizer(input_text, return_tensors="pt")
71
  with torch.no_grad():
72
  outputs = model(**inputs)
73
-
74
- return outputs.last_hidden_state
 
75
 
76
- # Build the Gradio interface
77
- def build_interface(file_path=None):
78
- df = load_dataset(file_path)
79
- if df is None:
80
- return None
 
 
 
81
 
82
- df = preprocess_data(df)
83
- model = train_model(df)
84
-
85
- iface = gr.Interface(
86
- fn=predict,
87
- inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
88
- outputs="text"
89
- )
90
- return iface
91
 
92
  # Run the Gradio interface
93
  if __name__ == "__main__":
94
- file_path = None # Change this to your specific file path if needed
95
- iface = build_interface(file_path=file_path)
96
- if iface:
97
- iface.launch()
98
- else:
99
- print("Failed to build the Gradio interface. Please check the dataset and model.")
100
 
 
1
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import pandas as pd
3
  import gradio as gr
4
  from transformers import AutoModel, AutoTokenizer
5
  import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # Load the dataset containing PEC numbers and names
8
def load_dataset(file_path='PEC_Numbers_and_Names.xlsx'):
    """Read the PEC lookup spreadsheet into a pandas DataFrame.

    Args:
        file_path: Path of the Excel workbook to read. Defaults to
            'PEC_Numbers_and_Names.xlsx', resolved relative to the
            current working directory.

    Returns:
        The DataFrame produced by ``pandas.read_excel`` for that file.
        ``get_name`` in this file reads its 'PEC No.' and 'Name' columns.

    Any error raised by ``pandas.read_excel`` (for example a missing file)
    propagates to the caller unchanged.
    """
    return pd.read_excel(file_path)
11
 
12
+ # Load the model and tokenizer from Hugging Face
13
# Hub id of the checkpoint; the tokenizer and the model must come from the
# same repository, so the id is written once and shared.
_MODEL_ID = "Alibaba-NLP/gte-multilingual-base"

# Both objects are created once at import time and reused by every request.
# trust_remote_code=True permits transformers to run the custom modelling
# code published in that model repository.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(_MODEL_ID, trust_remote_code=True)
 
 
 
 
15
 
16
+ # Define the function to get the name based on the PEC number
17
def get_name(pec_number, df):
    """Return the name registered for a PEC number.

    The lookup compares trimmed string forms of both sides. The value coming
    from the Gradio textbox is always a string, while the 'PEC No.' column may
    have been parsed from Excel as integers; a raw ``==`` between the two never
    matches. Surrounding whitespace in either the input or the cell is ignored
    for the same reason.

    Args:
        pec_number: PEC number to search for (string or number).
        df: DataFrame with a 'PEC No.' column and a 'Name' column.

    Returns:
        The 'Name' value of the first matching row, or the string
        "PEC Number not found." when nothing matches or the input is blank.

    Raises:
        KeyError: If ``df`` lacks the 'PEC No.' or 'Name' column.
    """
    not_found = "PEC Number not found."
    if pec_number is None:
        return not_found

    wanted = str(pec_number).strip()
    if not wanted:
        return not_found

    pec_column = df['PEC No.']
    # notna() keeps empty cells from matching their "nan" string form.
    is_match = pec_column.notna() & (pec_column.astype(str).str.strip() == wanted)
    names = df.loc[is_match, 'Name']
    if names.empty:
        return not_found
    return names.iloc[0]
23
 
24
+ # Function to process the PEC number using the Hugging Face model
25
def process_with_model(pec_number):
    """Run the PEC number text through the model and return a pooled vector.

    The text is tokenized with the module-level ``tokenizer`` and fed to the
    module-level ``model`` with gradient tracking disabled. The model's
    ``last_hidden_state`` is averaged over dimension 1, squeezed, and
    converted to plain Python numbers.

    Args:
        pec_number: Text to encode.

    Returns:
        The result of ``.tolist()`` on the averaged, squeezed hidden state;
        presumably a flat list of floats for a single input string.
    """
    encoded = tokenizer(pec_number, return_tensors="pt")

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        result = model(**encoded)

    pooled = result.last_hidden_state.mean(dim=1)
    return pooled.squeeze().tolist()
32
 
33
+ # Combine both functions to create a prediction
34
def predict(pec_number):
    """Gradio callback: look up the name and attach the model output.

    Args:
        pec_number: Text entered by the user in the input textbox.

    Returns:
        A two-line string: the name found by ``get_name`` in the
        module-level ``df`` (or its not-found message), followed by the
        value returned by ``process_with_model`` for the same text.
    """
    # The table lookup runs first, then the model pass on the same text.
    name = get_name(pec_number, df)
    model_output = process_with_model(pec_number)

    report = f"Name: {name}\nModel Output: {model_output}"
    return report
38
+
39
+ # Load the dataset
40
# Read the lookup table once at import time; predict() reads this global.
df = load_dataset()

# Single-line text input for the PEC number.
_pec_input = gr.Textbox(lines=1, placeholder="Enter PEC Number...")

# Web UI: one text input, one text output, backed by predict().
iface = gr.Interface(
    fn=predict,
    inputs=_pec_input,
    outputs="text",
    title="PEC Number Lookup with Model Integration",
    description="Enter a PEC number to retrieve the corresponding name and process it with a Hugging Face model.",
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
54
+
 
 
 
 
55