atifsial123 committed on
Commit
a89484e
·
verified ·
1 Parent(s): 1586e22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -32
app.py CHANGED
@@ -16,67 +16,62 @@ import gradio as gr
16
  from transformers import AutoModel, AutoTokenizer
17
  import torch
18
 
 
 
 
 
19
  # Load the dataset containing PEC numbers and names
20
def load_dataset(file_path='PEC_Numbers_and_Names.xlsx'):
    """Read the PEC numbers/names workbook into a DataFrame.

    Args:
        file_path: Location of the Excel file; defaults to
            'PEC_Numbers_and_Names.xlsx' in the working directory.

    Returns:
        The DataFrame produced by ``pd.read_excel``.

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
    """
    workbook_present = os.path.exists(file_path)
    if workbook_present:
        return pd.read_excel(file_path)
    raise FileNotFoundError(f"File not found: {file_path}")
25
 
26
- # Debugging function to get PEC number based on the name
27
def get_pec_number(name, df):
    """Look up the PEC number registered for a person's name.

    The comparison ignores surrounding whitespace and letter case on both
    the query and the 'Name' column.

    Args:
        name: The name to search for.
        df: DataFrame with at least the columns 'Name' and 'PEC No.'.
            It is not modified.

    Returns:
        The 'PEC No.' value of the first matching row, or the string
        "Name not found." when no row matches.
    """
    # Debug output kept from the original implementation.
    print("Column names in DataFrame:", df.columns.tolist())
    print(f"Looking for Name: '{name}'")

    # Normalize into a temporary Series instead of writing back into df, so
    # the caller's data keeps its original spelling. Non-string cells (NaN,
    # numbers) become None and therefore never match.
    normalized_names = df['Name'].map(
        lambda cell: cell.strip().lower() if isinstance(cell, str) else None
    )
    # A plain str has no ``.str`` accessor; use the str methods directly.
    wanted = name.strip().lower()

    result = df[normalized_names == wanted]
    if result.empty:
        print("Name not found.")
        return "Name not found."

    pec_number = result.iloc[0]['PEC No.']
    print(f"Found PEC Number: {pec_number}")
    return pec_number
43
 
44
- # Function to process the name using the Hugging Face model
45
def process_with_model(name):
    """Run *name* through the module-level tokenizer and model.

    Args:
        name: Text to encode.

    Returns:
        The model's ``last_hidden_state`` averaged over dimension 1,
        squeezed, and converted to a (nested) Python list.
    """
    encoded = tokenizer(name, return_tensors="pt")
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # dim=1 is presumably the token axis -- TODO confirm against the model.
    pooled = hidden_states.mean(dim=1)
    return pooled.squeeze().tolist()
50
 
51
  # Combine both functions to create a prediction
52
def predict(name, file):
    """Gradio callback: look up a PEC number by name and embed the name.

    Args:
        name: The name typed by the user.
        file: Uploaded file object exposing a ``.name`` path, or None to
            fall back to ``load_dataset()`` with its default path.

    Returns:
        A two-line string with the PEC number and the model output, or the
        message of a FileNotFoundError raised while producing them.
    """
    try:
        # Prefer the uploaded workbook; otherwise use the default dataset.
        df = load_dataset() if file is None else pd.read_excel(file.name)
        pec_number = get_pec_number(name, df)
        embedding = process_with_model(name)
    except FileNotFoundError as err:
        return str(err)
    return f"PEC Number: {pec_number}\nModel Output: {embedding}"
65
 
66
- # Load the model and tokenizer from Hugging Face
67
# Both objects are created once at import time and used as module-level
# globals by process_with_model.
# NOTE(review): trust_remote_code=True lets transformers execute Python code
# shipped in the model repository -- only keep it for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
69
-
70
  # Build the Gradio interface with file upload option
71
  # Gradio UI: a one-line text box and a file upload feed predict; the
  # returned string is shown as plain text.
  iface = gr.Interface(
      fn=predict,
      # Listed in the same order as predict's parameters (name, file).
      inputs=[
          gr.Textbox(lines=1, placeholder="Enter Name..."),
          gr.File(label="Upload PEC Numbers and Names file (optional)")
      ],
      outputs="text",
      title="Name to PEC Number Lookup with Model Integration",
      description="Enter a name to retrieve the corresponding PEC number and process it with a Hugging Face model. Optionally, upload the Excel file if not found."
  )
81
 
82
  # Run the Gradio interface
 
16
  from transformers import AutoModel, AutoTokenizer
17
  import torch
18
 
19
+ # Load the model and tokenizer from Hugging Face
20
# Both objects are created once at import time and used as module-level
# globals by process_with_model.
# NOTE(review): trust_remote_code=True lets transformers execute Python code
# shipped in the model repository -- only keep it for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
22
+
23
  # Load the dataset containing PEC numbers and names
24
def load_dataset(file_path=None):
    """Read the PEC numbers/names workbook into a DataFrame.

    Args:
        file_path: Location of the Excel file. None or an empty value is
            treated the same as a missing file.

    Returns:
        The DataFrame produced by ``pd.read_excel``.

    Raises:
        FileNotFoundError: If ``file_path`` is falsy or nothing exists there.
    """
    # Guard clause: reject a missing path up front, then read.
    if not file_path or not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    return pd.read_excel(file_path)
30
 
31
+ # Function to get the name based on the PEC number
32
def get_name(pec_number, df):
    """Look up the name registered under a PEC number.

    The comparison ignores surrounding whitespace and letter case on both
    the query and the 'PEC No.' column.

    Args:
        pec_number: The PEC number to search for (converted with ``str``).
        df: DataFrame with at least the columns 'PEC No.' and 'Name'.
            It is not modified.

    Returns:
        The 'Name' value of the first matching row, or the string
        "PEC Number not found." when the query is blank or nothing matches.
    """
    not_found = "PEC Number not found."

    wanted = "" if pec_number is None else str(pec_number).strip().upper()
    if not wanted:
        # A blank query can never identify a record.
        return not_found

    pec_column = df['PEC No.']
    # astype(str) lets the .str accessor work even when Excel parsed the
    # column as numbers. Normalizing into a temporary Series leaves the
    # caller's DataFrame untouched.
    normalized = pec_column.astype(str).str.strip().str.upper()
    # Exclude missing cells explicitly: their string form ("NAN") must not
    # be matchable by a query.
    result = df[pec_column.notna() & (normalized == wanted)]

    if result.empty:
        return not_found
    return result.iloc[0]['Name']
 
42
 
43
+ # Function to process the PEC number using the Hugging Face model
44
def process_with_model(pec_number):
    """Run *pec_number* through the module-level tokenizer and model.

    Args:
        pec_number: Text to encode.

    Returns:
        The model's ``last_hidden_state`` averaged over dimension 1,
        squeezed, and converted to a (nested) Python list.
    """
    encoded = tokenizer(pec_number, return_tensors="pt")
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # dim=1 is presumably the token axis -- TODO confirm against the model.
    pooled = hidden_states.mean(dim=1)
    return pooled.squeeze().tolist()
49
 
50
  # Combine both functions to create a prediction
51
def predict(pec_number, file):
    """Gradio callback: look up a name by PEC number and embed the number.

    Args:
        pec_number: The PEC number typed by the user.
        file: Uploaded file object exposing a ``.name`` path, or None when
            nothing was uploaded.

    Returns:
        A prompt to upload the file when ``file`` is None; otherwise a
        two-line string with the name and the model output, or the message
        of a FileNotFoundError raised while producing them.
    """
    # Without an uploaded workbook there is nothing to search.
    if file is None:
        return "Please upload the PEC Numbers and Names file."

    try:
        df = pd.read_excel(file.name)
        name = get_name(pec_number, df)
        embedding = process_with_model(pec_number)
    except FileNotFoundError as err:
        return str(err)
    return f"Name: {name}\nModel Output: {embedding}"
64
 
 
 
 
 
65
  # Build the Gradio interface with file upload option
66
  # Gradio UI: a one-line text box and a file upload feed predict; the
  # returned string is shown as plain text.
  iface = gr.Interface(
      fn=predict,
      # Listed in the same order as predict's parameters (pec_number, file).
      inputs=[
          gr.Textbox(lines=1, placeholder="Enter PEC Number..."),
          gr.File(label="Upload PEC Numbers and Names file")
      ],
      outputs="text",
      title="PEC Number to Name Lookup",
      description="Enter a PEC number to retrieve the corresponding name and process it with a Hugging Face model. Please upload the Excel file."
  )
76
 
77
  # Run the Gradio interface