Spaces:

fkonovalenko
/

llm4career

Sleeping

llm4career / ml.py

first commit

a7da221 7 months ago

1.54 kB

	import pandas as pd
	import torch
	import re

	from llm import TransformerRegrModel


	class VacancyAnalyzer:
	    """Turn one vacancy record into a text description and classify it.

	    The record is supplied as a flat ``dict`` (one value per field) and is
	    stored as a single-row ``DataFrame``.  ``classify`` joins the text
	    fields, cleans the result and feeds it to a ``TransformerRegrModel``
	    whose weights are loaded from ``transformer_path``.
	    """

	    # Descriptions shorter than this (after cleaning) are not sent to the model.
	    MIN_DESCRIPTION_LENGTH = 100

	    def __init__(self, transformer_path: str, inputs: dict):
	        """Store the checkpoint path and build the single-row input frame.

	        Args:
	            transformer_path: Path passed to ``torch.load`` in ``classify``.
	            inputs: Mapping of field name to a scalar value for one vacancy.
	                The label/bookkeeping keys ``conversion``,
	                ``conversion_class`` and ``id`` are discarded when present.
	        """
	        self.transformer_path = transformer_path
	        # errors='ignore': records without the label columns (presumably the
	        # normal case at inference time -- confirm with callers) must not
	        # fail with KeyError.
	        self.inputs = pd.DataFrame(inputs, index=[0]).drop(
	            columns=['conversion', 'conversion_class', 'id'],
	            errors='ignore',
	        )
	        self.cat_features = ['profession', 'grade', 'location']
	        self.text_features = ['emp_brand', 'mandatory', 'additional', 'comp_stages', 'work_conditions']
	        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	    @staticmethod
	    def _as_text(value) -> str:
	        """Return *value* as text, mapping missing values (None/NaN/NA) to ''."""
	        if isinstance(value, str):
	            return value
	        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
	            return ''
	        return str(value)

	    def __cleaner__(self, txt: str) -> str:
	        """Remove ``_..._`` spans and all newline/tab characters from *txt*.

	        The underscore spans are matched non-greedily and only within a single
	        line (``.`` does not match a newline); they are removed before the
	        newlines and tabs are stripped.
	        """
	        txt = re.sub(r'\_(.*?)\_', r'', txt)
	        # '+' instead of '*': same output, but no empty matches at every position.
	        txt = re.sub(r'[\n\t]+', r'', txt)
	        return txt

	    def classify(self) -> tuple:
	        """Classify the vacancy description.

	        Returns:
	            ``('Too short text', 'unknown')`` when the cleaned description has
	            fewer than ``MIN_DESCRIPTION_LENGTH`` characters; otherwise
	            ``('Text analyzing finished', prediction)`` where ``prediction``
	            is the NumPy array produced by ``argmax`` over dimension 1 of the
	            model output.

	        Raises:
	            KeyError: If one of ``text_features`` is absent from the inputs.
	        """
	        parts = [self._as_text(self.inputs[name].iloc[0]) for name in self.text_features]
	        # Every field is followed by a single space, including the last one;
	        # the trailing space is counted by the length check below.
	        description = self.__cleaner__(' '.join(parts) + ' ')
	        if len(description) < self.MIN_DESCRIPTION_LENGTH:
	            return 'Too short text', 'unknown'

	        # NOTE(review): arguments are presumably the backbone name and the
	        # number of output classes -- confirm against llm.TransformerRegrModel.
	        tbert = TransformerRegrModel('rubert', 3)
	        # SECURITY: torch.load unpickles the checkpoint; only load files from
	        # a trusted source (consider weights_only=True where supported).
	        state = torch.load(self.transformer_path, map_location=self.device)
	        tbert.load_state_dict(state)
	        tbert.to(self.device)
	        tbert.eval()
	        with torch.no_grad():
	            outputs, _, _ = tbert(description)
	            prediction = torch.argmax(outputs, 1).cpu().numpy()
	        return 'Text analyzing finished', prediction