Yomna35 committed
Commit cd66fc0
Parent(s): 1c70dbf

Upload 3 files

Files changed (3):
  1. main.py +110 -0
  2. main.yml +27 -0
  3. requirements.txt +13 -0
main.py ADDED
@@ -0,0 +1,110 @@
+ from flask import Flask, request, jsonify
+ from langchain_community.llms import LlamaCpp
+ import nltk
+ from gensim.models import Word2Vec
+ import numpy as np
+ from sklearn.metrics.pairwise import cosine_similarity
+ import matplotlib
+ matplotlib.use('Agg')  # headless backend, so plots render without a display
+ import matplotlib.pyplot as plt
+ import io
+ import base64
+ import os
+
+ nltk.download('punkt')
+
+ app = Flask(__name__)
+
+ n_gpu_layers = 0  # CPU-only inference
+ n_batch = 1024
+
+ llm = LlamaCpp(
+     model_path="Phi-3-mini-4k-instruct-q4.gguf",  # path to GGUF file
+     temperature=0.1,
+     n_gpu_layers=n_gpu_layers,
+     n_batch=n_batch,
+     verbose=True,
+     n_ctx=4096
+ )
+ file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
+ print("model size ====> :", file_size.st_size, "bytes")
+
+ @app.route('/', methods=['POST'])
+ def get_skills():
+     cv_body = request.json.get('cv_body')
+
+     # Simple inference example: the instruction and the CV both go in the
+     # user turn of the Phi-3 chat template, and the assistant turn is left
+     # open for the model to complete.
+     output = llm(
+         f"<|user|>\nCan you list the skills mentioned in this CV?\n{cv_body}<|end|>\n<|assistant|>",
+         max_tokens=256,  # generate up to 256 tokens
+         stop=["<|end|>"],
+     )
+
+     return jsonify({'skills': output})
+
+ texts = [
+     "This is a sample text.",
+     "Another example of text.",
+     "More texts to compare."
+ ]
+
+ tokenized_texts = [nltk.word_tokenize(text.lower()) for text in texts]
+
+ word_embeddings_model = Word2Vec(sentences=tokenized_texts, vector_size=100, window=5, min_count=1, workers=4)
+
+ def text_embedding(text):
+     # Average the Word2Vec vectors of the in-vocabulary words; fall back
+     # to a zero vector when no word is known.
+     words = nltk.word_tokenize(text.lower())
+     embeddings = [word_embeddings_model.wv[word] for word in words if word in word_embeddings_model.wv]
+     if embeddings:
+         return np.mean(embeddings, axis=0)
+     else:
+         return np.zeros(word_embeddings_model.vector_size)
+
+ @app.route('/process', methods=['POST'])
+ def process():
+     data = request.get_json()
+     input_text = data.get('input_text', '')
+
+     if not input_text:
+         return jsonify({'error': 'No input text provided'}), 400
+
+     input_embedding = text_embedding(input_text)
+     text_embeddings = [text_embedding(text) for text in texts]
+
+     similarities = cosine_similarity([input_embedding], text_embeddings).flatten()
+     similarities_percentages = [similarity * 100 for similarity in similarities]
+
+     fig, ax = plt.subplots(figsize=(10, 6))
+     texts_for_plotting = [f"Text {i+1}" for i in range(len(texts))]
+     ax.bar(texts_for_plotting, similarities_percentages)
+     ax.set_ylabel('Similarity (%)')
+     ax.set_xlabel('Texts')
+     ax.set_title('Similarity of Input Text with other texts')
+     plt.xticks(rotation=45, ha='right')
+     plt.tight_layout()
+
+     # Encode the bar chart as a base64 PNG so it fits in the JSON response.
+     buf = io.BytesIO()
+     plt.savefig(buf, format='png')
+     buf.seek(0)
+     img_base64 = base64.b64encode(buf.read()).decode('utf-8')
+     plt.close()
+
+     # Rank the reference texts by similarity and keep the top three.
+     sorted_indices = np.argsort(similarities)[::-1]
+     similar_texts = [(similarities[idx] * 100, texts[idx]) for idx in sorted_indices[:3]]
+
+     response = {
+         'similarities': similarities_percentages,
+         'plot': img_base64,
+         'most_similar_texts': similar_texts
+     }
+
+     return jsonify(response)
+
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=8080, debug=True)
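
For quick manual testing of the two routes above, a minimal client sketch might look like the following (assuming the app is running locally on port 8080 as passed to app.run, that the third-party requests package is installed, and that cv_text is a toy stand-in for a real CV):

    import base64
    import requests  # HTTP client, assumed installed separately

    BASE_URL = "http://localhost:8080"  # adjust if the app is hosted elsewhere

    # Skills extraction: POST a CV body to the root route.
    cv_text = "Jane Doe. Skills: Python, Flask, machine learning."  # toy example
    resp = requests.post(f"{BASE_URL}/", json={"cv_body": cv_text})
    print(resp.json()["skills"])

    # Text similarity: POST an input text to /process and save the returned plot.
    resp = requests.post(f"{BASE_URL}/process", json={"input_text": "A sample text."})
    payload = resp.json()
    print(payload["most_similar_texts"])
    with open("similarity_plot.png", "wb") as f:
        f.write(base64.b64decode(payload["plot"]))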
main.yml ADDED
@@ -0,0 +1,27 @@
+ name: Python application
+
+ on:
+   push:
+     branches: [ main ]
+   pull_request:
+     branches: [ main ]
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+
+     steps:
+       - uses: actions/checkout@v2
+
+       - name: Set up Python 3.x
+         uses: actions/setup-python@v2
+         with:
+           python-version: '3.x'
+
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install -r requirements.txt
+
+       - name: Run the app
+         run: python main.py
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ flask
+ langchain
+ matplotlib
+ numpy
+ gensim
+ scikit-learn
+ llama-cpp-python
+ huggingface-hub
+ langchain-experimental
+ scipy==1.10.1
+ gunicorn
+ langchain-community
+ nltk
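
main.py expects Phi-3-mini-4k-instruct-q4.gguf to sit next to it, but nothing in this commit fetches the model. A minimal download sketch using the huggingface-hub package listed above (assuming the file is published in the microsoft/Phi-3-mini-4k-instruct-gguf Hub repo under that exact filename; adjust repo_id and filename if yours differ):

    # Fetch the quantized Phi-3 model before starting main.py.
    from huggingface_hub import hf_hub_download

    hf_hub_download(
        repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",  # assumed Hub repo
        filename="Phi-3-mini-4k-instruct-q4.gguf",
        local_dir=".",  # place the file where main.py's model_path expects it
    )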