hkhwilwh commited on
Commit
b00edcf
·
1 Parent(s): fcefbe5

Add files with Git LFS

Browse files
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Python 3",
3
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
4
+ "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
5
+ "customizations": {
6
+ "codespaces": {
7
+ "openFiles": [
8
+ "README.md",
9
+ "app.py"
10
+ ]
11
+ },
12
+ "vscode": {
13
+ "settings": {},
14
+ "extensions": [
15
+ "ms-python.python",
16
+ "ms-python.vscode-pylance"
17
+ ]
18
+ }
19
+ },
20
+ "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21
+ "postAttachCommand": {
22
+ "server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
23
+ },
24
+ "portsAttributes": {
25
+ "8501": {
26
+ "label": "Application",
27
+ "onAutoForward": "openPreview"
28
+ }
29
+ },
30
+ "forwardPorts": [
31
+ 8501
32
+ ]
33
+ }
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Environment files
.env

# Python virtual environment
venv/

# Logs
*.log

# Byte-compiled artifacts (this commit also added .pyc files under __pycache__/ —
# they should be deleted from the repository and ignored going forward)
__pycache__/
*.py[cod]
__pycache__/agents.cpython-310.pyc ADDED
Binary file (4.21 kB). View file
 
__pycache__/agents.cpython-312.pyc ADDED
Binary file (4.62 kB). View file
 
__pycache__/config.cpython-310.pyc ADDED
Binary file (939 Bytes). View file
 
__pycache__/config.cpython-312.pyc ADDED
Binary file (1.01 kB). View file
 
__pycache__/document_exporter.cpython-310.pyc ADDED
Binary file (2.94 kB). View file
 
__pycache__/pdf_processor.cpython-310.pyc ADDED
Binary file (5.41 kB). View file
 
__pycache__/translator.cpython-310.pyc ADDED
Binary file (3.32 kB). View file
 
__pycache__/utils.cpython-310.pyc ADDED
Binary file (2.29 kB). View file
 
__pycache__/utils.cpython-312.pyc ADDED
Binary file (2.85 kB). View file
 
agents.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from crewai import Agent
from langchain.tools import Tool
from utils import create_uae_legal_tools, is_arabic
from config import LEGAL_CATEGORIES
from dotenv import load_dotenv
import os

# Pull environment variables (OPENAI_API_KEY, ...) from a local .env file.
load_dotenv()

# Fail fast at import time when no API key is configured.
if not os.getenv('OPENAI_API_KEY'):
    raise ValueError("OpenAI API key not found. Please set OPENAI_API_KEY in your environment variables.")


def _llm_config(temperature, frequency_penalty):
    """Build the shared OpenAI config-list structure used by the agents.

    Both module-level configs differ only in temperature and frequency
    penalty, so they are generated from this single template.
    """
    return {
        "config_list": [
            {
                "model": "gpt-4-1106-preview",  # GPT-4 Turbo snapshot
                "api_key": os.getenv('OPENAI_API_KEY'),
                "temperature": temperature,
                "max_tokens": 4000,
                "presence_penalty": 0.0,
                "frequency_penalty": frequency_penalty,
                "response_format": {"type": "text"},
            }
        ]
    }


# General-purpose configuration: low temperature for consistent legal answers.
BASE_LLM_CONFIG = _llm_config(temperature=0.3, frequency_penalty=0.0)

# Summarization configuration: even lower temperature, penalize repetition.
SUMMARY_LLM_CONFIG = _llm_config(temperature=0.2, frequency_penalty=0.3)
44
+
45
def create_judge_agent():
    """Return the 'UAE judge' Agent.

    Persona: 20+ years on the UAE bench; answers only UAE-law questions,
    with citations and references. Uses the shared base LLM config and the
    UAE legal research tools.
    """
    backstory = """
    أنت قاضٍ متمرس في النظام القانوني الإماراتي مع خبرة تزيد عن 20 عاماً
    ومعرفة عميقة بالقوانين واللوائح والسوابق القانونية الإماراتية.
    دورك هو تحليل القضايا وتقديم أحكام عادلة ومسببة بناءً على القانون الإماراتي،
    مع التركيز على تطبيق أحدث التشريعات والأحكام القضائية.
    يمنع الرد على اي استفسار غير قانوني او خاص بغير المواضيع القانونية في دولة الامارات العربية المتحدة.
    يرجى التأكد من أن جميع الردود على أسئلتي تستند إلى مصادر موثوقة، مع تضمين الاستشهادات والروابط المناسبة لتلك المصادر. أفضل الإجابات التفصيلية والمنظمة جيدًا والتي لا تعالج استفساري فحسب، بل توفر أيضًا سياقًا أو رؤى إضافية عند الاقتضاء. كن واضحًا وموجزًا، وإذا كان موضوع معين لا يؤثر بشكل مباشر على أهدافي أو دراستي، فيرجى إبلاغي بذلك. اذكر أيضًا المراجع في نهاية المقال
    """
    return Agent(
        role='قاضي قانوني إماراتي',
        goal='تقديم أحكام وتفسيرات قانونية دقيقة بناءً على القانون الإماراتي',
        backstory=backstory,
        verbose=True,
        allow_delegation=False,
        llm_config=BASE_LLM_CONFIG,
        tools=create_uae_legal_tools(),
    )
62
+
63
def create_advocate_agent():
    """Return the 'UAE advocate' Agent.

    Persona: 15 years of practice before the federal and local courts,
    focused on client representation and legal advice under UAE law.
    """
    backstory = """
    أنت محامٍ ماهر في الإمارات العربية المتحدة مع خبرة 15 عاماً في مختلف
    مجالات القانون الإماراتي. تخصصت في قضايا المحاكم الاتحادية والمحلية،
    ولديك سجل حافل في تمثيل العملاء بنجاح. دورك هو تقديم المشورة القانونية
    الدقيقة وضمان حماية حقوق العملاء وفقاً للقانون الإماراتي.
    """
    return Agent(
        role='محامي إماراتي',
        goal='تقديم التمثيل القانوني والمشورة المتخصصة بناءً على القانون الإماراتي',
        backstory=backstory,
        verbose=True,
        allow_delegation=False,
        llm_config=BASE_LLM_CONFIG,
        tools=create_uae_legal_tools(),
    )
78
+
79
def create_consultant_agent():
    """Return the 'UAE judicial consultant' Agent.

    Persona: 18 years advising institutions and individuals on the UAE
    legal system, oriented toward practical and preventive guidance.
    """
    backstory = """
    أنت مستشار قضائي متمرس مع خبرة 18 عاماً ومعرفة شاملة بالنظام القانوني
    والإجراءات القضائية في الإمارات العربية المتحدة. تخصصت في تقديم الاستشارات
    للمؤسسات والأفراد، مع التركيز على الحلول العملية والوقائية. دورك هو تقديم
    التوجيه الاستراتيجي والمشورة المتخصصة في المسائل القانونية المعقدة.
    """
    return Agent(
        role='مستشار قضائي إماراتي',
        goal='تقديم الاستشارات والتوجيه القانوني المتخصص في القانون الإماراتي',
        backstory=backstory,
        verbose=True,
        allow_delegation=False,
        llm_config=BASE_LLM_CONFIG,
        tools=create_uae_legal_tools(),
    )
app.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from agents import create_judge_agent, create_advocate_agent, create_consultant_agent
from crewai import Task, Crew
from utils import is_arabic, format_legal_response
from config import LEGAL_CATEGORIES, DEFAULT_LANGUAGE

# Streamlit page setup — must be the first Streamlit call in the script.
st.set_page_config(page_title="المساعد القانوني الإماراتي", layout="wide")

# Load custom CSS and inject it into the page.
# NOTE(review): crashes at startup if style.css is missing — confirm it ships with the app.
with open('style.css') as f:
    st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)

st.title("المساعد القانوني الإماراتي")
st.write("احصل على المساعدة القانونية من خبراء قانونيين إماراتيين مدعومين بالذكاء الاصطناعي")

# Add imports
# NOTE(review): mid-file imports; conventionally these belong at the top of the module.
from pdf_processor import PDFProcessor
from document_exporter import DocumentExporter
from translator import Translator

# Initialize heavyweight components once per session — Streamlit re-runs this
# whole script on every user interaction, so they are cached in session_state.
if 'pdf_processor' not in st.session_state:
    st.session_state.pdf_processor = PDFProcessor()
if 'document_exporter' not in st.session_state:
    st.session_state.document_exporter = DocumentExporter()
if 'translator' not in st.session_state:
    st.session_state.translator = Translator()
29
# Create the main four-tab layout: document analysis plus one tab per agent.
tab1, tab2, tab3, tab4 = st.tabs(["تحليل المستندات", "القاضي", "المحامي", "المستشار"])

# PDF Upload Tab: either summarize/analyze an uploaded PDF or translate it.
with tab1:
    st.header("تحليل المستندات القانونية")

    # Add service selection toggle
    service_type = st.radio(
        "اختر نوع الخدمة / Select Service",
        ["تلخيص وتحليل المستند", "ترجمة المستند"],
        horizontal=True
    )

    # Target language is only asked for (and only defined) in translation mode.
    if service_type == "ترجمة المستند":
        target_language = st.selectbox(
            "اختر لغة الترجمة / Select Target Language",
            ["العربية", "English", "中文", "हिंदी", "اردو"],
            index=1
        )

    uploaded_file = st.file_uploader("قم بتحميل ملف PDF للتحليل", type=['pdf'])

    if uploaded_file is not None:
        # Reject oversized uploads before any processing.
        file_size = len(uploaded_file.getvalue()) / (1024 * 1024)  # Convert to MB
        if file_size > 20:  # 20MB limit
            st.error("حجم الملف كبير جداً. الحد الأقصى المسموح به هو 20 ميجابايت.")
            st.stop()

        if service_type == "تلخيص وتحليل المستند":
            # Progress UI driven by the processor via callback.
            progress_bar = st.progress(0)
            status_text = st.empty()

            def update_progress(message, progress):
                # Callback invoked by PDFProcessor during long-running steps.
                status_text.text(message)
                progress_bar.progress(progress)

            st.session_state.pdf_processor.set_progress_callback(update_progress)

            try:
                # Process the uploaded PDF.
                # NOTE(review): `process_document` is assumed to return a dict with
                # 'summary', 'legal_analysis', 'legislation_mapping' keys — the keys
                # consumed below and by DocumentExporter; confirm in pdf_processor.
                results = st.session_state.pdf_processor.process_document(uploaded_file.read())

                # Display results in collapsible sections.
                with st.expander("ملخص المستند", expanded=True):
                    st.write(results["summary"])

                with st.expander("تحليل المخالفات القانونية", expanded=True):
                    st.markdown(results["legal_analysis"], unsafe_allow_html=True)

                with st.expander("الخريطة التشريعية", expanded=True):
                    st.markdown(results["legislation_mapping"], unsafe_allow_html=True)

                # Export buttons (PDF / Word) for the generated analysis.
                st.markdown("### تحميل التحليل")
                export_container = st.container()

                col1, col2 = export_container.columns(2)
                with col1:
                    pdf_button = st.download_button(
                        label="تحميل كملف PDF",
                        data=st.session_state.document_exporter.export_to_pdf(results),
                        file_name="legal_analysis.pdf",
                        mime="application/pdf",
                        key="pdf_download"
                    )

                with col2:
                    word_button = st.download_button(
                        label="تحميل كملف Word",
                        data=st.session_state.document_exporter.export_to_word(results),
                        file_name="legal_analysis.docx",
                        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                        key="word_download"
                    )

            except ValueError as ve:
                # Input-level problems get a specific message.
                st.error(f"خطأ في المدخلات: {str(ve)}")
            except Exception as e:
                # Anything else: generic error plus a "contact support" hint.
                st.error(f"حدث خطأ غير متوقع: {str(e)}")
                st.error("يرجى المحاولة مرة أخرى أو الاتصال بالدعم الفني")
            finally:
                # Always clear the progress UI, success or failure.
                progress_bar.empty()
                status_text.empty()

        else:  # Translation service
            with st.spinner("جاري تحليل المستند..."):
                try:
                    # Extract raw text from the PDF (text layer or OCR).
                    text = st.session_state.pdf_processor.extract_text_from_pdf(uploaded_file.read())

                    if not text.strip():
                        st.error("لم يتم العثور على نص قابل للقراءة في المستند")
                        st.stop()

                    # Detect source language before translating.
                    source_lang = st.session_state.translator.detect_language(text)
                    st.info(f"تم اكتشاف لغة المستند: {st.session_state.translator.get_language_name(source_lang)}")

                    # Map UI display names to the translator's language codes.
                    lang_map = {
                        "العربية": "arabic",
                        "English": "english",
                        "中文": "chinese",
                        "हिंदी": "hindi",
                        "اردو": "urdu"
                    }

                    target_lang = lang_map[target_language]

                    # Translating into the detected source language is a no-op; refuse it.
                    if source_lang == target_lang:
                        st.warning("لغة المصدر ولغة الهدف متطابقتان. يرجى اختيار لغة مختلفة للترجمة.")
                        st.stop()

                    with st.spinner("جاري الترجمة..."):
                        # Preprocess and translate the text.
                        processed_text = st.session_state.translator.preprocess_text(text)
                        translated_text = st.session_state.translator.translate(
                            processed_text,
                            source_lang,
                            target_lang
                        )

                    # Show original and translation side by side.
                    col1, col2 = st.columns(2)
                    with col1:
                        st.subheader("النص الأصلي / Original Text")
                        st.text_area("", value=text, height=300, key="original_text")

                    with col2:
                        st.subheader("النص المترجم / Translated Text")
                        st.text_area("", value=translated_text, height=300, key="translated_text")

                    # Download options: plain translated text, or both texts as HTML.
                    st.markdown("### تحميل الترجمة")
                    download_col1, download_col2 = st.columns(2)

                    with download_col1:
                        st.download_button(
                            label="تحميل النص المترجم",
                            data=translated_text.encode(),
                            file_name=f"translated_document.txt",
                            mime="text/plain",
                            key="translation_download"
                        )

                    with download_col2:
                        # Build a simple HTML document containing both texts.
                        # NOTE(review): text is interpolated unescaped into HTML;
                        # document content containing markup would render as markup.
                        html_content = f"""
                        <html dir="auto">
                        <head>
                            <meta charset="UTF-8">
                            <style>
                                body {{ font-family: Arial, sans-serif; margin: 20px; }}
                                .text-container {{ margin-bottom: 20px; }}
                                h2 {{ color: #2c3e50; }}
                            </style>
                        </head>
                        <body>
                            <div class="text-container">
                                <h2>Original Text</h2>
                                <p>{text}</p>
                            </div>
                            <div class="text-container">
                                <h2>Translated Text</h2>
                                <p>{translated_text}</p>
                            </div>
                        </body>
                        </html>
                        """

                        st.download_button(
                            label="تحميل النصين معاً (HTML)",
                            data=html_content.encode(),
                            file_name="translation_with_original.html",
                            mime="text/html",
                            key="html_download"
                        )

                except ValueError as ve:
                    st.error(f"خطأ في المدخلات: {str(ve)}")
                except Exception as e:
                    st.error(f"حدث خطأ غير متوقع: {str(e)}")
                    st.error("يرجى المحاولة مرة أخرى أو الاتصال بالدعم الفني")
218
# Sidebar language selector. NOTE(review): `language` is not read anywhere in
# this script — response language actually follows the query (see get_agent_response).
language = st.sidebar.selectbox(
    "اختر اللغة / Select Language",
    ["العربية", "English"],
    index=0
)

# Legal category selector — passed to get_agent_response() as query context.
selected_category = st.sidebar.selectbox(
    "اختر الفئة القانونية / Select Legal Category",
    list(LEGAL_CATEGORIES.values()),
    index=0
)

# Initialize session state for chat history.
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []

# Fix: a second `tab1, tab2, tab3 = st.tabs(["القاضي", "المحامي", "المستشار"])`
# call used to live here, creating a duplicate tab bar: the judge and advocate
# sections rendered in the new bar while the consultant section still used
# `tab4` from the four-tab bar created earlier, splitting the UI across two
# tab strips. It is removed so the agent sections below reuse tab2/tab3/tab4
# from the original four-tab bar.
238
+
239
def get_agent_response(agent, query, category):
    """Run `query` through a single-agent Crew and return the formatted reply.

    The task prompt embeds the selected legal category and requires the answer
    to be grounded in UAE law with references. The output formatting language
    follows the query: Arabic queries get Arabic ('ar') formatting, otherwise
    English ('en').
    """
    prompt = f"""
    تحليل والرد على الاستفسار التالي في مجال {category}:
    {query}

    يجب أن يكون الرد:
    1. مستنداً إلى القانون الإماراتي
    2. مدعوماً بالمراجع القانونية
    3. واضحاً ومفهوماً
    4. متوافقاً مع أحدث التشريعات
    """

    crew = Crew(
        agents=[agent],
        tasks=[
            Task(
                description=prompt,
                agent=agent,
                expected_output="تحليل قانوني ورد بناءً على القانون الإماراتي",
            )
        ],
    )

    result = crew.kickoff()
    reply_language = 'ar' if is_arabic(query) else 'en'
    return format_legal_response(result, reply_language)
266
+
267
# Judge Tab: free-text legal question answered by the judge agent.
# NOTE(review): `tab2` refers to its most recent st.tabs() binding at this point
# in the script — verify it is the "القاضي" tab the content is meant for.
with tab2:
    st.header("استشارة القاضي الإماراتي")
    judge_query = st.text_area("اكتب سؤالك القانوني للقاضي:", key="judge_input", placeholder="أدخل النص هنا...")
    # Force right-to-left input direction for Arabic text entry.
    st.markdown(
        """
        <style>
        .element-container textarea {
            direction: rtl;
            text-align: right;
        }
        </style>
        """,
        unsafe_allow_html=True
    )
    if st.button("الحصول على رأي القاضي", key="judge_button"):
        if judge_query:
            with st.spinner("القاضي يحلل قضيتك..."):
                # A fresh agent is built per click; the exchange is appended to
                # session chat history as (role, query, response).
                judge_agent = create_judge_agent()
                response = get_agent_response(judge_agent, judge_query, selected_category)
                st.session_state.chat_history.append(("القاضي", judge_query, response))
                st.write("رد القاضي:")
                st.markdown(response, unsafe_allow_html=True)
290
+
291
# Advocate Tab: same flow as the judge tab, using the advocate agent.
with tab3:
    st.header("استشارة المحامي الإماراتي")
    advocate_query = st.text_area("اكتب سؤالك القانوني للمحامي:", key="advocate_input", placeholder="أدخل النص هنا...")
    # Force right-to-left input direction for Arabic text entry.
    st.markdown(
        """
        <style>
        .element-container textarea {
            direction: rtl;
            text-align: right;
        }
        </style>
        """,
        unsafe_allow_html=True
    )
    if st.button("الحصول على رأي المحامي", key="advocate_button"):
        if advocate_query:
            with st.spinner("المحامي يحلل قضيتك..."):
                # Fresh agent per click; exchange recorded in session chat history.
                advocate_agent = create_advocate_agent()
                response = get_agent_response(advocate_agent, advocate_query, selected_category)
                st.session_state.chat_history.append(("المحامي", advocate_query, response))
                st.write("رد المحامي:")
                st.markdown(response, unsafe_allow_html=True)
314
+
315
# Consultant Tab: same flow as the judge/advocate tabs, using the consultant agent.
with tab4:
    st.header("استشارة المستشار القضائي الإماراتي")
    consultant_query = st.text_area("اكتب سؤالك القانوني للمستشار:", key="consultant_input", placeholder="أدخل النص هنا...")
    # Force right-to-left input direction for Arabic text entry.
    st.markdown(
        """
        <style>
        .element-container textarea {
            direction: rtl;
            text-align: right;
        }
        </style>
        """,
        unsafe_allow_html=True
    )
    # Fix: the script previously ended here, so the consultant tab had a text
    # area but no submit button — unlike the judge and advocate tabs. The
    # handler below mirrors those tabs for consistency.
    if st.button("الحصول على رأي المستشار", key="consultant_button"):
        if consultant_query:
            with st.spinner("المستشار يحلل قضيتك..."):
                consultant_agent = create_consultant_agent()
                response = get_agent_response(consultant_agent, consultant_query, selected_category)
                st.session_state.chat_history.append(("المستشار", consultant_query, response))
                st.write("رد المستشار:")
                st.markdown(response, unsafe_allow_html=True)
changes.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc5c62af6ddd4cd86812758b4f2f6c999eddbfb0e3a6e3d2ff2412ad9c991ab
3
+ size 5186
config.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()

# OpenAI Configuration.
# Read once at import time; None when the variable is unset — consumers
# (see agents.py) are responsible for validating it.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Language Settings
DEFAULT_LANGUAGE = 'ar'  # Arabic by default
SUPPORTED_LANGUAGES = ['ar', 'en']

# UAE Legal Resources — official government/legal portals.
# NOTE(review): not referenced in the visible code; presumably consumed by the
# agents' research tools — confirm in utils.create_uae_legal_tools.
UAE_LEGAL_DOMAINS = [
    'https://elaws.moj.gov.ae',
    'https://www.mohre.gov.ae',
    'https://www.dm.gov.ae',
    'https://www.adjd.gov.ae',
    'https://www.dc.gov.ae'

]

# Legal Categories: internal identifier -> Arabic display label
# (labels are shown in the app's sidebar selector).
LEGAL_CATEGORIES = {
    'civil': 'القانون المدني',
    'criminal': 'القانون الجنائي',
    'commercial': 'القانون التجاري',
    'labor': 'قانون العمل',
    'family': 'قانون الأسرة',
    'property': 'قانون العقارات'
}
document_exporter.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
try:
    from docx import Document
except ImportError:
    from docx.api import Document
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import io
import arabic_reshaper
from bidi.algorithm import get_display


class DocumentExporter:
    """Export analysis results to PDF or Word.

    `content` dicts are expected to carry the keys 'summary',
    'legal_analysis' and 'legislation_mapping' (the keys read below).
    """

    def __init__(self):
        # Register the Arabic font for PDF output, and remember which font
        # is actually usable so that measuring (stringWidth) and drawing
        # (setFont) always agree. Previously the code measured with 'Arabic'
        # even after falling back to Helvetica, which raised when the TTF
        # was missing.
        try:
            pdfmetrics.registerFont(TTFont('Arabic', 'fonts/NotoNaskhArabic-Regular.ttf'))
            self._font = 'Arabic'
        except Exception:
            self._font = 'Helvetica'  # built-in fallback when the TTF is absent

    def _draw_wrapped(self, c, text, y, font_size=12, max_width=500, x=50):
        """Draw `text` word-wrapped on canvas `c` starting at height `y`.

        Starts a new page when the cursor nears the bottom (previously long
        text ran off the page as y went negative). Returns the updated y.
        """
        line = ""
        for word in text.split():
            if c.stringWidth(line + word, self._font, font_size) < max_width:
                line += word + " "
            else:
                c.drawString(x, y, line)
                y -= 15
                if y < 60:  # near page bottom: continue on a fresh page
                    c.showPage()
                    c.setFont(self._font, 14)
                    y = 750
                line = word + " "
        if line:
            c.drawString(x, y, line)
            y -= 15
        return y

    def export_to_pdf(self, content: dict) -> bytes:
        """Render title + the three analysis sections into a PDF; return bytes."""
        buffer = io.BytesIO()
        c = canvas.Canvas(buffer, pagesize=letter)
        c.setFont(self._font, 14)
        y = 750  # starting y position (letter page, top margin)

        # Title — drawn at x=500 so RTL text ends near the right margin.
        c.drawString(500, y, get_display(arabic_reshaper.reshape("تحليل المستند القانوني")))
        y -= 30

        # Each section: heading then word-wrapped body. Arabic text must be
        # reshaped + bidi-reordered before ReportLab can draw it correctly.
        sections = [
            ("ملخص المستند", content['summary']),
            ("تحليل المخالفات القانونية", content['legal_analysis']),
            ("الخريطة التشريعية", content['legislation_mapping']),
        ]
        for heading, body in sections:
            c.drawString(500, y, get_display(arabic_reshaper.reshape(heading)))
            y -= 20
            y = self._draw_wrapped(c, get_display(arabic_reshaper.reshape(body)), y)
            y -= 15  # gap before the next section

        c.save()
        return buffer.getvalue()

    def export_to_word(self, content: dict) -> bytes:
        """Render the same sections into a .docx document; return bytes."""
        doc = Document()

        doc.add_heading("تحليل المستند القانوني", 0)

        doc.add_heading("ملخص المستند", level=1)
        doc.add_paragraph(content['summary'])

        doc.add_heading("تحليل المخالفات القانونية", level=1)
        doc.add_paragraph(content['legal_analysis'])

        doc.add_heading("الخريطة التشريعية", level=1)
        doc.add_paragraph(content['legislation_mapping'])

        # Serialize to bytes via an in-memory buffer.
        buffer = io.BytesIO()
        doc.save(buffer)
        return buffer.getvalue()
fonts/NotoNaskhArabic-Regular.ttf ADDED
Binary file (178 kB). View file
 
fonts/NotoNaskhArabic.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e9aa38292c3aef5a8e3bd28b30945638d49bbc028517c71c645d3bd90a957b0
3
+ size 21347
hassan_github ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN OPENSSH PRIVATE KEY-----
2
+ b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn
3
+ NhAAAAAwEAAQAAAgEAsFKJwQWnqkffXdE3djKsicWXO2zUwKsdyMCcpQNpHsjm8tp8Gzne
4
+ EaES6wrDV98UEA4KZyl3PsLcIrJE2srKaVpoaYe1UpJlVhTmo+oILmH0kt4321q1w6YQMT
5
+ 2SB6yfc2XnhhwRtHCzPhGFygI8tdPcwP4HPJJwQUZSt8wVGROLphrXBSrvnZNF3yAJa+4o
6
+ /J+bG6qLFvXqZnjdiOe1wY596bRs5KeuHM41cyRbmPtUlSPzaEy3Non7rTp0s+sL6pmIPY
7
+ gUQ5fdMy8svlJndjIJg348nHfW7DZUHSaMfQgCOXOhwTbY/mEFwfM0nz5IWKPr+kjliMkO
8
+ GHKizPhqBTwlCVPFw2sifV7305ON+yUptYczksjpv1dlw6SlUxxyTKkTbnV7n7YiNxwbTa
9
+ LkHfD283zFH5ziMkU1fuiGh9NPzPE/sa80dR6EwnB0XLBojyJGVD39jJ4X4wb2G+MJik7U
10
+ fgP22najOSSoK5xuFbW50rRlUp6n30bgrH4f/kiytQUAUhtB7YIt1YU9VUf/Z6S4ab1SL5
11
+ w+BLVekZVnigr/lYCTzTo+tIvkEVCbbiGAaof6EO5Zs4q7D22zXPm0E9eZb6d2V8/vPWPA
12
+ +gIjaTPseqEsnxj+2b6ewScPbYkHnI0oomTQwWIRAfrxpi+iyKc23mf4q6nAjyvRKXSVWD
13
+ UAAAdQVunz01bp89MAAAAHc3NoLXJzYQAAAgEAsFKJwQWnqkffXdE3djKsicWXO2zUwKsd
14
+ yMCcpQNpHsjm8tp8GzneEaES6wrDV98UEA4KZyl3PsLcIrJE2srKaVpoaYe1UpJlVhTmo+
15
+ oILmH0kt4321q1w6YQMT2SB6yfc2XnhhwRtHCzPhGFygI8tdPcwP4HPJJwQUZSt8wVGROL
16
+ phrXBSrvnZNF3yAJa+4o/J+bG6qLFvXqZnjdiOe1wY596bRs5KeuHM41cyRbmPtUlSPzaE
17
+ y3Non7rTp0s+sL6pmIPYgUQ5fdMy8svlJndjIJg348nHfW7DZUHSaMfQgCOXOhwTbY/mEF
18
+ wfM0nz5IWKPr+kjliMkOGHKizPhqBTwlCVPFw2sifV7305ON+yUptYczksjpv1dlw6SlUx
19
+ xyTKkTbnV7n7YiNxwbTaLkHfD283zFH5ziMkU1fuiGh9NPzPE/sa80dR6EwnB0XLBojyJG
20
+ VD39jJ4X4wb2G+MJik7UfgP22najOSSoK5xuFbW50rRlUp6n30bgrH4f/kiytQUAUhtB7Y
21
+ It1YU9VUf/Z6S4ab1SL5w+BLVekZVnigr/lYCTzTo+tIvkEVCbbiGAaof6EO5Zs4q7D22z
22
+ XPm0E9eZb6d2V8/vPWPA+gIjaTPseqEsnxj+2b6ewScPbYkHnI0oomTQwWIRAfrxpi+iyK
23
+ c23mf4q6nAjyvRKXSVWDUAAAADAQABAAACAAu9AvS5tqbMcB9jzUhuKTRm1iGbpjJJcgsq
24
+ X4NQzc/B2jYyu25olNMhoQvKxKR18nT7KlAh35FrEZKxwYm7VGxdG3RjF3wuyNZJP+2LqA
25
+ 3GcazRZHCTAmTLCmrsyWr/YAIjt50jAz66/gPU0M5ZBcepGhozDzJGIXkHAHzB9mmb9oER
26
+ al0qtZcM9erbzCTGTy46Re46lVXq+zblNwJlQqFnJhTH8TPrwdijFcXblsmBeekP4qV/7f
27
+ aQixPGP3y22i09GWfXRloVoyFEM7tb6w1gWWfKoKheBg3ltAXYyiOMw/ElNJCTYTDWLrbx
28
+ xQAQ+moER7J65eUMVYblItd4dj5UpCoprRsLjujXETT92N0FNm1H5TH8tw3Yuiwvkisetc
29
+ yo+kbe3JXD7UWNcW3y09zZ6cga/wc8KK6yiK+hd4zw4+rMB1LB/gtT8PmBHXUA4MvvQTvv
30
+ rlLFR6NYDIR8+V0UmOfXKyQu14pUyReHwv6WOEtM5SfzIZonXV/uICcle+k+OjNs5btO5V
31
+ oaDNWV9rqA+6ec0nPC/vMUlxlTg+kM2KX9lhwP7li8OzwberUcu7s1CxKuBJfFKXyrV+Yo
32
+ ZRHurlHSrlGkJH5KlA2TvixZjiF2CoRwJ6opFJMNyTNJkT7G7iFHBXX0rEsiwg2HqPzEOe
33
+ x9AnLUPWmiIkW6tyjlAAABADgR9qHep88KTbiyj+YgCVapkrCGQ7K9MAqjUf32mZhcWwv+
34
+ iDZn6AF5UTq4BuKgOt8snwShWnUHjC6HyxdTfAyC2U2Xjjc/hkL6st1V5b1B2iCweaKG4j
35
+ rnGUMqqvDrIA9H3lDfxNeKpfW9VympD4gtWkbXMQ9WqrSLihG5UzJOWvdXAVW3+sDDRXJY
36
+ kAqhBqA7pDYSFHOgNVJhEkKR/qnHI1CZn4Ts+OqSMwBssMTuwHvcfKWCgYlUBBQgD06vu6
37
+ +q9SSl8k15+b6vfu5Nk38j0E+Z3oqW4ZrBu+a1B5BelzGYud+b+X5yii19yXVoPAfvOcZb
38
+ wqqhnwDV3H6CHowAAAEBAODjL55u3FZsrmO0WakDDXpkBRTJF0LQnm6sl/D+TRuSTtURu3
39
+ KKm/LAT673heS90c3PP5Ja6aFwwZ+BTtqlX51CI/8kmqOWm25I03NXUfexbajm7tNEDHXt
40
+ SR2Db4HlzBK4Rv5f0ORQj9rrBOvaF4xkUl2P4l+Cm3McErVRNAB9E6Kvhpq6/OrzUOGudy
41
+ owYeC37CdjfGdVE73lP305hdu1bPdsHQkupB6PzG8QtfzvqGc2LPWS1BsgGWiHD/w8dqRO
42
+ WnY8xycZE5M8dIXPuPN6VNQGkVVUmREb0UcKwXnCaDwfSGxed1F2N60rggtK4BlZcvZSK8
43
+ mQK2xaRQGB9lsAAAEBAMi3VGhXgXmiXRghB8opmA4zUwRfOUjlwTj9akO87Wz1nvag8+Xt
44
+ 3j1dtb7CECIGobLuUTyrcbPdPB7JeibsVxEYposQqIqZ9YBWXrN6ciVzyFfq+SqEyKTizg
45
+ ir2aXbZLLeo5EP40H0PiDCPMKeiK5bkrGE0+tEK4mhUdG4tlqSS2quG+GgGXNW2799wX6p
46
+ vH9Ir1B1CDNmvIm9n6CG70+FIvKlr0AeSHyWJDc389ktTqsa3QrzPmMIookKpoLhnvmo0P
47
+ P1qLB02rdTZrFxUGRxvGkb0O/1dK3SBfl35+wIsKOZF/O1iE+5fRWNU6CHSv+ng5A5UHwE
48
+ J+0Afmn80K8AAAAYaGFzc2FuLmtod2lsZWhAZ21haWwuY29tAQID
49
+ -----END OPENSSH PRIVATE KEY-----
hassan_github.pub ADDED
@@ -0,0 +1 @@
 
 
1
+ ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCwUonBBaeqR99d0Td2MqyJxZc7bNTAqx3IwJylA2keyOby2nwbOd4RoRLrCsNX3xQQDgpnKXc+wtwiskTaysppWmhph7VSkmVWFOaj6gguYfSS3jfbWrXDphAxPZIHrJ9zZeeGHBG0cLM+EYXKAjy109zA/gc8knBBRlK3zBUZE4umGtcFKu+dk0XfIAlr7ij8n5sbqosW9epmeN2I57XBjn3ptGzkp64czjVzJFuY+1SVI/NoTLc2ifutOnSz6wvqmYg9iBRDl90zLyy+Umd2MgmDfjycd9bsNlQdJox9CAI5c6HBNtj+YQXB8zSfPkhYo+v6SOWIyQ4YcqLM+GoFPCUJU8XDayJ9XvfTk437JSm1hzOSyOm/V2XDpKVTHHJMqRNudXuftiI3HBtNouQd8PbzfMUfnOIyRTV+6IaH00/M8T+xrzR1HoTCcHRcsGiPIkZUPf2MnhfjBvYb4wmKTtR+A/badqM5JKgrnG4VtbnStGVSnqffRuCsfh/+SLK1BQBSG0Htgi3VhT1VR/9npLhpvVIvnD4EtV6RlWeKCv+VgJPNOj60i+QRUJtuIYBqh/oQ7lmzirsPbbNc+bQT15lvp3ZXz+89Y8D6AiNpM+x6oSyfGP7Zvp7BJw9tiQecjSiiZNDBYhEB+vGmL6LIpzbeZ/irqcCPK9EpdJVYNQ== [email protected]
pdf_processor.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import io
import os
import re  # added: _clean_text / _process_arabic_text call re.sub but re was never imported
from typing import List, Dict

# Third-party
import arabic_reshaper
import PyPDF2
import pytesseract
import torch  # added: referenced for device/memory management but never imported
from bidi.algorithm import get_display
from crewai import Task, Crew
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pdf2image import convert_from_bytes
from transformers import pipeline

# Local
from agents import create_judge_agent, create_advocate_agent
13
+
14
+ class PDFProcessor:
15
+ def __init__(self):
16
+ self.text_splitter = RecursiveCharacterTextSplitter(
17
+ chunk_size=500, # Reduced chunk size for better memory management
18
+ chunk_overlap=50,
19
+ length_function=len,
20
+ separators=["\n\n", "\n", " ", ""]
21
+ )
22
+ # Initialize models with better memory management
23
+ self.summarizer = pipeline(
24
+ "summarization",
25
+ model="facebook/bart-large-cnn",
26
+ device_map="auto", # Automatically choose best device
27
+ torch_dtype=torch.float32, # Use float32 for better memory efficiency
28
+ batch_size=1 # Process one chunk at a time
29
+ )
30
+ self.progress_callback = None
31
+
32
+ # Configure torch for memory efficiency
33
+ if torch.backends.mps.is_available(): # For Mac M1/M2
34
+ torch.backends.mps.set_per_process_memory_fraction(0.7) # Use only 70% of available memory
35
+ elif torch.cuda.is_available(): # For CUDA devices
36
+ torch.cuda.empty_cache()
37
+ torch.cuda.set_per_process_memory_fraction(0.7)
38
+
39
+ def set_progress_callback(self, callback):
40
+ """Set a callback function to report progress."""
41
+ self.progress_callback = callback
42
+
43
+ def update_progress(self, message: str, progress: float):
44
+ """Update progress through callback if available."""
45
+ if self.progress_callback:
46
+ self.progress_callback(message, progress)
47
+
48
+ def extract_text_from_pdf(self, pdf_bytes: bytes) -> str:
49
+ """Extract text from PDF, handling both searchable and scanned PDFs with improved accuracy."""
50
+ text = ""
51
+ try:
52
+ # Try to extract text directly first using PyPDF2
53
+ pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
54
+ extracted_text = []
55
+
56
+ for page in pdf_reader.pages:
57
+ page_text = page.extract_text()
58
+ if page_text.strip():
59
+ extracted_text.append(page_text)
60
+
61
+ # If direct extraction yielded results, process it
62
+ if extracted_text:
63
+ text = "\n\n".join(extracted_text)
64
+ else:
65
+ # If no text was extracted, use OCR with improved settings
66
+ images = convert_from_bytes(pdf_bytes, dpi=300) # Higher DPI for better quality
67
+ for image in images:
68
+ # Configure tesseract for better Arabic text recognition
69
+ custom_config = r'--oem 1 --psm 3 -l ara+eng'
70
+ page_text = pytesseract.image_to_string(
71
+ image,
72
+ config=custom_config,
73
+ lang='ara+eng'
74
+ )
75
+ if page_text.strip():
76
+ extracted_text.append(page_text)
77
+
78
+ text = "\n\n".join(extracted_text)
79
+
80
+ # Clean up the text
81
+ text = self._clean_text(text)
82
+
83
+ # Handle Arabic text with improved reshaping
84
+ text = self._process_arabic_text(text)
85
+
86
+ except Exception as e:
87
+ raise Exception(f"Error processing PDF: {str(e)}")
88
+
89
+ return text
90
+
91
+ def _clean_text(self, text: str) -> str:
92
+ """Clean and normalize extracted text."""
93
+ # Remove control characters
94
+ text = "".join(char for char in text if char.isprintable() or char in "\n\r\t")
95
+
96
+ # Normalize whitespace
97
+ text = re.sub(r'\s+', ' ', text)
98
+ text = re.sub(r'\n\s*\n', '\n\n', text)
99
+
100
+ # Fix common OCR issues
101
+ text = re.sub(r'(?&lt;=[a-z])(?=[A-Z])', ' ', text)
102
+ text = re.sub(r'([.!?])\s*(?=[A-Z])', r'\1\n', text)
103
+
104
+ # Remove empty lines and extra whitespace
105
+ lines = [line.strip() for line in text.split('\n')]
106
+ text = '\n'.join(line for line in lines if line)
107
+
108
+ return text.strip()
109
+
110
    def _process_arabic_text(self, text: str) -> str:
        """Reshape and bidi-reorder Arabic text for correct display.

        Best-effort: any failure is logged and the input is returned
        unchanged. NOTE(review): this module did not import `re`, so the two
        re.sub() cleanups below raised NameError which this broad `except`
        silently swallowed — they never actually ran (fixed by adding the
        import at module level).
        """
        try:
            # arabic-reshaper options: keep diacritics, enable ligatures.
            configuration = {
                'delete_harakat': False,
                'support_ligatures': True,
                'RIAL SIGN': True
            }

            # Join/shape Arabic letters into their contextual presentation forms.
            reshaped_text = arabic_reshaper.reshape(text, configuration=configuration)

            # Apply the Unicode bidirectional algorithm for display order.
            text = get_display(reshaped_text)

            # NOTE(review): this removes ALL whitespace between any two Arabic
            # letters — including legitimate inter-word spaces; confirm intent.
            text = re.sub(r'([ء-ي])\s+([ء-ي])', r'\1\2', text)  # Remove spaces between Arabic letters
            # Strip zero-width and directional control characters.
            text = re.sub(r'[\u200B-\u200F\u202A-\u202E]', '', text)  # Remove Unicode control characters

            return text
        except Exception as e:
            print(f"Warning: Error in Arabic text processing: {str(e)}")
            return text  # Return original text if processing fails
134
+
135
+ def summarize_document(self, text: str) -> str:
136
+ """Generate a summary of the document with improved memory management."""
137
+ try:
138
+ # Split text into smaller chunks
139
+ chunks = self.text_splitter.split_text(text)
140
+ summaries = []
141
+
142
+ # Process chunks in batches to manage memory
143
+ batch_size = 3 # Process 3 chunks at a time
144
+ for i in range(0, len(chunks), batch_size):
145
+ # Clear GPU/MPS memory before processing new batch
146
+ if torch.cuda.is_available():
147
+ torch.cuda.empty_cache()
148
+ elif torch.backends.mps.is_available():
149
+ # Force garbage collection for MPS
150
+ import gc
151
+ gc.collect()
152
+
153
+ batch = chunks[i:i + batch_size]
154
+ for chunk in batch:
155
+ try:
156
+ # Generate summary with controlled length and parameters
157
+ summary = self.summarizer(
158
+ chunk,
159
+ max_length=130,
160
+ min_length=30,
161
+ do_sample=False,
162
+ num_beams=2, # Reduced beam search for memory efficiency
163
+ early_stopping=True
164
+ )
165
+ summaries.append(summary[0]['summary_text'])
166
+ except Exception as e:
167
+ print(f"Warning: Error summarizing chunk: {str(e)}")
168
+ # If summarization fails, include a portion of the original text
169
+ summaries.append(chunk[:200] + "...")
170
+
171
+ # Update progress
172
+ self.update_progress(
173
+ "جاري تلخيص المستند...",
174
+ min(0.3 + (i / len(chunks)) * 0.4, 0.7)
175
+ )
176
+
177
+ # Combine summaries intelligently
178
+ final_summary = " ".join(summaries)
179
+
180
+ # Clean up the final summary
181
+ final_summary = self._clean_text(final_summary)
182
+ final_summary = self._process_arabic_text(final_summary)
183
+
184
+ return final_summary
185
+
186
+ except Exception as e:
187
+ print(f"Error in summarization: {str(e)}")
188
+ # Fallback to a simple extractive summary
189
+ return self._create_extractive_summary(text)
190
+
191
+ def _create_extractive_summary(self, text: str, sentences_count: int = 5) -> str:
192
+ """Create a simple extractive summary as a fallback method."""
193
+ try:
194
+ # Split text into sentences
195
+ sentences = re.split(r'[.!?]\s+', text)
196
+
197
+ # Remove very short sentences and clean
198
+ sentences = [s.strip() for s in sentences if len(s.strip()) > 30]
199
+
200
+ if not sentences:
201
+ return text[:500] + "..." # Return truncated text if no good sentences
202
+
203
+ # Score sentences based on position and length
204
+ scored_sentences = []
205
+ for i, sentence in enumerate(sentences):
206
+ score = 0
207
+ # Prefer sentences from the beginning and end of the document
208
+ if i < len(sentences) * 0.3: # First 30%
209
+ score += 2
210
+ elif i > len(sentences) * 0.7: # Last 30%
211
+ score += 1
212
+
213
+ # Prefer medium-length sentences
214
+ if 50 <= len(sentence) <= 200:
215
+ score += 1
216
+
217
+ scored_sentences.append((score, sentence))
218
+
219
+ # Sort by score and select top sentences
220
+ scored_sentences.sort(reverse=True)
221
+ selected_sentences = [s[1] for s in scored_sentences[:sentences_count]]
222
+
223
+ # Sort sentences by their original order
224
+ selected_sentences.sort(key=lambda s: sentences.index(s))
225
+
226
+ # Join sentences and clean
227
+ summary = ". ".join(selected_sentences)
228
+ summary = self._clean_text(summary)
229
+ summary = self._process_arabic_text(summary)
230
+
231
+ return summary
232
+
233
+ except Exception as e:
234
+ print(f"Error in extractive summary: {str(e)}")
235
+ return text[:500] + "..." # Return truncated text as last resort
236
+
237
+ def analyze_legal_issues(self, text: str) -> Dict:
238
+ """Analyze legal issues in the document using the Judge agent."""
239
+ judge_agent = create_judge_agent()
240
+
241
+ task_description = f"""
242
+ تحليل المستند التالي وتحديد المخالفات القانونية المحتملة وفقاً للقوانين الإماراتية:
243
+ {text}
244
+
245
+ يجب أن يتضمن التحليل:
246
+ 1. المخالفات القانونية المحتملة
247
+ 2. المواد القانونية ذات الصلة
248
+ 3. التوصيات للتصحيح
249
+ """
250
+
251
+ task = Task(
252
+ description=task_description,
253
+ agent=judge_agent,
254
+ expected_output="تحليل قانوني شامل للمخالفات والتوصيات"
255
+ )
256
+
257
+ crew = Crew(agents=[judge_agent], tasks=[task])
258
+ result = crew.kickoff()
259
+ return {"legal_analysis": result}
260
+
261
+ def map_to_uae_legislation(self, text: str) -> Dict:
262
+ """Map document content to relevant UAE laws and regulations."""
263
+ advocate_agent = create_advocate_agent()
264
+
265
+ task_description = f"""
266
+ تحليل المستند التالي وربطه بالقوانين والتشريعات الإماراتية ذات الصلة:
267
+ {text}
268
+
269
+ يجب أن يتضمن التحليل:
270
+ 1. القوانين الإماراتية ذات الصلة
271
+ 2. المواد القانونية المحددة
272
+ 3. التفسير القانوني للعلاقة
273
+ """
274
+
275
+ task = Task(
276
+ description=task_description,
277
+ agent=advocate_agent,
278
+ expected_output="خريطة تفصيلية للقوانين والتشريعات ذات الصلة"
279
+ )
280
+
281
+ crew = Crew(agents=[advocate_agent], tasks=[task])
282
+ result = crew.kickoff()
283
+ return {"legislation_mapping": result}
284
+
285
+ def process_document(self, pdf_bytes: bytes) -> Dict:
286
+ """Process the document through all steps with progress tracking."""
287
+ try:
288
+ # Extract text from PDF
289
+ self.update_progress("استخراج النص من المستند...", 0.1)
290
+ text = self.extract_text_from_pdf(pdf_bytes)
291
+
292
+ if not text.strip():
293
+ raise ValueError("لم يتم العثور على نص قابل للقراءة في المستند")
294
+
295
+ # Generate summary
296
+ self.update_progress("إنشاء ملخص للمستند...", 0.3)
297
+ summary = self.summarize_document(text)
298
+
299
+ # Analyze legal issues
300
+ self.update_progress("تحليل القضايا القانونية...", 0.5)
301
+ legal_analysis = self.analyze_legal_issues(text)
302
+
303
+ # Map to UAE legislation
304
+ self.update_progress("ربط المستند بالتشريعات الإماراتية...", 0.7)
305
+ legislation_mapping = self.map_to_uae_legislation(text)
306
+
307
+ self.update_progress("اكتمل التحليل!", 1.0)
308
+
309
+ return {
310
+ "summary": summary,
311
+ "legal_analysis": legal_analysis["legal_analysis"],
312
+ "legislation_mapping": legislation_mapping["legislation_mapping"],
313
+ "raw_text": text # Include raw text for translation if needed
314
+ }
315
+
316
+ except Exception as e:
317
+ self.update_progress(f"حدث خطأ: {str(e)}", 0)
318
+ raise
requirements.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core app dependencies
2
+ streamlit>=1.20.0
3
+ fastapi>=0.100.0,<1.0.0
4
+ langchain>=0.94.0,<0.95.0
5
+ openai>=0.27.0
6
+ chromadb==0.4.24
7
+
8
+ # PDF and OCR processing
9
+ PyPDF2>=3.0.0
10
+ pytesseract
11
+ pdf2image
12
+ reportlab
13
+
14
+ # NLP and Transformers
15
+ transformers>=4.30.0,<5.0.0
16
+ torch>=2.0.0,<3.0.0
17
+ sentencepiece
18
+ sacremoses
19
+ langdetect
20
+
21
+ # Arabic text handling
22
+ arabic-reshaper
23
+ python-bidi
24
+ python-docx
25
+
26
+ # SQLite utilities
27
+ sqlite-utils
style.css ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Arabic font and RTL support */
@import url('https://fonts.googleapis.com/css2?family=Cairo:wght@400;700&display=swap');

/* Right-to-left container for Arabic content */
.rtl-text {
    direction: rtl;
    text-align: right;
    font-family: 'Cairo', sans-serif;
}

/* Custom styling for the app */
.stApp {
    font-family: 'Cairo', sans-serif;
}

/* Text areas hold Arabic input, so flow right-to-left */
.stTextArea {
    direction: rtl;
}

.stButton button {
    font-family: 'Cairo', sans-serif;
    direction: rtl;
}

.stTab {
    font-family: 'Cairo', sans-serif;
}

/* Legal response formatting (right border: RTL accent stripe) */
.legal-response {
    background-color: #f8f9fa;
    border-radius: 5px;
    padding: 15px;
    margin: 10px 0;
    border-right: 4px solid #2e7d32;
}

/* Inline citation of laws/articles */
.legal-reference {
    color: #1976d2;
    font-weight: bold;
}

/* Category badges (pill-shaped inline labels) */
.category-badge {
    background-color: #e3f2fd;
    color: #1976d2;
    padding: 5px 10px;
    border-radius: 15px;
    font-size: 0.8em;
    margin: 5px;
    display: inline-block;
}
translator.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import MarianMTModel, MarianTokenizer, pipeline
2
+ import torch
3
+ from langdetect import detect
4
+ import re
5
+
6
class Translator:
    """Multilingual translator built on Helsinki-NLP MarianMT models.

    Models and tokenizers are cached per language pair in ``self.models``
    and ``self.tokenizers``, keyed by ``"src-tgt"`` ISO codes, and loaded
    lazily when a new pair is requested.
    """

    def __init__(self):
        # Per-language-pair caches, keyed "src-tgt" (e.g. "en-ar").
        self.models = {}
        self.tokenizers = {}
        # Display name -> ISO 639-1 code for the supported languages.
        self.language_codes = {
            'arabic': 'ar',
            'english': 'en',
            'chinese': 'zh',
            'hindi': 'hi',
            'urdu': 'ur'
        }

        # Pre-load the most common pairs; others are loaded on demand.
        self._load_model('en', 'ar')  # English to Arabic
        self._load_model('ar', 'en')  # Arabic to English
        # Add other language pairs as needed

    def _load_model(self, src_lang, tgt_lang):
        """Load translation model for a specific language pair.

        Failures are logged but not raised so a missing pair degrades
        gracefully; translate() raises ValueError later if still absent.
        """
        model_name = f'Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}'
        key = f'{src_lang}-{tgt_lang}'

        if key not in self.models:
            try:
                self.tokenizers[key] = MarianTokenizer.from_pretrained(model_name)
                self.models[key] = MarianMTModel.from_pretrained(model_name)
            except Exception as e:
                print(f"Error loading model for {key}: {str(e)}")

    def translate(self, text: str, source_lang: str, target_lang: str) -> str:
        """Translate text from source language to target language.

        Args:
            text: Text to translate.
            source_lang: Source language name (e.g. ``'english'``).
            target_lang: Target language name (e.g. ``'arabic'``).

        Returns:
            The translated text, or the original text if translation fails.

        Raises:
            ValueError: If a language is unsupported or its model is
                unavailable.
        """
        src_code = self.language_codes.get(source_lang.lower())
        tgt_code = self.language_codes.get(target_lang.lower())

        if not src_code or not tgt_code:
            raise ValueError("Unsupported language")

        key = f'{src_code}-{tgt_code}'

        # Lazily load the pair on first use.
        if key not in self.models:
            self._load_model(src_code, tgt_code)

        if key not in self.models:
            raise ValueError(f"Translation model not available for {source_lang} to {target_lang}")

        tokenizer = self.tokenizers[key]
        model = self.models[key]

        try:
            # Normalize the input before chunking.
            text = self.preprocess_text(text)

            # Translate in chunks that fit the model's context window.
            chunks = self._split_text_into_chunks(text)
            translated_chunks = []

            for chunk in chunks:
                # Release GPU memory between chunks.
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

                inputs = tokenizer(
                    chunk,
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=512,
                    add_special_tokens=True
                )

                # Generate with a small beam count for memory efficiency.
                with torch.no_grad():
                    translated = model.generate(
                        **inputs,
                        num_beams=2,
                        length_penalty=0.6,
                        max_length=512,
                        min_length=0,
                        early_stopping=True
                    )

                result = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
                translated_chunks.append(result)

            final_translation = ' '.join(translated_chunks)

            # Language-specific cleanup of the combined output.
            final_translation = self._post_process_translation(final_translation, target_lang)

            return final_translation

        except Exception as e:
            print(f"Translation error: {str(e)}")
            return text  # Return original text if translation fails

    def detect_language(self, text: str) -> str:
        """Detect the language of the input text.

        Returns one of the supported language names; defaults to
        ``'english'`` when detection fails or yields an unsupported code.
        """
        try:
            # Strip punctuation; langdetect works better on bare words.
            cleaned_text = re.sub(r'[^\w\s]', '', text)
            detected = detect(cleaned_text)

            # Map detected language code to our supported languages.
            lang_code_map = {
                'ar': 'arabic',
                'en': 'english',
                'zh': 'chinese',
                'hi': 'hindi',
                'ur': 'urdu'
            }

            return lang_code_map.get(detected, 'english')
        except Exception:
            # BUG FIX: was a bare "except:", which also swallowed
            # KeyboardInterrupt/SystemExit; narrow to Exception.
            return 'english'  # Default to English if detection fails

    def preprocess_text(self, text: str) -> str:
        """Preprocess text before translation.

        Collapses whitespace and removes characters that commonly
        confuse the translation model.
        """
        # Remove excessive whitespace.
        text = re.sub(r'\s+', ' ', text).strip()

        # Keep word characters, whitespace and basic punctuation only.
        text = re.sub(r'[^\w\s\.,!?-]', '', text)

        return text

    def get_supported_languages(self):
        """Return the list of supported language names."""
        return list(self.language_codes.keys())

    def _split_text_into_chunks(self, text: str, max_chunk_size: int = 450) -> list:
        """Split text into manageable chunks for translation.

        Splits on paragraph boundaries first; paragraphs longer than
        ``max_chunk_size`` are further split on sentence boundaries.
        """
        paragraphs = text.split('\n\n')
        chunks = []
        current_chunk = []
        current_length = 0

        for para in paragraphs:
            if len(para) > max_chunk_size:
                # Oversized paragraph: split on sentence punctuation,
                # re-attaching the captured punctuation to each sentence.
                sentences = re.split(r'([.!?])\s+', para)
                i = 0
                while i < len(sentences):
                    sentence = sentences[i]
                    if i + 1 < len(sentences):
                        sentence += sentences[i + 1]  # add back the punctuation
                        i += 2
                    else:
                        i += 1

                    if current_length + len(sentence) > max_chunk_size:
                        if current_chunk:
                            chunks.append(' '.join(current_chunk))
                            current_chunk = []
                            current_length = 0

                    current_chunk.append(sentence)
                    current_length += len(sentence)
            else:
                if current_length + len(para) > max_chunk_size:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
                    current_length = 0

                current_chunk.append(para)
                current_length += len(para)

        if current_chunk:
            chunks.append(' '.join(current_chunk))

        return chunks

    def _post_process_translation(self, text: str, target_lang: str) -> str:
        """Post-process translated text based on target language."""
        if target_lang.lower() in ['arabic', 'ar']:
            # Join Arabic letters split by stray spaces; strip bidi controls.
            text = re.sub(r'([ء-ي])\s+([ء-ي])', r'\1\2', text)
            text = re.sub(r'[\u200B-\u200F\u202A-\u202E]', '', text)

            # Fix common Arabic punctuation duplication issues.
            text = text.replace('،,', '،')
            text = text.replace('.,', '.')
            text = text.replace('؟?', '؟')
            text = text.replace('!!', '!')

            # Ensure spacing between Arabic letters and digits/Latin text.
            text = re.sub(r'([0-9])([ء-ي])', r'\1 \2', text)
            text = re.sub(r'([ء-ي])([0-9])', r'\1 \2', text)
            text = re.sub(r'([a-zA-Z])([ء-ي])', r'\1 \2', text)
            text = re.sub(r'([ء-ي])([a-zA-Z])', r'\1 \2', text)

        elif target_lang.lower() in ['english', 'en']:
            # Normalize punctuation spacing.
            text = re.sub(r'\s+([.,!?])', r'\1', text)
            text = re.sub(r'([.,!?])(?=[^\s])', r'\1 ', text)
            text = re.sub(r'\s+', ' ', text)
            text = text.replace(' ,', ',')
            text = text.replace(' .', '.')

            # Capitalize the first letter of each sentence.
            text = '. '.join(s.capitalize() for s in text.split('. '))

        return text.strip()

    def get_language_name(self, code: str) -> str:
        """Get the display name for a language code (code itself if unknown)."""
        names = {
            'ar': 'العربية',
            'en': 'English',
            'zh': '中文',
            'hi': 'हिंदी',
            'ur': 'اردو'
        }
        return names.get(code, code)
utils.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+ import re
3
+ from langchain.tools import Tool
4
+ from config import UAE_LEGAL_DOMAINS
5
+
6
def is_arabic(text: str) -> bool:
    """Return True when the text contains at least one Arabic character."""
    # Covers the Arabic, Arabic Supplement and Arabic Extended-A blocks.
    return re.search('[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]+', text) is not None
10
+
11
def create_uae_legal_tools() -> List[Tool]:
    """Create tools for UAE legal research.

    Returns:
        A list of LangChain tools wrapping the (currently simulated)
        UAE legal search helpers.
    """
    # Pass the helper functions directly instead of wrapping each one in
    # an identity lambda — same behavior, one less indirection.
    tools = [
        Tool(
            name="UAE Legal Database Search",
            func=search_uae_legal_database,
            description="Search UAE legal databases for laws, regulations, and precedents"
        ),
        Tool(
            name="Arabic Legal Term Translation",
            func=translate_legal_term,
            description="Translate legal terms between Arabic and English"
        ),
        Tool(
            name="UAE Case Law Search",
            func=search_uae_case_law,
            description="Search UAE case law and legal precedents"
        )
    ]
    return tools
31
+
32
def search_uae_legal_database(query: str) -> str:
    """Simulate searching UAE legal databases."""
    # Placeholder: a production version would query real UAE legal databases.
    return "Found relevant UAE legal information for: " + query
36
+
37
def translate_legal_term(term: str) -> str:
    """Simulate legal term translation."""
    # Placeholder: a production version would consult a legal-terms glossary.
    return "Translation for: {}".format(term)
41
+
42
def search_uae_case_law(query: str) -> str:
    """Simulate searching UAE case law."""
    # Placeholder: a production version would search real case-law databases.
    return "Found relevant UAE case law for: %s" % query
46
+
47
def format_legal_response(response: str, language: str = 'ar') -> str:
    """Format legal responses with proper styling and language direction."""
    if language != 'ar':
        return response
    # Wrap Arabic output in an RTL container so it renders correctly.
    return f'<div dir="rtl">{response}</div>'