Spaces:
Restarting
on
T4
Restarting
on
T4
Update ppt_chunker.py
Browse files- ppt_chunker.py +19 -12
ppt_chunker.py
CHANGED
@@ -165,6 +165,25 @@ def ppt_chunk(file_like, nlp):
|
|
165 |
raise NotImplementedError(
|
166 |
"file type not supported yet(pptx)")
|
167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
def ppt_chunker(file_like, llm):
|
169 |
import time
|
170 |
|
@@ -182,18 +201,6 @@ def ppt_chunker(file_like, llm):
|
|
182 |
current_chunk = ''
|
183 |
continue
|
184 |
|
185 |
-
if elem.category == 'Table':
|
186 |
-
output = llm.chat(
|
187 |
-
build_prompt_conv(),
|
188 |
-
SamplingParams(temperature=0.6,top_p=0.9, max_tokens=10, top_k=10)
|
189 |
-
)
|
190 |
-
print(f'OUTPUT : {output[0].outputs[0].text}')
|
191 |
-
|
192 |
-
if current_chunk == '':
|
193 |
-
current_chunk = output[0].outputs[0].text.replace('"', '')
|
194 |
-
else:
|
195 |
-
current_chunk += '\n' + output[0].outputs[0].text.replace('"', '')
|
196 |
-
|
197 |
if current_chunk == '':
|
198 |
current_chunk = clean(elem.text, extra_whitespace=True, dashes=True, bullets=True, lowercase=True, trailing_punctuation=True)
|
199 |
else:
|
|
|
165 |
raise NotImplementedError(
|
166 |
"file type not supported yet(pptx)")
|
167 |
|
168 |
+
def build_prompt_conv(user_input=None):
    """Build the chat conversation sent to the LLM for intent summarisation.

    The conversation has a system message that sets an "innovator" persona
    and a user message asking for a single-sentence summary of a question.

    Args:
        user_input: Text inserted after ``Question :`` in the user message.
            When ``None`` (the default), the value is read from
            ``st.session_state.user_input``, so existing zero-argument
            callers behave as before.

    Returns:
        A list of two dicts, each with ``'role'`` and ``'content'`` keys:
        the system message followed by the user message.
    """
    if user_input is None:
        # Fall back to the Streamlit session only when the caller gave no
        # text; this keeps the function usable outside a Streamlit run.
        user_input = st.session_state.user_input

    system_content = (
        "Assume the role of an innovator who thrives on creativity and "
        "resourcefulness. Your responses should encourage new approaches "
        "and challenge conventional thinking.\n"
        "\n"
        "Behavior: Focus on brainstorming and ideation, offering "
        "unconventional solutions to problems.\n"
        "\n"
        "Mannerisms: Use energetic, enthusiastic language that reflects "
        "your innovative spirit. Frequently propose ideas that are bold "
        "and forward-looking."
    )
    user_content = (
        "Generate a short, single-sentence summary of the user's intent or "
        "topic based on their question, capturing the main focus of what "
        "they want to discuss.\n"
        "\n"
        f"Question : {user_input}\n"
    )

    return [
        {'role': 'system', 'content': system_content},
        {'role': 'user', 'content': user_content},
    ]
|
186 |
+
|
187 |
def ppt_chunker(file_like, llm):
|
188 |
import time
|
189 |
|
|
|
201 |
current_chunk = ''
|
202 |
continue
|
203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
if current_chunk == '':
|
205 |
current_chunk = clean(elem.text, extra_whitespace=True, dashes=True, bullets=True, lowercase=True, trailing_punctuation=True)
|
206 |
else:
|