XINZHANG-Geotab committed (verified)
Commit 364f00c · Parent: 4731e23

Upload 3 files

Files changed (3):
  1. utils/__init__.py        +11 -3
  2. utils/refine_summary.py   +9 -59
  3. utils/templates.py       +39 -0
utils/__init__.py CHANGED
@@ -1,6 +1,14 @@
 from .refine_summary import RefineDataSummarizer
-
+from .templates import (
+    prompt_template,
+    refine_template,
+    prompt_template_bullet_point,
+    refine_prompt_template_bullet_point
+)
 __all__ = [
-
-    "RefineDataSummarizer"
+    "RefineDataSummarizer",
+    "prompt_template",
+    "refine_template",
+    "prompt_template_bullet_point",
+    "refine_prompt_template_bullet_point"
 ]
utils/refine_summary.py CHANGED
@@ -24,72 +24,22 @@ class RefineDataSummarizer:
 
     def __init__(
         self,
-        llm: BaseChatModel
+        llm: BaseChatModel,
+        prompt_template: str,
+        refine_template: str,
+
     ):
         """Initialize the data summarizer."""
         self.llm = llm
         self.llm_model = self.llm.model_name
-        prompt_template_bullet_point = (
-            "Write a summary of the following text.\n"
-            "TEXT: {text}\n"
-            "SUMMARY:\n"
-        )
-
-        prompt_bullet_point = PromptTemplate(
-            template=prompt_template_bullet_point, input_variables=["text"]
-        )
-
-        refine_prompt_template_bullet_point = (
-            "Write a concise summary of the following text delimited by triple backquotes.\n"
-            "Return your response in bullet points which covers the key points of the text.\n"
-            " ```{text}```\n"
-            "BULLET POINT SUMMARY:\n"
-        )
-
-        refine_prompt_bullet_point = PromptTemplate(
-            template=refine_prompt_template_bullet_point, input_variables=["text"]
-        )
-
-        prompt_template = (
-            "Write a concise summary of the following:\n"
-            "{text}\n"
-            "CONCISE SUMMARY:\n"
-        )
-
-        prompt = PromptTemplate.from_template(prompt_template)
-
-        refine_template = (
-            "Your job is to produce a final summary\n"
-            "We have provided an existing summary up to a certain point: {existing_answer}\n"
-            "We have the opportunity to refine the existing summary"
-            "(only if needed) with some more context below.\n"
-            "------------\n"
-            "{text}\n"
-            "------------\n"
-            "Given the new context, refine the original summary.\n"
-            "If the context isn't useful, return the original summary."
-        )
-        refine_prompt = PromptTemplate.from_template(refine_template)
-
-        self.prompt = prompt
-        self.refine_prompt = refine_prompt
-
-        self.prompt_bullet_point = prompt_bullet_point
-        self.refine_prompt_bullet_point = refine_prompt_bullet_point
+        self.prompt = PromptTemplate.from_template(prompt_template.strip())
+        self.refine_prompt = PromptTemplate.from_template(refine_template.strip())
 
     def get_summarization(self,
                           text: str,
                           chunk_num: int = 5,
-                          chunk_overlap: int = 30,
-                          bullet_point: bool = True) -> Dict:
+                          chunk_overlap: int = 30) -> Dict:
         """Get Summarization."""
-        if bullet_point:
-            prompt = self.prompt_bullet_point
-            refine_prompt = self.refine_prompt_bullet_point
-        else:
-            prompt = self.prompt
-            refine_prompt = self.refine_prompt
-
         text_splitter = TokenTextSplitter(
             chunk_size=self.token_limit[self.llm_model] // chunk_num,
             chunk_overlap=chunk_overlap,
@@ -98,8 +48,8 @@ class RefineDataSummarizer:
         chain = load_summarize_chain(
            llm=self.llm,
            chain_type="refine",
-           question_prompt=prompt,
-           refine_prompt=refine_prompt,
+           question_prompt=self.prompt,
+           refine_prompt=self.refine_prompt,
            return_intermediate_steps=True,
            input_key="input_documents",
            output_key="output_text",
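
With this change the default prompts are no longer hard-coded in the constructor; callers now pass the prompt and refine templates in explicitly. A minimal usage sketch of the new signature, assuming a LangChain chat model (the ChatOpenAI instance and model name below are illustrative, not part of this commit) and the templates exported from utils/templates.py below:

# Sketch only: any BaseChatModel exposing model_name should work the same way.
from langchain.chat_models import ChatOpenAI

from utils import RefineDataSummarizer, prompt_template, refine_template

llm = ChatOpenAI(model_name="gpt-3.5-turbo")  # hypothetical model choice
summarizer = RefineDataSummarizer(
    llm=llm,
    prompt_template=prompt_template,
    refine_template=refine_template,
)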
utils/templates.py ADDED
@@ -0,0 +1,39 @@
+# regular template
+prompt_template = (
+    "Write a concise summary of the following:\n"
+    "{text}\n"
+    "CONCISE SUMMARY:\n"
+)
+
+
+refine_template = (
+    "Your job is to produce a final summary\n"
+    "We have provided an existing summary up to a certain point: {existing_answer}\n"
+    "We have the opportunity to refine the existing summary"
+    "(only if needed) with some more context below.\n"
+    "------------\n"
+    "{text}\n"
+    "------------\n"
+    "Given the new context, refine the original summary.\n"
+    "If the context isn't useful, return the original summary."
+)
+
+
+# bullet points template
+prompt_template_bullet_point = (
+    "Write a concise summary of the following in bullet points:\n"
+    "{text}\n"
+    "BULLET POINTS SUMMARY:\n"
+)
+
+refine_prompt_template_bullet_point = (
+    "Your job is to produce a final summary in bullet points\n"
+    "We have provided an existing bullet points summary up to a certain point: {existing_answer}\n"
+    "We have the opportunity to refine the existing bullet points"
+    "(only if needed) with some more context below.\n"
+    "------------\n"
+    "{text}\n"
+    "------------\n"
+    "Given the new context, refine the original bullet points summary.\n"
+    "If the context isn't useful, return the original bullet points."
+)
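
The removed bullet_point flag is replaced by template selection: passing the bullet-point pair instead of the regular pair switches the summary style. A short sketch, continuing the example above (long_text stands in for the document to summarize; the result dict is keyed by output_text as configured in get_summarization):

# Sketch only: choose bullet-point output by passing the bullet-point templates.
from utils import (
    RefineDataSummarizer,
    prompt_template_bullet_point,
    refine_prompt_template_bullet_point,
)

bullet_summarizer = RefineDataSummarizer(
    llm=llm,  # same chat model as in the previous sketch
    prompt_template=prompt_template_bullet_point,
    refine_template=refine_prompt_template_bullet_point,
)
result = bullet_summarizer.get_summarization(text=long_text, chunk_num=5, chunk_overlap=30)
print(result["output_text"])  # final refined bullet-point summary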