Tonic commited on
Commit
5d6783e
·
verified ·
1 Parent(s): 58755ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -134
app.py CHANGED
@@ -3,39 +3,26 @@ import re
3
  import gradio as gr
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
5
  import torch
 
6
 
7
- title = """# 🙋🏻‍♂️Welcome to🌟Tonic's🔮DeepSeekMath📉
8
- You can build with this endpoint using🔮DeepSeekMath📉 available here : [deepseek-ai/deepseek-math-7b-instruct](https://huggingface.co/deepseek-ai/deepseek-math-7b-instruct). We're using 🤖[introspector/unimath](https://huggingface.co/datasets/introspector/unimath) for cool examples, check it out below ! The demo is still a work in progress and we're looking forward to build downstream tasks that showcase outstanding mathematical reasoning. Have any ideas ? join us below !
9
- You can also use 🔮DeepSeekMath📉 by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/Math?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a></h3>
10
- Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) Math with [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [SciTonic](https://github.com/Tonic-AI/scitonic)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
11
- """
12
-
13
-
14
- LEAN4_DEFAULT_HEADER = """import Mathlib
15
- import Aesop
16
-
17
- set_option maxHeartbeats 0
18
-
19
- open BigOperators Real Nat Topology Rat
20
-
21
- """
22
-
23
-
24
- additional_info_prompt = "Explain the above using mathematics, print entire answer in latex format:"
25
 
 
 
26
 
27
- def format_prompt(question, additional_info):
28
- return f"""Complete the following Lean 4 code with explanatory comments preceding each line of code:
29
-
30
- ```lean4
31
- {LEAN4_DEFAULT_HEADER}
32
- {question}
33
- {additional_info}
34
- """
35
 
 
 
 
 
 
 
36
 
 
37
  unimath1 = """Goal:
38
-
39
  X : UU
40
  Y : UU
41
  P : UU
@@ -44,95 +31,24 @@ unimath1 = """Goal:
44
  X0 : X × Y → P
45
  x : X
46
  ============================
47
- (Y → P)
48
-
49
-
50
- DEBUG:Going to execute:
51
- PTRDEBUGTAC<coq-core.plugins.ltac::intro@1> $1
52
- DEBUG LTAC Evaluated term: yp
53
-
54
- TcDebug (0) > NONE??? LtacNotationCall (no location),/mnt/data1/2024/01/05/UniMath/UniMath/Foundations/PartA.v:234
55
- """
56
- # source : unimath/unimath/batch2/data08
57
 
58
  unimath2 = """Goal:
59
  R : ring M : module R
60
  ============================
61
- (islinear (idfun M))
62
-
63
-
64
- DEBUG:Going to execute:
65
- PTRDEBUGTACapply pathsinv0; trivial
66
- Level 0: Backtrace:
67
-
68
- Proof is not complete.
69
- Level 0: Backtrace:
70
-
71
- Proof is not complete.
72
-
73
- TcDebug (0) > NONE??? LtacNotationCall (no location),/mnt/data1/2024/01/05/UniMath/UniMath/MoreFoundations/Tactics.veasy:19, Tactics (UniMath.MoreFoundations),/mnt/data1/2024/01/05/UniMath/UniMath/Algebra/Modules/Examples.v:27
74
-
75
- """
76
- # source : unimath/unimath/batch2/data_22/BATCH122007
77
 
78
  unimath3 = """Goal:
79
  X : UU i : nat b : hProptoType (i < S i) x : Vector X (S i) r : i = i
80
  ============================
81
- (pr1 lastelement = pr1 (i,, b))
82
-
83
-
84
- DEBUG:Going to execute:
85
- PTRDEBUGTACsimpl
86
- DEBUG LTAC Evaluated term: isinjstntonat
87
-
88
- TcDebug (0) > /mnt/data1/2024/01/05/UniMath/UniMath/Combinatorics/FiniteSequences.v:114
89
- """
90
- # source : unimath/unimath/batch2/data_12/BATCH112026
91
 
92
  unimath4 = """Goal:
93
  X : dcpo CX : continuous_dcpo_struct X x : pr1hSet X y : pr1hSet X
94
  ============================
95
- (x ⊑ y
96
- ≃ (∀ i : approximating_family CX x, approximating_family CX x i ⊑ y))
97
-
98
-
99
- DEBUG:Going to execute:
100
- PTRDEBUGTACsimple refine (p _ _ _) ||
101
- simple refine (p _ _ _ _) ||
102
- simple refine (p _ _ _ _ _) ||
103
- simple refine (p _ _ _ _ _ _) ||
104
- simple refine (p _ _ _ _ _ _ _) ||
105
- simple refine (p _ _ _ _ _ _ _ _) ||
106
- simple refine (p _ _ _ _ _ _ _ _ _) ||
107
- simple refine (p _ _ _ _ _ _ _ _ _ _) ||
108
- simple refine (p _ _ _ _ _ _ _ _ _ _ _) ||
109
- simple refine (p _ _ _ _ _ _ _ _ _ _ _ _) ||
110
- simple refine (p _ _ _ _ _ _ _ _ _ _ _ _ _) ||
111
- simple refine
112
- (p _ _ _ _ _ _ _ _ _ _ _ _ _ _) || simple
113
- refine (p _ _ _ _ _ _ _ _ _ _ _ _ _ _ _)
114
- Level 0: Backtrace:
115
-
116
- In environment
117
- X : dcpo
118
- CX : continuous_dcpo_struct X
119
- x, y : X
120
- The term "weqimplimpl ?f ?g" has type "isaprop ?X → isaprop ?Y → ?X ≃ ?Y"
121
- while it is expected to have type
122
- "x ⊑ y ≃ (∀ i : approximating_family CX x, approximating_family CX x i ⊑ y)".
123
- Level 0: Backtrace:
124
-
125
- In environment
126
- X : dcpo
127
- CX : continuous_dcpo_struct X
128
- x, y : X
129
- The term "weqimplimpl ?f ?g" has type "isaprop ?X → isaprop ?Y → ?X ≃ ?Y"
130
- while it is expected to have type
131
- "x ⊑ y ≃ (∀ i : approximating_family CX x, approximating_family CX x i ⊑ y)".
132
-
133
- TcDebug (0) > NONE??? LtacNotationCall (no location),/mnt/data1/2024/01/05/UniMath/UniMath/Foundations/Init.vsimple_rapply:174, Init (UniMath.Foundations),/mnt/data1/2024/01/05/UniMath/UniMath/Foundations/Init.v??? LtacNotationCall:189, Init (UniMath.Foundations),/mnt/data1/2024/01/05/UniMath/UniMath/OrderTheory/DCPOs/Basis/Continuous.v:166
134
- """
135
- # source : unimath/unimath/batch2/data_42/BATCH142042
136
 
137
  examples = [
138
  [unimath1, additional_info_prompt, 1200],
@@ -144,8 +60,6 @@ examples = [
144
  model_name = "Goedel-LM/Goedel-Prover-SFT"
145
  tokenizer = AutoTokenizer.from_pretrained(model_name)
146
  model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
147
- # model.generation_config = GenerationConfig.from_pretrained(model_name)
148
- # model.generation_config.pad_token_id = model.generation_config.eos_token_id
149
 
150
  # Set generation config
151
  model.generation_config = GenerationConfig.from_pretrained(model_name)
@@ -153,29 +67,33 @@ model.generation_config.pad_token_id = model.generation_config.eos_token_id
153
  model.generation_config.bos_token_id = 100000
154
  model.generation_config.eos_token_id = 100001
155
 
156
-
157
- def parse_full_answer(answer):
158
- """Parses the assistant's answer, excluding any text before 'Assistant :'."""
159
- match = re.search(r"Assistant\s*:\s*(.*)", answer, re.DOTALL)
160
- return match.group(1).strip() if match else "No assistant answer found."
161
-
162
- def parse_final_answer(answer):
163
- """Extracts the final answer enclosed within \boxed{}."""
164
- match = re.search(r"\\boxed\{([^}]+)\}", answer)
165
- return match.group(1) if match else "No final answer found."
166
-
167
  @spaces.GPU
168
- def solve_math_problem(question, additional_info, max_tokens):
169
  # Format the prompt using Lean4 structure
170
- prompt = format_prompt(question, additional_info)
 
171
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
172
- outputs = model.generate(input_ids, max_length=max_tokens + input_ids.shape[1], pad_token_id=model.generation_config.pad_token_id)
 
 
 
 
 
 
 
173
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
174
- full_answer = parse_full_answer(result)
175
- final_answer = parse_final_answer(result)
176
- return full_answer, final_answer
177
-
178
-
 
 
 
 
 
 
 
179
 
180
  def main():
181
  iface = gr.Interface(
@@ -184,18 +102,15 @@ def main():
184
  You can also use 🔮Goedel Prover📉 by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/Math?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a></h3>
185
  Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) Math with [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [SciTonic](https://github.com/Tonic-AI/scitonic)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
186
  """,
187
-
188
-
189
-
190
  fn=solve_math_problem,
191
  outputs=[
192
- gr.Code(label="🔮TonicsMathAssistant📉", interactive=False),
193
- gr.Textbox(label="Final Answer")
194
  ],
195
  inputs=[
196
- gr.Textbox(label="🤔Enter your math problem", lines=7),
197
- gr.Textbox(value=additional_info_prompt, label="🪜Optional train-of-thought"),
198
- gr.Slider(minimum=150, maximum=1200, value=650, label="🪙Max Tokens")
199
  ],
200
  examples=examples
201
  )
@@ -203,4 +118,4 @@ Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder
203
  iface.launch()
204
 
205
  if __name__ == "__main__":
206
- main()
 
3
  import gradio as gr
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
5
  import torch
6
+ import json
7
 
8
+ LEAN4_DEFAULT_HEADER = "import Mathlib\nimport Aesop\n\nset_option maxHeartbeats 0\n\nopen BigOperators Real Nat Topology Rat\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ title = """# 🙋🏻‍♂️Welcome to🌟Tonic's🔮Goedel Prover📉
11
+ You can build with this endpoint using🔮Goedel-Prover-SFT📉 available here : [Goedel-LM/Goedel-Prover-SFT](https://huggingface.co/Goedel-LM/Goedel-Prover-SFT)."""
12
 
13
+ def format_prompt(formal_statement, informal_prefix=""):
14
+ """Format the input according to the Lean4 structure"""
15
+ return f"Complete the following Lean 4 code with explanatory comments preceding each line of code:\n\n```lean4\n{LEAN4_DEFAULT_HEADER}{informal_prefix}{formal_statement}"
 
 
 
 
 
16
 
17
+ def extract_code(response):
18
+ """Extract code between lean4 code blocks"""
19
+ try:
20
+ return re.search(r'```lean4\n(.*?)\n```', response, re.DOTALL).group(1)
21
+ except:
22
+ return "None"
23
 
24
+ # Example problems
25
  unimath1 = """Goal:
 
26
  X : UU
27
  Y : UU
28
  P : UU
 
31
  X0 : X × Y → P
32
  x : X
33
  ============================
34
+ (Y → P)"""
 
 
 
 
 
 
 
 
 
35
 
36
  unimath2 = """Goal:
37
  R : ring M : module R
38
  ============================
39
+ (islinear (idfun M))"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  unimath3 = """Goal:
42
  X : UU i : nat b : hProptoType (i < S i) x : Vector X (S i) r : i = i
43
  ============================
44
+ (pr1 lastelement = pr1 (i,, b))"""
 
 
 
 
 
 
 
 
 
45
 
46
  unimath4 = """Goal:
47
  X : dcpo CX : continuous_dcpo_struct X x : pr1hSet X y : pr1hSet X
48
  ============================
49
+ (x ⊑ y ≃ (∀ i : approximating_family CX x, approximating_family CX x i ⊑ y))"""
50
+
51
+ additional_info_prompt = "/-Explain using mathematics-/"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  examples = [
54
  [unimath1, additional_info_prompt, 1200],
 
60
  model_name = "Goedel-LM/Goedel-Prover-SFT"
61
  tokenizer = AutoTokenizer.from_pretrained(model_name)
62
  model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
 
 
63
 
64
  # Set generation config
65
  model.generation_config = GenerationConfig.from_pretrained(model_name)
 
67
  model.generation_config.bos_token_id = 100000
68
  model.generation_config.eos_token_id = 100001
69
 
 
 
 
 
 
 
 
 
 
 
 
70
  @spaces.GPU
71
+ def solve_math_problem(question, informal_prefix, max_tokens):
72
  # Format the prompt using Lean4 structure
73
+ prompt = format_prompt(question, informal_prefix)
74
+
75
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
76
+ outputs = model.generate(
77
+ input_ids,
78
+ max_length=max_tokens + input_ids.shape[1],
79
+ pad_token_id=model.generation_config.pad_token_id,
80
+ temperature=1.0,
81
+ top_p=0.95,
82
+ )
83
+
84
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
85
+
86
+ # Extract the full code from the response
87
+ full_code = extract_code(prompt + result)
88
+
89
+ # Create output dictionary similar to reference code
90
+ output_data = {
91
+ "model_input": prompt,
92
+ "model_output": result,
93
+ "full_code": full_code
94
+ }
95
+
96
+ return json.dumps(output_data, indent=2), full_code
97
 
98
  def main():
99
  iface = gr.Interface(
 
102
  You can also use 🔮Goedel Prover📉 by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/Math?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a></h3>
103
  Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) Math with [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [SciTonic](https://github.com/Tonic-AI/scitonic)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
104
  """,
 
 
 
105
  fn=solve_math_problem,
106
  outputs=[
107
+ gr.JSON(label="Full Output"),
108
+ gr.Code(label="Extracted Lean4 Code", language="lean")
109
  ],
110
  inputs=[
111
+ gr.Textbox(label="🤔Enter your Lean4 formal statement", lines=7),
112
+ gr.Textbox(value=additional_info_prompt, label="🪜Optional informal prefix"),
113
+ gr.Slider(minimum=150, maximum=2048, value=650, label="🪙Max Tokens")
114
  ],
115
  examples=examples
116
  )
 
118
  iface.launch()
119
 
120
  if __name__ == "__main__":
121
+ main()