linyongver commited on
Commit
2302175
·
verified ·
1 Parent(s): e9e28ee

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +60 -0
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
4
+
5
+ This is the formalizer for tranlating infromal math problem into formal statement in Lean 4. We use the data pair annotated by Claude Sonnet 3.5 to train a Qwen-2.5-32B-Coder.
6
+
7
+ Here is an example code to use this formalizer,
8
+ ```
9
+ import re
10
+ from transformers import AutoTokenizer
11
+ from vllm import LLM, SamplingParams
12
+ import os
13
+ import json
14
+
15
+ def statement_translation_inference(informal_statement):
16
+ return F"""
17
+ I want you to translate a informal statment to formal statement in lean 4, the informal statement of the problem is:
18
+
19
+ {informal_statement}
20
+
21
+ The output is
22
+ """
23
+
24
+ model_name = "Goedel-LM/Goedel-Formalizer-32B-SonnetAnnotated"
25
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
26
+ model = LLM(model=model_name, seed=1, trust_remote_code=True, swap_space=8, tensor_parallel_size=2, max_model_len=4096)
27
+
28
+
29
+ sampling_params = SamplingParams(
30
+ temperature=1.0,
31
+ max_tokens=2048,
32
+ top_p=0.95,
33
+ n=args.n,
34
+ )
35
+
36
+ data_list = [{
37
+ "informal_statement": "Consider the terms of an arithmetic sequence: $-\frac{1}{3}, y+2, 4y, \ldots$. Solve for $y$."
38
+ }]
39
+
40
+ model_inputs = [statement_translation_inference(idata["informal_statement"], idata["informal_proof"]) for idata in data_list]
41
+
42
+ model_outputs = model.generate(
43
+ model_inputs,
44
+ sampling_params,
45
+ use_tqdm=True,
46
+ )
47
+ ```
48
+
49
+ ## Citation
50
+ ```latex
51
+ @misc{lin2025goedelproverfrontiermodelopensource,
52
+ title={Goedel-Prover: A Frontier Model for Open-Source Automated Theorem Proving},
53
+ author={Yong Lin and Shange Tang and Bohan Lyu and Jiayun Wu and Hongzhou Lin and Kaiyu Yang and Jia Li and Mengzhou Xia and Danqi Chen and Sanjeev Arora and Chi Jin},
54
+ year={2025},
55
+ eprint={2502.07640},
56
+ archivePrefix={arXiv},
57
+ primaryClass={cs.LG},
58
+ url={https://arxiv.org/abs/2502.07640},
59
+ }
60
+ ```