Update README.md
Browse files
README.md
CHANGED
@@ -59,3 +59,25 @@ Don't make up value not in the Input. Output must be a well-formed JSON object.`
|
|
59 |
}
|
60 |
}
|
61 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
}
|
60 |
}
|
61 |
```
|
62 |
+
|
63 |
+
# Load model directly
#
# Usage snippet: loads the fine-tuned checkpoint, builds the instruction
# prompt around the OCR text held in `receipt_boxes`, generates up to 512 new
# tokens and prints the decoded sequence.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# The tokenized inputs are moved to `device` below, so the model has to live
# on the same device; prefer a GPU when one is visible to PyTorch.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("mychen76/mistral7b_ocr_to_json_v1")
model = AutoModelForCausalLM.from_pretrained("mychen76/mistral7b_ocr_to_json_v1").to(device)

# NOTE: `receipt_boxes` is not defined in this snippet; it must already hold
# the receipt OCR result to be parsed before the prompt is built.
# The template text is kept verbatim because it is the prompt format shown in
# this model card.
prompt=f"""### Instruction:
You are POS receipt data expert, parse, detect, recognize and convert following receipt OCR image result into structure receipt data object.
Don't make up value not in the Input. Output must be a well-formed JSON object.```json

### Input:
{receipt_boxes}

### Output:
"""

# Inference only: no autograd bookkeeping is needed while generating.
with torch.inference_mode():
    inputs = tokenizer(prompt,return_tensors="pt",truncation=True).to(device)
    outputs = model.generate(**inputs, max_new_tokens=512)

# `batch_decode` returns one string per sequence; a single prompt was passed,
# so the first entry is the whole result (prompt plus generated text).
result_text = tokenizer.batch_decode(outputs)[0]
print(result_text)
|
83 |
+
|