Update README.md
Browse files
README.md
CHANGED
@@ -536,4 +536,83 @@ for y in result:
|
|
536 |
# {"store_name": "Trader Joe's"}
|
537 |
# {"names": ["John", "Mary", "James"]}
|
538 |
# {"names": ["JOHN", "MARY", "JAMES"], "female_names": ["MARY"]}
|
539 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
536 |
# {"store_name": "Trader Joe's"}
|
537 |
# {"names": ["John", "Mary", "James"]}
|
538 |
# {"names": ["JOHN", "MARY", "JAMES"], "female_names": ["MARY"]}
|
539 |
+
```
|
540 |
+
|
541 |
+
## Template Generation
|
542 |
+
If you already have schema files in another format (e.g. XML or YAML), or would rather start from an example, NuExtract 2 models can automatically generate a NuExtract template for you.
|
543 |
+
|
544 |
+
For example, to convert an XML schema into a NuExtract template:
|
545 |
+
```python
|
546 |
+
def generate_template(description):
    """Generate a NuExtract template from a schema or free-text description.

    Args:
        description: Text passed to the model as the single input message
            (e.g. an XML schema or a natural-language request).

    Returns:
        The first element of the result returned by ``nuextract_generate``.
    """
    # Sampling settings used for template generation.
    sampling_options = {
        "do_sample": True,
        "temperature": 0.4,
        "max_new_tokens": 256,
    }

    # Text-only request: one message and no images.
    prepared = prepare_inputs(
        messages=[description],
        image_paths=[],
        tokenizer=tokenizer,
    )

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        outputs = nuextract_generate(
            model=model,
            tokenizer=tokenizer,
            prompts=prepared["prompts"],
            pixel_values_list=prepared["pixel_values_list"],
            num_patches_list=prepared["num_patches_list"],
            generation_config=sampling_options,
        )

    return outputs[0]
|
566 |
+
|
567 |
+
xml_template = """<SportResult>
|
568 |
+
<Date></Date>
|
569 |
+
<Sport></Sport>
|
570 |
+
<Venue></Venue>
|
571 |
+
<HomeTeam></HomeTeam>
|
572 |
+
<AwayTeam></AwayTeam>
|
573 |
+
<HomeScore></HomeScore>
|
574 |
+
<AwayScore></AwayScore>
|
575 |
+
<TopScorer></TopScorer>
|
576 |
+
</SportResult>"""
|
577 |
+
result = generate_template(xml_template)
|
578 |
+
|
579 |
+
print(result)
|
580 |
+
# {
|
581 |
+
# "SportResult": {
|
582 |
+
# "Date": "date-time",
|
583 |
+
# "Sport": "verbatim-string",
|
584 |
+
# "Venue": "verbatim-string",
|
585 |
+
# "HomeTeam": "verbatim-string",
|
586 |
+
# "AwayTeam": "verbatim-string",
|
587 |
+
# "HomeScore": "integer",
|
588 |
+
# "AwayScore": "integer",
|
589 |
+
# "TopScorer": "verbatim-string"
|
590 |
+
# }
|
591 |
+
# }
|
592 |
+
```
|
593 |
+
|
594 |
+
For example, to generate a template from a natural-language description:
|
595 |
+
```python
|
596 |
+
text = """Give me relevant info about startup companies mentioned."""
|
597 |
+
result = generate_template(text)
|
598 |
+
|
599 |
+
print(result)
|
600 |
+
# {
|
601 |
+
# "Startup_Companies": [
|
602 |
+
# {
|
603 |
+
# "Name": "verbatim-string",
|
604 |
+
# "Products": [
|
605 |
+
# "string"
|
606 |
+
# ],
|
607 |
+
# "Location": "verbatim-string",
|
608 |
+
# "Company_Type": [
|
609 |
+
# "Technology",
|
610 |
+
# "Finance",
|
611 |
+
# "Health",
|
612 |
+
# "Education",
|
613 |
+
# "Other"
|
614 |
+
# ]
|
615 |
+
# }
|
616 |
+
# ]
|
617 |
+
# }
|
618 |
+
```
|