liamcripwell committed on
Commit
a9f328c
·
verified ·
1 Parent(s): d82590b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +80 -1
README.md CHANGED
@@ -536,4 +536,83 @@ for y in result:
536
  # {"store_name": "Trader Joe's"}
537
  # {"names": ["John", "Mary", "James"]}
538
  # {"names": ["JOHN", "MARY", "JAMES"], "female_names": ["MARY"]}
539
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
  # {"store_name": "Trader Joe's"}
537
  # {"names": ["John", "Mary", "James"]}
538
  # {"names": ["JOHN", "MARY", "JAMES"], "female_names": ["MARY"]}
539
+ ```
540
+
541
+ ## Template Generation
542
+ If you want to convert existing schema files from other formats (e.g. XML or YAML) into a NuExtract template, or create a template from an example, NuExtract 2 models can automatically generate the template for you.
543
+
544
+ E.g. convert XML into a NuExtract template:
545
+ ```python
546
+ def generate_template(description):
547
+ input_messages = [description]
548
+ input_content = prepare_inputs(
549
+ messages=input_messages,
550
+ image_paths=[],
551
+ tokenizer=tokenizer,
552
+ )
553
+
554
+ generation_config = {"do_sample": True, "temperature": 0.4, "max_new_tokens": 256}
555
+
556
+ with torch.no_grad():
557
+ result = nuextract_generate(
558
+ model=model,
559
+ tokenizer=tokenizer,
560
+ prompts=input_content['prompts'],
561
+ pixel_values_list=input_content['pixel_values_list'],
562
+ num_patches_list=input_content['num_patches_list'],
563
+ generation_config=generation_config
564
+ )
565
+ return result[0]
566
+
567
+ xml_template = """<SportResult>
568
+ <Date></Date>
569
+ <Sport></Sport>
570
+ <Venue></Venue>
571
+ <HomeTeam></HomeTeam>
572
+ <AwayTeam></AwayTeam>
573
+ <HomeScore></HomeScore>
574
+ <AwayScore></AwayScore>
575
+ <TopScorer></TopScorer>
576
+ </SportResult>"""
577
+ result = generate_template(xml_template)
578
+
579
+ print(result)
580
+ # {
581
+ # "SportResult": {
582
+ # "Date": "date-time",
583
+ # "Sport": "verbatim-string",
584
+ # "Venue": "verbatim-string",
585
+ # "HomeTeam": "verbatim-string",
586
+ # "AwayTeam": "verbatim-string",
587
+ # "HomeScore": "integer",
588
+ # "AwayScore": "integer",
589
+ # "TopScorer": "verbatim-string"
590
+ # }
591
+ # }
592
+ ```
593
+
594
+ E.g. generate a template from a natural language description:
595
+ ```python
596
+ text = """Give me relevant info about startup companies mentioned."""
597
+ result = generate_template(text)
598
+
599
+ print(result)
600
+ # {
601
+ # "Startup_Companies": [
602
+ # {
603
+ # "Name": "verbatim-string",
604
+ # "Products": [
605
+ # "string"
606
+ # ],
607
+ # "Location": "verbatim-string",
608
+ # "Company_Type": [
609
+ # "Technology",
610
+ # "Finance",
611
+ # "Health",
612
+ # "Education",
613
+ # "Other"
614
+ # ]
615
+ # }
616
+ # ]
617
+ # }
618
+ ```