Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,88 +1,21 @@
|
|
1 |
-
import
|
2 |
-
from transformers import
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
|
23 |
-
|
24 |
-
|
25 |
-
# model_tapas = "google/tapas-large-finetuned-wtq"
|
26 |
-
# tokenizer_tapas = AutoTokenizer.from_pretrained(model_tapas)
|
27 |
-
# model_tapas = AutoModelForTableQuestionAnswering.from_pretrained(model_tapas)
|
28 |
-
# pipe_tapas = pipeline(
|
29 |
-
# "table-question-answering", model=model_tapas, tokenizer=tokenizer_tapas
|
30 |
-
# )
|
31 |
-
|
32 |
-
#new
|
33 |
-
pipe_tapas = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq")
|
34 |
-
pipe_tapas2 = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wikisql-supervised")
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
def process2(query, csv_dataStr):
|
40 |
-
# csv_data={"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], "Number of movies": ["87", "53", "69"]}
|
41 |
-
csv_data = json.loads(csv_dataStr)
|
42 |
-
table = pd.DataFrame.from_dict(csv_data)
|
43 |
-
#microsoft
|
44 |
-
encoding = tokenizer(table=table, query=query, return_tensors="pt")
|
45 |
-
outputs = model.generate(**encoding)
|
46 |
-
result_tapex=tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
47 |
-
#google
|
48 |
-
result_tapas = pipe_tapas(table=table, query=query)['cells'][0]
|
49 |
-
#google2
|
50 |
-
result_tapas2 = pipe_tapas2(table=table, query=query)['cells'][0]
|
51 |
-
return result_tapex, result_tapas, result_tapas2
|
52 |
-
|
53 |
-
|
54 |
-
# Inputs
|
55 |
-
query_text = gr.Text(label="")
|
56 |
-
# input_file = gr.File(label="Upload a CSV file", type="file")
|
57 |
-
input_data = gr.Text(label="")
|
58 |
-
# rows_slider = gr.Slider(label="Number of rows")
|
59 |
-
|
60 |
-
# Output
|
61 |
-
answer_text_tapex = gr.Text(label="")
|
62 |
-
answer_text_tapas = gr.Text(label="")
|
63 |
-
answer_text_tapas2 = gr.Text(label="")
|
64 |
-
|
65 |
-
description = "This Space lets you ask questions on CSV documents with Microsoft [TAPEX-Large](https://huggingface.co/microsoft/tapex-large-finetuned-wtq) and Google [TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wtq). \
|
66 |
-
Both have been fine-tuned on the [WikiTableQuestions](https://huggingface.co/datasets/wikitablequestions) dataset. \n\n\
|
67 |
-
A sample file with football statistics is available in the repository: \n\n\
|
68 |
-
* Which team has the most wins? Answer: Manchester City FC\n\
|
69 |
-
* Which team has the most wins: Chelsea, Liverpool or Everton? Answer: Liverpool\n\
|
70 |
-
* Which teams have scored less than 40 goals? Answer: Cardiff City FC, Fulham FC, Brighton & Hove Albion FC, Huddersfield Town FC\n\
|
71 |
-
* What is the average number of wins? Answer: 16 (rounded)\n\n\
|
72 |
-
You can also upload your own CSV file. Please note that maximum sequence length for both models is 1024 tokens, \
|
73 |
-
so you may need to limit the number of rows in your CSV file. Chunking is not implemented yet."
|
74 |
-
|
75 |
-
iface = gr.Interface(
|
76 |
-
theme="huggingface",
|
77 |
-
description=description,
|
78 |
-
layout="vertical",
|
79 |
-
fn=process2,
|
80 |
-
inputs=[query_text, input_data],
|
81 |
-
outputs=[answer_text_tapex, answer_text_tapas, answer_text_tapas2],
|
82 |
-
examples=[
|
83 |
-
|
84 |
-
],
|
85 |
-
allow_flagging="never",
|
86 |
-
)
|
87 |
-
|
88 |
-
iface.launch()
|
|
|
# Anime prompt-generation demo: sample tag-style prompts from a GPT-2
# model fine-tuned for anime image prompts, seeded with a short tag prefix.
import torch  # kept for parity with the original script; transformers relies on it
from transformers import GPT2Tokenizer, GPT2LMHeadModel, pipeline

# distilgpt2 tokenizer paired with the fine-tuned prompt-generation weights;
# a [PAD] token is added because GPT-2 ships without one.
tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model = GPT2LMHeadModel.from_pretrained('FredZhang7/anime-anything-promptgen-v2')

prompt = r'1girl, genshin'

# Text-generation pipeline around the fine-tuned model.
nlp = pipeline('text-generation', model=model, tokenizer=tokenizer)

# Draw 10 samples.  NOTE(review): the original comment called this
# "contrastive search", but do_sample=True makes it top-k sampling —
# contrastive search would use penalty_alpha with do_sample=False.
outs = nlp(
    prompt,
    max_length=76,
    num_return_sequences=10,
    do_sample=True,
    repetition_penalty=1.2,
    temperature=0.7,
    top_k=4,
    early_stopping=True,
)

print('\nInput:\n' + 100 * '-')
print('\033[96m' + prompt + '\033[0m')   # cyan
print('\nOutput:\n' + 100 * '-')
# Post-process each sample: collapse double spaces and drop trailing commas
# (the stated intent of the original comment).  BUG FIX: the previous
# .replace(' ', '') removed *every* space, mangling the tag list.
outs = [
    str(out['generated_text']).replace('  ', ' ').rstrip(',')
    for out in outs
]
print('\033[92m' + '\n\n'.join(outs) + '\033[0m\n')  # green
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|