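# Scratchpad of Hugging Face Transformers examples, mostly from the Hugging Face
# course and model cards. Every block except the final GPT-2 one is commented
# out; uncomment a block to run it on its own.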
# https://huggingface.co./MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
# from transformers import pipeline
# classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")
# sequence_to_classify = "Angela Merkel is a politician in Germany and leader of the CDU"
# candidate_labels = ["politics", "economy", "entertainment", "environment"]
# output = classifier(sequence_to_classify, candidate_labels, multi_label=False)
# print(output)
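# The zero-shot pipeline returns a dict with 'sequence', 'labels' (sorted by
# descending score), and 'scores'; with multi_label=False the scores sum to 1.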
# from transformers import pipeline
# generator = pipeline("text-generation", model="distilgpt2")
# output = generator("In this course, we will teach you how to")
# print(output)
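# The output is a list of dicts, one per returned sequence, each with a
# 'generated_text' key holding the prompt plus its continuation.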
# https://huggingface.co./bigscience/bloom-560m
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import transformers
# import torch
# model = "bigscience/bloom-560m"
# tokenizer = AutoTokenizer.from_pretrained(model)
# pipeline = transformers.pipeline(
# "text-generation",
# model=model,
# tokenizer=tokenizer,
# torch_dtype=torch.bfloat16,
# trust_remote_code=True,
# device_map="auto",
# )
# sequences = pipeline(
# "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
# max_length=200,
# do_sample=True,
# top_k=10,
# num_return_sequences=1,
# eos_token_id=tokenizer.eos_token_id,
# )
# for seq in sequences:
# print(f"Result: {seq['generated_text']}")
# https://huggingface.co./bert-base-multilingual-cased
# from transformers import pipeline
# unmasker = pipeline('fill-mask', model='bert-base-multilingual-cased')
# output = unmasker("tu es [MASK] homme?")
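# The prompt is French ("are you a [MASK] man?"). fill-mask returns the top
# candidates for [MASK], each a dict with 'score', 'token', 'token_str', and
# the completed 'sequence'.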
# Named entity recognition (no model specified, so the pipeline's default is used)
# from transformers import pipeline
# ner = pipeline("ner", grouped_entities=True)
# output = ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
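# grouped_entities=True merges subword pieces, so "Hugging Face" comes back as a
# single entity dict with 'entity_group', 'score', 'word', 'start', and 'end'.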
# https://huggingface.co./facebook/bart-large-cnn
# from transformers import pipeline
# summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# output = summarizer(
# """
# America has changed dramatically during recent years. Not only has the number of
# graduates in traditional engineering disciplines such as mechanical, civil,
# electrical, chemical, and aeronautical engineering declined, but in most of
# the premier American universities engineering curricula now concentrate on
# and encourage largely the study of engineering science. As a result, there
# are declining offerings in engineering subjects dealing with infrastructure,
# the environment, and related issues, and greater concentration on high
# technology subjects, largely supporting increasingly complex scientific
# developments. While the latter is important, it should not be at the expense
# of more traditional engineering.
# Rapidly developing economies such as China and India, as well as other
# industrial countries in Europe and Asia, continue to encourage and advance
# the teaching of engineering. Both China and India, respectively, graduate
# six and eight times as many traditional engineers as does the United States.
# Other industrial countries at minimum maintain their output, while America
# suffers an increasingly serious decline in the number of engineering graduates
# and a lack of well-educated engineers.
# """
# )
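# summarization returns a list with one dict per input, keyed by 'summary_text'.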
# from transformers import pipeline
# translator = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
# output = translator("屌")
# print(output)
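# The input is a single Chinese slang character; the pipeline returns a list of
# dicts keyed by 'translation_text'.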
# from transformers import AutoTokenizer
# tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
# sequence = "Using a Transformer network is simple"
# tokens = tokenizer.tokenize(sequence)
# print(tokens)
# ids = tokenizer.convert_tokens_to_ids(tokens)
# print(ids)
# decoded_string = tokenizer.decode(ids)
# print(decoded_string)
# print("----------------------")
# sequence = "Using a Transform network are simple"
# tokens = tokenizer.tokenize(sequence)
# print(tokens)
# ids = tokenizer.convert_tokens_to_ids(tokens)
# print(ids)
# decoded_string = tokenizer.decode(ids)
# print(decoded_string)
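# bert-base-cased uses WordPiece, so "Transformer" splits into "Trans" +
# "##former"; the "##" prefix marks a continuation of the previous token, and
# decode() stitches the pieces back together.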
# import torch
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
# sequence = "I’ve been waiting for a HuggingFace course my whole life."
# tokens = tokenizer.tokenize(sequence)
# print(tokens)
# sequence1_ids = tokenizer.convert_tokens_to_ids(tokens)
# print(sequence1_ids)
# sequence = "I hate this so much!"
# tokens = tokenizer.tokenize(sequence)
# print(tokens)
# sequence2_ids = tokenizer.convert_tokens_to_ids(tokens)
# print(sequence2_ids)
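# The two id lists have different lengths, so they cannot be stacked into one
# tensor as-is; that is what the padding below is for.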
# sequence1_ids = [[200, 200, 200]]
# sequence2_ids = [[200, 200]]
# batched_ids = [
# [1045, 1521, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012],
# [1045, 5223, 2023, 2061, 2172, 999, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id],
# ]
# attention_mask = [
# [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
# [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
# ]
# outputs = model(torch.tensor(batched_ids), attention_mask=torch.tensor(attention_mask))
# print(outputs.logits)
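# With the mask zeroing out the pad positions, the second row's logits match
# what the model produces for that sequence alone; without it, the pad tokens
# would be attended to and skew the result.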
# from transformers import AutoTokenizer
# checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# sequence = "I've been waiting for a HuggingFace course my whole life."
# model_inputs = tokenizer(sequence)
# print(model_inputs)
# sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
# Will pad the sequences up to the maximum sequence length
# model_inputs = tokenizer(sequences, padding="longest")
# print(model_inputs)
# print("-------------------------")
# Will pad the sequences up to the specified max length
# model_inputs = tokenizer(sequences, padding="max_length", max_length=8)
# print(model_inputs)
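# Truncation works the same way; a minimal sketch with the same tokenizer:
# model_inputs = tokenizer(sequences, truncation=True, max_length=8)
# print(model_inputs)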
# from transformers import AutoTokenizer
# checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# sequence = "I've been waiting for a HuggingFace course my whole life."
# model_inputs = tokenizer(sequence)
# print("model_inputs = tokenizer(sequence)")
# print(model_inputs)
# print(model_inputs["input_ids"])
# tokens = tokenizer.tokenize(sequence)
# print("tokens = tokenizer.tokenize(sequence)")
# print(tokens)
# ids = tokenizer.convert_tokens_to_ids(tokens)
# print(sequence)
# print(ids)
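# tokenizer(sequence) adds the special tokens the model expects: 101 ([CLS]) at
# the start and 102 ([SEP]) at the end, which tokenize() followed by
# convert_tokens_to_ids() leaves out. That is why the two id lists differ.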
# import torch
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
# sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
# tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")
# output = model(**tokens)
# print(output)
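# The logits can be turned into probabilities with a softmax, e.g.:
# predictions = torch.nn.functional.softmax(output.logits, dim=-1)
# print(predictions)

# Live example: run a raw GPT-2 backbone on a short input.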
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModel.from_pretrained("gpt2")
encoded = tokenizer("Hey!", return_tensors="pt")
result = model(**encoded)
print(result)
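# AutoModel loads the bare transformer without a task head, so result is a
# model output whose last_hidden_state holds one 768-dimensional vector per
# input token for GPT-2.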