Longformer인코더 KoBART로 AIHUB 금융 및 콜 상담 대화 데이터를 CHATGPT를 통해 요약한 학습 데이터를 학습한 모델
# Example model input: a customer/agent call-centre dialogue, one speaker turn
# per line. NOTE(review): the name shadows the builtin `input`; it is kept so
# that any snippet referring to this example keeps working.
input = """고객: 안녕하세요, 제가 여기서 사용하는 신용카드에 대해 궁금한 게 있어요.
상담원: 안녕하세요! 네, 어떤 문의가 있으신가요?
고객: 제가 이번 달에 카드를 사용하면서 리워드 포인트를 얼마나 쌓았는지 확인하고 싶어요.
상담원: 네, 당신의 리워드 포인트 잔액을 확인해 드릴 수 있습니다. 제가 당신의 카드 번호를 입력하고 확인해볼게요. 번호를 알려주실 수 있을까요?
고객: 네, 제 카드 번호는 1234-5678-9012-3456입니다.
상담원: 감사합니다. 잠시만 기다려주세요. 확인 중이에요... 네, 현재 당신의 리워드 포인트 잔액은 3,250 포인트입니다.
고객: 알겠어요, 감사합니다! 그럼 추가적인 이용 혜택이나 할인에 관한 정보도 얻을 수 있을까요?
상담원: 물론이죠! 저희 카드사는 다양한 이용 혜택을 제공하고 있습니다. 예를 들어, 여행, 쇼핑, 식사 등 다양한 분야에서 할인 혜택을 받을 수 있거나, 리워드 포인트를 사용하여 상품이나 기프트 카드로 교환할 수 있습니다. 어떤 혜택에 관심이 있으신가요?
고객: 저는 여행 할인이나 마일리지 적립에 관심이 있어요.
상담원: 그런 경우에는 당신에게 적합한 여행 카드 혜택을 제공하는 카드를 추천해 드릴 수 있습니다. 여행 카드는 항공사 마일리지를 쌓을 수 있고, 호텔 할인 혜택을 받을 수도 있습니다. 제가 몇 가지 옵션을 제안해 볼까요?
고객: 네, 그러면 좋을 것 같아요. 감사합니다!
상담원: 말씀해 주셔서 감사합니다. 이제 제가 몇 가지 추천을 드리도록 하겠습니다. 어떤 항공사를 주로 이용하시나요?"""
# Example summary for the dialogue above: one "- " bullet per line, wrapped in
# a leading and a trailing newline.
output ="""
- 고객이 신용카드에 대해 궁금한 사항 상담
- 리워드 포인트 확인 요청
- 상담원이 카드 번호와 잔액 확인 후 추가 이용 혜택 안내
- 고객이 여행 할인, 마일리지, 호텔 할인 등 다양한 혜택에 관심 표현
"""
해당 모델을 활용하기 위해서 다음과 같은 class 필요
class LongformerSelfAttentionForBart(nn.Module):
    """Wrap `LongformerSelfAttention` behind a BART-attention-style `forward`.

    The wrapped attention output is passed through a linear projection of size
    `config.d_model` before being returned.
    """

    def __init__(self, config, layer_id):
        """
        Args:
            config: model configuration; `config.d_model` is read here and the
                whole object is forwarded to `LongformerSelfAttention`.
            layer_id: index of the layer this attention module belongs to.
        """
        super().__init__()
        self.embed_dim = config.d_model
        self.longformer_self_attn = LongformerSelfAttention(config, layer_id=layer_id)
        self.output = nn.Linear(self.embed_dim, self.embed_dim)

    def forward(
        self,
        hidden_states: torch.Tensor,
        key_value_states: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        attention_mask: Optional[torch.Tensor] = None,
        layer_head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        """Run Longformer self-attention and project the result.

        Args:
            hidden_states: 3-D input tensor (batch, sequence, embedding).
            key_value_states: accepted for signature compatibility; unused.
            past_key_value: accepted for signature compatibility; unused.
            attention_mask: additive mask; negative entries are flagged as
                masked and positive entries as global attention. When `None`,
                an all-zero mask is used (nothing masked, no global attention).
            layer_head_mask: accepted for signature compatibility; unused
                (`None` is always forwarded to the wrapped attention).
            output_attentions: forwarded to the wrapped attention.

        Returns:
            Always a 3-tuple `(attn_output, attn_weights, None)`.
            `attn_weights` is the second item of the wrapped attention's
            result when that result has exactly two items, otherwise `None`.
        """
        bsz, tgt_len, _ = hidden_states.size()

        if attention_mask is None:
            # No mask supplied: zeros are neither "< 0" (masked) nor "> 0" (global).
            attention_mask = hidden_states.new_zeros((bsz, tgt_len))
        else:
            # Reduce the expanded mask to one row per batch item:
            # bs x seq_len x seq_len -> bs x seq_len
            # (assumes the incoming mask is (bs, 1, seq_len, seq_len) — TODO confirm)
            attention_mask = attention_mask.squeeze(dim=1)
            attention_mask = attention_mask[:, 0]

        is_index_masked = attention_mask < 0
        is_index_global_attn = attention_mask > 0
        is_global_attn = is_index_global_attn.flatten().any().item()

        outputs = self.longformer_self_attn(
            hidden_states,
            attention_mask=attention_mask,
            layer_head_mask=None,
            is_index_masked=is_index_masked,
            is_index_global_attn=is_index_global_attn,
            is_global_attn=is_global_attn,
            output_attentions=output_attentions,
        )

        attn_output = self.output(outputs[0])
        # Pad to three items in every case so the result matches the annotated
        # return type; previously the two-item case produced a 2-tuple.
        attn_weights = outputs[1] if len(outputs) == 2 else None
        return attn_output, attn_weights, None
class LongformerEncoderDecoderForConditionalGeneration(BartForConditionalGeneration):
    """BART conditional-generation model whose encoder self-attention is
    replaced by `LongformerSelfAttentionForBart`, unless
    `config.attention_mode` is `'n2'`.
    """

    def __init__(self, config):
        """
        Args:
            config: configuration providing `attention_mode`, `d_model`,
                `max_encoder_position_embeddings` and
                `max_decoder_position_embeddings`.
        """
        super().__init__(config)

        if config.attention_mode == 'n2':
            # Regular n^2 self-attention requested: leave the parent model untouched.
            return

        encoder = self.model.encoder
        decoder = self.model.decoder

        # Swap in learned positional embeddings sized from the config limits.
        encoder.embed_positions = BartLearnedPositionalEmbedding(
            config.max_encoder_position_embeddings,
            config.d_model,
        )
        decoder.embed_positions = BartLearnedPositionalEmbedding(
            config.max_decoder_position_embeddings,
            config.d_model,
        )

        # Only the encoder layers get the Longformer self-attention wrapper.
        for layer_id, encoder_layer in enumerate(encoder.layers):
            encoder_layer.self_attn = LongformerSelfAttentionForBart(config, layer_id=layer_id)
class LongformerEncoderDecoderConfig(BartConfig):
    """`BartConfig` extended with Longformer attention settings."""

    def __init__(self, attention_window: Optional[List[int]] = None,
                 attention_dilation: Optional[List[int]] = None,
                 autoregressive: bool = False, attention_mode: str = 'sliding_chunks',
                 gradient_checkpointing: bool = False, **kwargs):
        """
        Args:
            attention_window: list of attention window sizes of length = number of layers.
                window size = number of attention locations on each side.
                For an effective window size of 512, use `attention_window=[256]*num_layers`
                which is 256 on each side.
            attention_dilation: list of attention dilation of length = number of layers.
                attention dilation of `1` means no dilation.
            autoregressive: do autoregressive attention or have attention of both sides
            attention_mode: 'n2' for regular n^2 self-attention, 'tvm' for TVM implementation of
                Longformer selfattention, 'sliding_chunks' for another implementation of
                Longformer selfattention
            gradient_checkpointing: stored on the config unchanged.
            **kwargs: forwarded to `BartConfig.__init__`.

        Raises:
            AssertionError: if `attention_mode` is not one of
                'tvm', 'sliding_chunks', 'n2'.
        """
        super().__init__(**kwargs)
        self.attention_window = attention_window
        self.attention_dilation = attention_dilation
        self.autoregressive = autoregressive
        self.attention_mode = attention_mode
        self.gradient_checkpointing = gradient_checkpointing

        # Raised explicitly rather than via `assert` so the check still runs
        # under `python -O`; the exception type is kept for compatibility.
        if self.attention_mode not in ('tvm', 'sliding_chunks', 'n2'):
            raise AssertionError(
                "attention_mode must be one of 'tvm', 'sliding_chunks', 'n2'; "
                f"got {self.attention_mode!r}"
            )
모델 오브젝트 로드 후 weight파일을 별도로 다운받아서 load_state_dict로 웨이트를 불러야 합니다.
# Build the tokenizer and the model object from the Hub repository.
tokenizer = AutoTokenizer.from_pretrained("cocoirun/longforemr-kobart-summary-v1")
model = LongformerEncoderDecoderForConditionalGeneration.from_pretrained("cocoirun/longforemr-kobart-summary-v1")
# Use the GPU when one is available, otherwise fall back to CPU so the
# snippet also runs on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The weight file is downloaded separately. map_location="cpu" lets a
# checkpoint saved from a GPU be read without CUDA; the model is moved to
# `device` afterwards.
model.load_state_dict(torch.load("summary weight.ckpt", map_location="cpu"))
model.to(device)
모델 요약 함수
def summarize(text, max_len, max_seq_len=4096):
    """Summarize `text` with the module-level `model` and `tokenizer`.

    The text is tokenized, wrapped in `<s>` / `</s>`, then padded with the pad
    token or truncated (keeping a final EOS token) to exactly `max_seq_len`
    ids before beam-search generation.

    Args:
        text: input text to summarize.
        max_len: passed to `model.generate` as `max_length`.
        max_seq_len: fixed encoder input length in tokens. Defaults to 4096,
            the value that was previously hard-coded.

    Returns:
        The decoded summary with special tokens removed and each "\\n\\n"
        replaced by "\\n" (single pass).
    """
    context_tokens = ['<s>'] + tokenizer.tokenize(text) + ['</s>']
    input_ids = tokenizer.convert_tokens_to_ids(context_tokens)

    if len(input_ids) < max_seq_len:
        # Pad in one step instead of appending one id per loop iteration.
        input_ids += [tokenizer.pad_token_id] * (max_seq_len - len(input_ids))
    else:
        # Truncate but keep the sequence terminated by EOS.
        input_ids = input_ids[:max_seq_len - 1] + [tokenizer.eos_token_id]

    input_tensor = torch.tensor([input_ids]).to(device)
    # Pass the padding mask explicitly rather than relying on generate()
    # inferring it from the pad token id.
    attention_mask = input_tensor.ne(tokenizer.pad_token_id).long()

    res_ids = model.generate(input_tensor,
                             attention_mask=attention_mask,
                             max_length=max_len,
                             num_beams=5,
                             no_repeat_ngram_size=3,
                             eos_token_id=tokenizer.eos_token_id,
                             bad_words_ids=[[tokenizer.unk_token_id]])
    res = tokenizer.batch_decode(res_ids.tolist(), skip_special_tokens=True)[0]
    res = res.replace("\n\n", "\n")
    return res
- Downloads last month
- 109
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.