Ashishkr committed on
Commit
460c718
1 Parent(s): 6211b2f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +112 -4
README.md CHANGED
@@ -1,9 +1,117 @@
1
  ---
2
  tags:
3
- - autotrain
4
- - text-generation
5
  widget:
6
- - text: "I love AutoTrain because "
 
 
 
 
 
 
 
7
  ---
8
 
9
- # Model Trained Using AutoTrain
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  tags:
3
+ - text-generation
 
4
  widget:
5
+ - text: >
6
+ John Doe, born on January 1, 1990, currently lives at 1234 Elm Street, Springfield, Anywhere 12345.
7
+ He can be reached at john.doe@example.com or at the phone number 555-123-4567. His social security number is 123-45-6789,
8
+ and he has a bank account number 9876543210 at Springfield Bank. John attended Springfield University where he earned
9
+ a Bachelor's degree in Computer Science. He now works at Acme Corp and his employee ID is 123456. John's medical record number
10
+ is MRN-001234, and he has a history of asthma and high blood pressure. His primary care physician is Dr. Jane Smith,
11
+ who practices at Springfield Medical Center. His recent blood test results show a cholesterol level of 200 mg/dL and a
12
+ blood glucose level of 90 mg/dL.
13
  ---
14
 
15
+
16
+ ```python
17
+
18
+ import transformers
19
+ from peft import PeftModel, PeftConfig
20
+ from transformers import AutoModelForCausalLM, AutoTokenizer
21
+ import torch
22
+ from torch import cuda, bfloat16
23
+
24
# Base checkpoint that the "Ashishkr/pii-removal" adapter is applied on top of.
base_model_id = 'meta-llama/Llama-2-7b-hf'

# Device that tokenized inputs are moved to before generation.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

# Replace with your own Hugging Face access token.
hf_auth = "hf_your-huggingface-access-token"

# `token` is the current name of the authentication argument; the older
# `use_auth_token` spelling is deprecated in transformers.
model_config = transformers.AutoConfig.from_pretrained(
    base_model_id,
    token=hf_auth,
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth,
)

config = PeftConfig.from_pretrained("Ashishkr/pii-removal")

# device_map='auto' has already placed the quantized weights, so the adapter
# is attached without a further `.to(device)`: moving a bitsandbytes-quantized
# model afterwards is redundant and can be rejected by transformers.
model = PeftModel.from_pretrained(model, "Ashishkr/pii-removal")

model.eval()
print(f"Model loaded on {device}")

tokenizer = transformers.AutoTokenizer.from_pretrained(
    base_model_id,
    token=hf_auth,
)
61
+
62
+ ```
63
+
64
+
65
+ ```python
66
from typing import Optional


def remove_pii_info(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    prompt: str,
    max_new_tokens: Optional[int] = None,
    temperature: float = 0.92,
):
    """Generate the model's continuation of ``prompt`` and return only the new text.

    Args:
        model: Causal language model used for generation.
        tokenizer: Tokenizer matching ``model``.
        prompt: Full prompt text fed to the model.
        max_new_tokens: Upper bound on generated tokens. ``None`` (the
            default) uses the number of tokens in the prompt, which is what
            this function previously did regardless of the argument.
        temperature: Temperature forwarded to ``model.generate``.

    Returns:
        The decoded continuation, without the prompt and without special
        tokens.
    """
    # Put the inputs on the model's own device rather than relying on a
    # module-level `device` variable.
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(model.device)

    prompt_token_count = inputs["input_ids"].shape[1]
    if max_new_tokens is None:
        max_new_tokens = prompt_token_count

    # Some tokenizers define no pad token; fall back to EOS in that case.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Only query CUDA capabilities / enable CUDA autocast when the inputs
    # actually live on a GPU.
    on_cuda = inputs["input_ids"].device.type == "cuda"
    if on_cuda and torch.cuda.is_bf16_supported():
        dtype_to_use = torch.bfloat16
    else:
        dtype_to_use = torch.float16

    with torch.autocast("cuda", dtype=dtype_to_use, enabled=on_cuda):
        response = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            return_dict_in_generate=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
        )

    # The returned sequence starts with the prompt tokens. Drop them by token
    # count: slicing the decoded string by len(prompt) is unreliable because
    # decoding does not always reproduce the prompt text character for
    # character.
    generated_ids = response["sequences"][0][prompt_token_count:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)
99
+
100
# The text to process is wrapped as `Input: "<text>" Output: ` and the model
# writes the continuation after `Output: `.
prompt = """
Input: "John Doe, born on January 1, 1990, currently lives at 1234 Elm Street, Springfield, Anywhere 12345.
He can be reached at john.doe@example.com or at the phone number 555-123-4567. His social security number is 123-45-6789,
and he has a bank account number 9876543210 at Springfield Bank. John attended Springfield University where he earned
a Bachelor's degree in Computer Science. He now works at Acme Corp and his employee ID is 123456. John's medical record number
is MRN-001234, and he has a history of asthma and high blood pressure. His primary care physician is Dr. Jane Smith,
who practices at Springfield Medical Center. His recent blood test results show a cholesterol level of 200 mg/dL and a
blood glucose level of 90 mg/dL.
" Output: """

# Run generation on the prompt and print the text the model produced.
response = remove_pii_info(
    model,
    tokenizer,
    prompt,
    temperature=0.7,
)

print(response)
117
+ ```