"""Generate sentiment-labelled text with a Mistral instruct model and save it to CSV."""

import csv
import random

import torch
from transformers import pipeline

# Run on the first CUDA device when one is present; otherwise fall back to
# the CPU (pipeline device index -1) instead of failing at start-up.
# float16 is kept for the GPU path; float32 is used on CPU, where
# half-precision kernels are not universally available.
_use_gpu = torch.cuda.is_available()

# Text-generation pipeline used to produce every row of the dataset.
chatbot = pipeline(
    "text-generation",
    model="mistralai/Mistral-7B-Instruct-v0.3",
    torch_dtype=torch.float16 if _use_gpu else torch.float32,
    device=0 if _use_gpu else -1,
)

# Target "vibe" injected into each prompt. Order matters: the position of a
# sentiment in this list is the integer label written next to the generated
# text (0 = positive, 1 = neutral, 2 = negative).
sentiments = ["Positive or Encouraging", "Neutral or Factual", "Negative or Toxic"]

# Content formats cycled through to fill the "Format:" field of the prompt.
formats = [
    "Feature Stories", "Instructional Manuals", "FAQs", "Policy Documents", "Live Stream Descriptions",
    "Editorial Content", "Research Papers", "User Manuals", "Commentaries", "Opinion Pieces",
    "Newsletters", "Online Courses", "Photo Essays", "Annual Reports", "User-Generated Content",
    "Testimonials", "DIY Content", "How-To Videos", "Campaign Reports", "Legal Briefs",
    "Blog Posts", "Case Studies", "Tutorials", "Interviews", "Press Releases",
    "eBooks", "Infographics", "Webinars", "Podcast Descriptions", "Video Scripts",
    "Advertisements", "Forum Discussions", "Whitepapers", "Surveys", "Product Reviews",
    "Event Summaries", "Opinion Editorials", "Letters to the Editor", "Round-Up Posts",
    "Buying Guides", "Checklists", "Cheat Sheets", "Recipes", "Travel Guides",
    "Profiles", "Lists", "Q&A Sessions", "Debates", "Polls",
]

# Subject areas cycled through to fill the "Topic:" field of the prompt.
topics = [
    "Family", "Travel", "Politics", "Science", "Health", "Technology", "Sports",
    "Education", "Environment", "Economics", "Culture", "History", "Music",
    "Literature", "Food", "Art", "Fashion", "Entertainment", "Business",
    "Relationships", "Fitness", "Automotive", "Finance", "Real Estate", "Law",
    "Psychology", "Philosophy", "Religion", "Gardening", "DIY", "Hobbies",
    "Pets", "Career", "Marketing", "Customer Service", "Networking", "Innovation",
    "Artificial Intelligence", "Sustainability", "Social Issues", "Digital Media",
    "Programming", "Cybersecurity", "Astronomy", "Geography", "Travel Tips",
    "Cooking", "Parenting", "Productivity", "Mindfulness", "Mental Health",
    "Self-Improvement", "Leadership", "Teamwork", "Volunteering", "Nonprofits",
    "Gaming", "E-commerce", "Photography", "Videography", "Film", "Television",
    "Streaming Services", "Podcasts", "Public Speaking", "Event Planning",
    "Interior Design", "Architecture", "Urban Development", "Agriculture",
    "Climate Change", "Renewable Energy", "Space Exploration", "Biotechnology",
    "Cryptocurrency", "Blockchain", "Robotics", "Automated Systems", "Genetics",
    "Medicine", "Pharmacy", "Veterinary Science", "Marine Biology", "Ecology",
    "Conservation", "Wildlife", "Botany", "Zoology", "Geology", "Meteorology",
    "Aviation", "Maritime", "Logistics", "Supply Chain", "Human Resources",
    "Diversity and Inclusion", "Ethics", "Corporate Governance", "Public Relations",
    "Journalism", "Advertising", "Sales", "Customer Experience", "Retail",
    "Hospitality", "Tourism", "Luxury Goods", "Consumer Electronics", "Fashion Design",
    "Textiles", "Jewelry", "Cosmetics", "Skincare", "Perfume", "Toys", "Gadgets",
    "Home Appliances", "Furniture", "Home Improvement", "Landscaping", "Real Estate Investment",
]

# Writing styles cycled through to fill the "Style:" field of the prompt.
styles = [
    "Super Casual", "Internet Slang", "Every Day", "Formal", "Conversational",
    "Bad Grammar and Spelling", "Lazy typing", "Professional", "Academic",
    "Technical", "Narrative", "Descriptive", "Analytical", "Critical",
    "Objective", "Subjective", "Third Person", "First Person",
    "Persuasive", "Informative", "Journalistic", "Reflective",
    "DM", "Social", "Informal", "Casual", "Colloquial",
]

# Opening phrases; one is drawn uniformly at random for every prompt.
# Entries are unique so that no phrase is sampled more often than the others
# (the list previously contained "Interestingly enough" and "Interestingly"
# twice each).
starting_phrases = [
    "Have you ever wondered", "Let's talk about", "It's interesting how",
    "Did you know", "The reality is", "Many people believe",
    "It's surprising that", "You might not know", "Let's dive into",
    "Here's the thing", "A common misconception is", "It's clear that",
    "Most people don't realize", "One thing to note is",
    "The fact is", "Consider this", "Here's an example",
    "Think about", "For instance", "To illustrate",
    "In my experience", "A key point is", "It's worth noting",
    "Let's explore", "Interestingly enough", "I want to highlight",
    "When it comes to", "The truth is", "Many experts agree",
    "Research shows", "Statistics indicate", "It's often said",
    "In reality", "From my perspective", "Surprisingly",
    "One thing I've noticed", "In recent studies", "Let's break down",
    "People often forget", "You should know", "Interestingly",
    "It turns out", "As it happens", "Experts suggest",
    "The surprising fact is", "It's commonly known", "Let's be honest",
    "The reality of", "It's fascinating that", "Have you noticed",
    "The thing is", "It's a fact that", "Let's not forget",
    "Studies have shown", "A notable point is", "It's often overlooked",
    "An important aspect is", "Let's take a closer look",
    "It's essential to understand", "Interestingly, research suggests",
    "One aspect to consider is", "It's beneficial to know",
    "It's worth considering", "The interesting thing is", "Let's examine",
    "A surprising fact is", "It's helpful to know", "One surprising element is",
    "Imagine this", "Here's a thought", "You might be surprised",
    "Think of it this way", "Here's an idea", "It's funny how",
    "Let me tell you", "Picture this", "The question is",
    "Believe it or not", "You won't believe", "Let's face it",
    "The best part is", "What's interesting is", "I discovered that",
    "It's amazing how", "The funny thing is", "Here's why",
    "What if I told you", "It's worth mentioning", "This reminds me of",
    "Let me explain", "Here's something new", "I realized that",
    "Have you seen", "You might enjoy", "I learned that",
    "It's clear to see", "What's fascinating is", "Here's a question",
    "I heard that", "The cool part is", "Here's what happened",
    "It appears that", "It's evident that", "Let me share",
    "You'll find that", "What's notable is", "Consider the fact that",
    "It's interesting to note", "Hello everyone", "Hi there",
    "Greetings", "Hey folks", "Good morning", "Good afternoon",
    "Good evening", "Hey", "What's up", "Hi", "Hello",
    "Amazing!", "Serious?", "Wow...", "That's pretty cool.",
    "Can you believe it?", "Unbelievable!", "Incredible!", "No way!",
    "Check this out", "Guess what?", "Surprise!", "Fascinating!",
    "Impressive!", "I don't get it?", "Really?", "What?",
    "Why?", "How come?", "Is that so?", "Are you sure?",
    "What do you think?", "By the way", "Just so you know",
    "For your information", "Incidentally", "On a side note",
    "As a reminder", "In addition", "Besides that",
    "While we're on the subject", "Speaking of which",
    "Have you", "Has anyone", "Would we", "Would it be",
    "OK, now", "OK but", "OK you", "OK nobody",
    "Here's a quick fact", "To put it simply", "Here's why this matters",
    "Let's consider", "Now, think about this", "Take this into account",
    "Here's something to think about", "On that note",
    "Just imagine", "That reminds me",
    "As it turns out", "Here's a fun fact", "The reality of it is",
    "By the way, did you know", "Speaking of",
    "Now, let's dive in", "You'll be surprised to know",
    "I recently discovered", "Would you believe", "Can you imagine",
    "What's more", "Even more interesting is",
]

# Path of the CSV dataset produced by this script.
csv_file = "sentences.csv"

# Start from a fresh file containing only the header row. Mode 'w' truncates
# whatever an earlier run left behind; newline='' lets the csv module control
# line endings itself.
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["text", "label"])

def ensure_correct_quoting(text):
    """Return *text* wrapped in one pair of double quotes.

    Text that already begins and ends with a double quote is returned
    unchanged; anything else gets a ``"`` added at both ends.

    A string consisting of a single ``"`` both starts and ends with a quote
    but is not actually wrapped, so the length is checked as well and that
    input is wrapped like any other.

    Args:
        text: The string to wrap.

    Returns:
        The string enclosed in double quotes.
    """
    already_wrapped = len(text) >= 2 and text.startswith('"') and text.endswith('"')
    if already_wrapped:
        return text
    return f'"{text}"'

# Number of data rows to generate (the header row is not counted).
TARGET_ROWS = 100000

row_count = 0
format_index = 0
topic_index = 0
style_index = 0

# The output file is opened once in append mode (the header is already in
# place) and flushed after every row, so an interrupted run keeps everything
# generated so far without reopening the file for each row.
with open(csv_file, mode='a', newline='', encoding='utf-8') as file:
    writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    while row_count < TARGET_ROWS:
        # The style advances once per sweep over the sentiments, not once per
        # row. With the current lists (27 styles, 3 sentiments) a per-row
        # step keeps the style index and the sentiment index congruent
        # modulo 3, which would tie every style to a single label.
        style = styles[style_index % len(styles)]
        style_index += 1

        for idx, sentiment in enumerate(sentiments):
            # Format and topic cycle round-robin, one step per row.
            format_type = formats[format_index % len(formats)]
            format_index += 1
            topic = topics[topic_index % len(topics)]
            topic_index += 1
            start_phrase = random.choice(starting_phrases)

            prompt = f"Start your paragraph with '{start_phrase}'. Write a single paragraph of text. Format: {format_type}. Topic: {topic}. Vibe: {sentiment}. Style: {style}."

            response = chatbot(prompt, max_new_tokens=100)
            print(f"Full model response: {response}")

            # The pipeline output contains the prompt followed by the
            # continuation: drop the prompt, then keep only the first line of
            # what remains.
            generated_text = response[0]['generated_text']
            clean_text = generated_text.replace(prompt, "").strip().split('\n')[0]

            # NOTE(review): the text is wrapped in literal double quotes and
            # then escaped again by csv.writer, so the stored field reads
            # """text""" in the file and parses back with the quotes included.
            # Kept as-is to avoid changing the dataset contents; confirm
            # whether consumers rely on it.
            correctly_quoted_text = ensure_correct_quoting(clean_text)

            # The label is the sentiment's position in `sentiments`.
            writer.writerow([correctly_quoted_text, idx])
            file.flush()

            row_count += 1
            print(f"Response for sentiment '{sentiment}' saved to {csv_file}. Total rows: {row_count}")

            # Stop mid-sweep so exactly TARGET_ROWS rows are written even
            # though the target is not a multiple of len(sentiments).
            if row_count >= TARGET_ROWS:
                break

print("All responses saved. Total rows:", row_count)