microsoft/AI-For-Beginners

Public

mirrored from https://github.com/microsoft/AI-For-Beginners · Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
9e39f4cfea8fa8b65f84ba900c94da22d949726b

Branches

Tags

  • No tags available.
0 Branches · 0 Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

examples/04-text-sentiment.py

268 lines · mode: code

1"""
2Simple Text Sentiment Analysis
3================================
4
5This example shows how to analyze the sentiment (emotion) of text.
6It's a simplified version that teaches NLP concepts without complex libraries.
7
8What you'll learn:
9- Text preprocessing (cleaning and preparing text)
10- Feature extraction (converting words to numbers)
11- Sentiment classification (positive vs negative)
12
13Use case: Determine if a movie review is positive or negative.
14"""
15
16import re
17from collections import Counter
18
class SimpleSentimentAnalyzer:
    """
    A basic sentiment analyzer that learns from labeled examples.

    How it works:
    1. Learns which words appear more in positive vs negative texts
    2. Calculates a "sentiment score" for each word
    3. Uses these scores to predict sentiment of new text
    """

    def __init__(self):
        # Maps word -> score; positive words get positive scores,
        # negative words get negative scores.
        self.word_scores = {}
        # Track if we've trained
        self.is_trained = False

    def preprocess_text(self, text):
        """
        Clean and prepare text for analysis.

        Steps:
        1. Convert to lowercase
        2. Remove every character that is not a-z or whitespace
        3. Split into words
        4. Drop words shorter than 3 letters

        Note that punctuation is deleted, not replaced by a space, so
        "don't" becomes "dont" and "must-watch" becomes "mustwatch".

        Args:
            text: Raw text string

        Returns:
            List of cleaned words
        """
        # Convert to lowercase
        text = text.lower()

        # Remove punctuation, digits and special characters
        text = re.sub(r'[^a-z\s]', '', text)

        # Split into words and drop anything shorter than 3 letters
        # (this removes "a" and "i", but also "is", "it", "no", ...)
        return [w for w in text.split() if len(w) > 2]

    def train(self, training_data):
        """
        Learn sentiment patterns from labeled examples.

        Each call replaces whatever was learned before, so the analyzer
        always reflects only the most recent training data. Progress and
        the top/bottom scoring words are printed to standard output.

        Args:
            training_data: Iterable of (text, sentiment) tuples.
                A sentiment of 'positive' counts as positive; any other
                label is counted as negative.
        """
        print("🎓 Training sentiment analyzer...")

        # Count words in positive and negative texts
        positive_words = Counter()
        negative_words = Counter()

        for text, sentiment in training_data:
            words = self.preprocess_text(text)

            if sentiment == 'positive':
                positive_words.update(words)
            else:
                negative_words.update(words)

        # Build the scores in a fresh dict so that words from an earlier
        # train() call do not linger after retraining.
        # Iterating the vocabulary in sorted order keeps the dict order (and
        # therefore the tie order in the report below) identical on every run.
        scores = {}
        for word in sorted(positive_words.keys() | negative_words.keys()):
            pos_count = positive_words[word]
            neg_count = negative_words[word]

            # Score > 0 means more positive, < 0 means more negative.
            # The +1 in the denominator is a damping term: a word seen only
            # once scores +/-0.5 instead of +/-1.0, so rare words count less.
            total = pos_count + neg_count
            scores[word] = (pos_count - neg_count) / (total + 1)

        self.word_scores = scores
        self.is_trained = True

        # Show some learned words
        print(f"✅ Learned sentiment for {len(self.word_scores)} words")
        print("\n📊 Most positive words:")
        sorted_words = sorted(self.word_scores.items(), key=lambda x: x[1], reverse=True)
        for word, score in sorted_words[:5]:
            print(f" '{word}': {score:+.3f}")

        print("\n📊 Most negative words:")
        for word, score in sorted_words[-5:]:
            print(f" '{word}': {score:+.3f}")

    def analyze(self, text):
        """
        Predict the sentiment of new text.

        The score is the average of the learned scores of the words in
        the text that were seen during training; unknown words are ignored.

        Args:
            text: Text to analyze

        Returns:
            Tuple of (sentiment, confidence, score) where sentiment is
            "positive" or "negative", confidence is a percentage (0-100)
            and score is the average word score. A score of exactly 0,
            including the case where no word is recognized, is reported
            as "negative" with 0 confidence.

        Raises:
            RuntimeError: If train() has not been called yet.
        """
        if not self.is_trained:
            raise RuntimeError("Please train the analyzer first!")

        # Preprocess text
        words = self.preprocess_text(text)

        # Add up the scores of the words we know
        total_score = 0.0
        word_count = 0

        for word in words:
            if word in self.word_scores:
                total_score += self.word_scores[word]
                word_count += 1

        # Average score (0 when no known word was found)
        avg_score = total_score / word_count if word_count > 0 else 0.0

        # Determine sentiment and confidence
        sentiment = "positive" if avg_score > 0 else "negative"
        confidence = min(abs(avg_score) * 100, 100)  # Convert to percentage

        return sentiment, confidence, avg_score
148
149
def create_training_data():
    """
    Build the small labeled corpus of movie reviews used by the demo.

    Eight positive reviews come first, followed by eight negative ones.
    A real application would use thousands of examples.

    Returns:
        List of (review_text, sentiment) tuples
    """
    upbeat_reviews = (
        "This movie was absolutely amazing and wonderful! I loved every minute.",
        "Brilliant performance! The acting was superb and the story captivating.",
        "Fantastic film! Highly recommend to everyone. Best movie of the year!",
        "Loved it! Great storytelling and beautiful cinematography.",
        "Excellent movie with outstanding performances. A must watch!",
        "Amazing! This film exceeded all my expectations. Truly remarkable.",
        "Wonderful experience! The plot was engaging and entertaining.",
        "Superb direction and acting! One of the best films I've seen.",
    )
    downbeat_reviews = (
        "Terrible movie. Waste of time and money. Very disappointed.",
        "Awful film! Poor acting and boring story. Would not recommend.",
        "Horrible! The worst movie I have ever seen. Extremely disappointing.",
        "Bad movie with terrible plot. Boring and predictable.",
        "Disappointing film. Poor execution and weak performances.",
        "Worst movie ever! Horrible acting and stupid storyline.",
        "Terrible experience. Boring and poorly made. Don't waste your time.",
        "Awful! Poor quality and uninteresting. Complete waste of time.",
    )

    # Attach the label to each text, keeping positives ahead of negatives.
    labeled = [(review, "positive") for review in upbeat_reviews]
    labeled.extend((review, "negative") for review in downbeat_reviews)
    return labeled
180
181
def main():
    """
    Main function - Let's analyze some sentiments!

    Trains a SimpleSentimentAnalyzer on the reviews returned by
    create_training_data(), prints the prediction for a fixed list of
    test reviews, then reads reviews from standard input until the user
    types 'quit', 'exit' or 'q' or the input stream ends. A short
    explanation is printed at the end.
    """
    print("=" * 70)
    print("Simple Text Sentiment Analysis")
    print("=" * 70)
    print("\n📚 Task: Learn to identify positive and negative movie reviews")
    print()

    # Step 1: Create training data
    training_data = create_training_data()
    print(f"📊 Training data: {len(training_data)} movie reviews")
    print()

    # Step 2: Create and train analyzer
    analyzer = SimpleSentimentAnalyzer()
    analyzer.train(training_data)
    print()

    # Step 3: Test on new reviews
    print("🧪 Testing on new movie reviews:")
    print("=" * 70)

    test_reviews = [
        "This movie was fantastic! I really enjoyed it.",
        "Boring and terrible. Not worth watching.",
        "Amazing cinematography and wonderful acting!",
        "The worst film I've seen this year. Awful.",
        "Pretty good movie with some great moments.",
        "Disappointing and poorly directed.",
    ]

    for i, review in enumerate(test_reviews, 1):
        sentiment, confidence, score = analyzer.analyze(review)

        # Visual indicator
        indicator = "😊" if sentiment == "positive" else "😞"

        print(f"\nReview {i}:")
        print(f" Text: \"{review}\"")
        print(f" {indicator} Sentiment: {sentiment.upper()}")
        print(f" 📊 Confidence: {confidence:.1f}%")
        print(f" 📈 Score: {score:+.3f}")

    print("\n" + "=" * 70)

    # Interactive mode
    print("\n💬 Try it yourself! Enter your own review (or 'quit' to exit):")
    print("-" * 70)

    while True:
        try:
            user_input = input("\nYour review: ").strip()
        except EOFError:
            # stdin was closed (piped / non-interactive run, or Ctrl-D):
            # leave the loop instead of crashing so the summary still prints.
            print()
            break

        if user_input.lower() in ['quit', 'exit', 'q']:
            break

        if not user_input:
            continue

        # Best effort: report a failed analysis and keep the session going.
        try:
            sentiment, confidence, score = analyzer.analyze(user_input)
        except Exception as e:
            print(f"Error: {e}")
            continue

        indicator = "😊" if sentiment == "positive" else "😞"

        print(f"\n{indicator} Sentiment: {sentiment.upper()}")
        print(f"📊 Confidence: {confidence:.1f}%")
        print(f"📈 Score: {score:+.3f}")

    # Explanation
    print("\n💡 What just happened?")
    print("1. The analyzer learned word patterns from example reviews")
    print("2. It calculated 'sentiment scores' for words")
    print("3. For new text, it combines word scores to predict sentiment")
    print()
    print("🎉 You just built a sentiment analyzer!")
    print()
    print("🚀 Next steps:")
    print(" - Add more training examples to improve accuracy")
    print(" - Try analyzing tweets, product reviews, or comments")
    print(" - Explore more advanced NLP in lessons/5-NLP/")
    print()
265
266
# Run the demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
269