microsoft/AI-For-Beginners

Public

mirrored from https://github.com/microsoft/AI-For-Beginners (Available)

Watch0 Fork0 Star0

Code Commits Issues Pull requests Actions Insights Security

9e39f4cfea8fa8b65f84ba900c94da22d949726b

Find a branch or tag

Branches

9e39f4cfea8fa8b65f84ba900c94da22d949726b

Clone

HTTPS

Download ZIP

AI-For-Beginners/examples

examples/04-text-sentiment.py

268 lines · mode: code

Raw Download

Latest commit unavailable.

unknown

1	`"""`
2	`Simple Text Sentiment Analysis`
3	`================================`
4
5	`This example shows how to analyze the sentiment (emotion) of text.`
6	`It's a simplified version that teaches NLP concepts without complex libraries.`
7
8	`What you'll learn:`
9	`- Text preprocessing (cleaning and preparing text)`
10	`- Feature extraction (converting words to numbers)`
11	`- Sentiment classification (positive vs negative)`
12
13	`Use case: Determine if a movie review is positive or negative.`
14	`"""`
15
16	`import re`
17	`from collections import Counter`
18
class SimpleSentimentAnalyzer:
    """
    A basic sentiment analyzer that learns from labeled examples.

    How it works:
    1. Learns which words appear more in positive vs negative texts
    2. Calculates a "sentiment score" for each word (between -1 and 1)
    3. Averages the scores of the known words to predict sentiment of new text
    """

    # Apostrophes are deleted (not turned into a space) so that contractions
    # stay a single token: "don't" -> "dont", "I've" -> "ive".
    _APOSTROPHES = re.compile(r"['\u2019]")
    # Every other character that is not a lowercase ASCII letter or whitespace
    # becomes a space, so "awful,boring" and "well-made" split into two words
    # instead of being glued into one unknown token.
    _NON_LETTERS = re.compile(r"[^a-z\s]")
    # How many words to show in each "most positive / most negative" list.
    _TOP_WORDS_SHOWN = 5

    def __init__(self):
        # Maps word -> sentiment score (positive words get positive scores)
        self.word_scores = {}
        # Track if we've trained
        self.is_trained = False

    def preprocess_text(self, text):
        """
        Clean and prepare text for analysis.

        Steps:
        1. Convert to lowercase
        2. Drop apostrophes, replace all other non-letters with spaces
        3. Split into words
        4. Discard words of one or two characters (like "a", "i", "of")

        Only the ASCII letters a-z survive; digits, punctuation and accented
        characters act as word separators.

        Args:
            text: Raw text string

        Returns:
            List of cleaned words
        """
        text = text.lower()
        text = self._APOSTROPHES.sub("", text)
        text = self._NON_LETTERS.sub(" ", text)
        return [word for word in text.split() if len(word) > 2]

    def train(self, training_data):
        """
        Learn sentiment patterns from labeled examples.

        Any previously learned scores are discarded, so calling train() again
        replaces the model instead of mixing old and new words.

        Args:
            training_data: Iterable of (text, sentiment) tuples where
                sentiment is 'positive' or 'negative'. Any label other than
                'positive' is counted as negative.
        """
        print("🎓 Training sentiment analyzer...")

        # Count words in positive and negative texts
        positive_words = Counter()
        negative_words = Counter()

        for text, sentiment in training_data:
            words = self.preprocess_text(text)

            if sentiment == 'positive':
                positive_words.update(words)
            else:
                negative_words.update(words)

        # Score = (pos - neg) / (pos + neg + 1).
        # Score > 0 means more positive, < 0 means more negative.
        # The +1 in the denominator is smoothing: it pulls the score of rarely
        # seen words towards 0 so a single occurrence is not treated as
        # conclusive. (The total itself can never be 0 for a counted word.)
        # Iterating in sorted order keeps the dictionary order deterministic.
        self.word_scores = {
            word: (positive_words[word] - negative_words[word])
            / (positive_words[word] + negative_words[word] + 1)
            for word in sorted(positive_words.keys() | negative_words.keys())
        }

        self.is_trained = True

        # Show some learned words. Ties are broken alphabetically so the
        # output is the same on every run.
        scored_words = self.word_scores.items()
        most_positive = sorted(scored_words, key=lambda item: (-item[1], item[0]))
        most_negative = sorted(scored_words, key=lambda item: (item[1], item[0]))

        print(f"✅ Learned sentiment for {len(self.word_scores)} words")
        print("\n📊 Most positive words:")
        for word, score in most_positive[:self._TOP_WORDS_SHOWN]:
            print(f" '{word}': {score:+.3f}")

        print("\n📊 Most negative words:")
        for word, score in most_negative[:self._TOP_WORDS_SHOWN]:
            print(f" '{word}': {score:+.3f}")

    def analyze(self, text):
        """
        Predict the sentiment of new text.

        The prediction is the average score of the words that were seen
        during training; unknown words are ignored.

        Args:
            text: Text to analyze

        Returns:
            Tuple of (sentiment, confidence, score) where sentiment is
            "positive" or "negative", confidence is a percentage (0-100)
            and score is the average word score. Text with no known words
            (or a perfectly balanced score) yields ("negative", 0, 0).

        Raises:
            RuntimeError: If train() has not been called yet.
        """
        if not self.is_trained:
            raise RuntimeError("Please train the analyzer first!")

        # Keep only the scores of words learned during training
        known_scores = [
            self.word_scores[word]
            for word in self.preprocess_text(text)
            if word in self.word_scores
        ]

        # Average score (0 when nothing in the text is known)
        avg_score = sum(known_scores) / len(known_scores) if known_scores else 0

        # Determine sentiment and confidence
        sentiment = "positive" if avg_score > 0 else "negative"
        confidence = min(abs(avg_score) * 100, 100)  # Convert to percentage

        return sentiment, confidence, avg_score
148
149
def create_training_data():
    """
    Build the small labeled corpus of movie reviews used for the demo.

    Eight positive reviews come first, followed by eight negative ones.
    A real application would use thousands of examples.

    Returns:
        List of (review_text, sentiment) tuples
    """
    positive_reviews = (
        "This movie was absolutely amazing and wonderful! I loved every minute.",
        "Brilliant performance! The acting was superb and the story captivating.",
        "Fantastic film! Highly recommend to everyone. Best movie of the year!",
        "Loved it! Great storytelling and beautiful cinematography.",
        "Excellent movie with outstanding performances. A must watch!",
        "Amazing! This film exceeded all my expectations. Truly remarkable.",
        "Wonderful experience! The plot was engaging and entertaining.",
        "Superb direction and acting! One of the best films I've seen.",
    )
    negative_reviews = (
        "Terrible movie. Waste of time and money. Very disappointed.",
        "Awful film! Poor acting and boring story. Would not recommend.",
        "Horrible! The worst movie I have ever seen. Extremely disappointing.",
        "Bad movie with terrible plot. Boring and predictable.",
        "Disappointing film. Poor execution and weak performances.",
        "Worst movie ever! Horrible acting and stupid storyline.",
        "Terrible experience. Boring and poorly made. Don't waste your time.",
        "Awful! Poor quality and uninteresting. Complete waste of time.",
    )

    # Attach the label to every review, keeping positives ahead of negatives
    labeled = [(review, "positive") for review in positive_reviews]
    labeled.extend((review, "negative") for review in negative_reviews)
    return labeled
180
181
def _print_prediction(analyzer, text, indent=""):
    """Analyze *text* and print its sentiment, confidence and score."""
    sentiment, confidence, score = analyzer.analyze(text)

    # Visual indicator
    indicator = "😊" if sentiment == "positive" else "😞"

    print(f"{indent}{indicator} Sentiment: {sentiment.upper()}")
    print(f"{indent}📊 Confidence: {confidence:.1f}%")
    print(f"{indent}📈 Score: {score:+.3f}")


def main():
    """
    Main function - Let's analyze some sentiments!

    Trains the analyzer on the sample reviews, prints predictions for a few
    fixed test reviews, then lets the user type their own reviews until they
    enter 'quit', 'exit' or 'q' (or close the input / press Ctrl-C).
    """
    print("=" * 70)
    print("Simple Text Sentiment Analysis")
    print("=" * 70)
    print("\n📚 Task: Learn to identify positive and negative movie reviews")
    print()

    # Step 1: Create training data
    training_data = create_training_data()
    print(f"📊 Training data: {len(training_data)} movie reviews")
    print()

    # Step 2: Create and train analyzer
    analyzer = SimpleSentimentAnalyzer()
    analyzer.train(training_data)
    print()

    # Step 3: Test on new reviews
    print("🧪 Testing on new movie reviews:")
    print("=" * 70)

    test_reviews = [
        "This movie was fantastic! I really enjoyed it.",
        "Boring and terrible. Not worth watching.",
        "Amazing cinematography and wonderful acting!",
        "The worst film I've seen this year. Awful.",
        "Pretty good movie with some great moments.",
        "Disappointing and poorly directed.",
    ]

    for i, review in enumerate(test_reviews, 1):
        print(f"\nReview {i}:")
        print(f" Text: \"{review}\"")
        _print_prediction(analyzer, review, indent=" ")

    print("\n" + "=" * 70)

    # Interactive mode
    print("\n💬 Try it yourself! Enter your own review (or 'quit' to exit):")
    print("-" * 70)

    while True:
        try:
            user_input = input("\nYour review: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed (e.g. piped / non-interactive run) or the user
            # pressed Ctrl-C: leave the loop instead of crashing.
            print()
            break

        if user_input.lower() in {'quit', 'exit', 'q'}:
            break

        if not user_input:
            continue

        # The analyzer was trained above and the input is always a string,
        # so analyze() cannot fail here and needs no error handling.
        print()
        _print_prediction(analyzer, user_input)

    # Explanation
    print("\n💡 What just happened?")
    print("1. The analyzer learned word patterns from example reviews")
    print("2. It calculated 'sentiment scores' for words")
    print("3. For new text, it combines word scores to predict sentiment")
    print()
    print("🎉 You just built a sentiment analyzer!")
    print()
    print("🚀 Next steps:")
    print(" - Add more training examples to improve accuracy")
    print(" - Try analyzing tweets, product reviews, or comments")
    print(" - Explore more advanced NLP in lessons/5-NLP/")
    print()


if __name__ == "__main__":
    main()
269

microsoft/AI-For-Beginners

Branches

Tags

Clone