openai/openai-python
Public · mirrored from https://github.com/openai/openai-python · Available
examples/embeddings/utils.py
94 lines · mode: blame
1f324723Boris Power4 years ago | 1 | import openai |
| 2 | import pandas as pd | |
| 3 | import numpy as np | |
| 4 | import matplotlib.pyplot as plt | |
| 5 | | |
| 6 | from tenacity import retry, wait_random_exponential, stop_after_attempt | |
| 7 | from sklearn.metrics import precision_recall_curve | |
| 8 | from sklearn.metrics import average_precision_score | |
| 9 | | |
| 10 | | |
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embedding(text, engine="davinci-similarity"):
    """Return the embedding of ``text`` as produced by ``engine``.

    ``text`` is sent as a single-element input list and the ``embedding``
    field of the first entry in the response's ``data`` is returned.
    The call is wrapped in ``retry`` with a random exponential wait
    (``min=1``, ``max=20``) and stops after 6 attempts.
    """
    # Newlines can negatively affect performance, so flatten them to spaces.
    single_line = text.replace("\n", " ")

    response = openai.Engine(id=engine).embeddings(input=[single_line])
    return response["data"][0]["embedding"]
| 18 | | |
| 19 | | |
def cosine_similarity(a, b):
    """Return the cosine similarity of ``a`` and ``b``.

    This is the dot product of the two inputs divided by the product of
    their norms.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return dot_product / norm_product
| 22 | | |
| 23 | | |
def plot_multiclass_precision_recall(
    y_score, y_true_untransformed, class_list, classifier_name
):
    """
    Plot precision-recall curves for a multiclass problem.

    On a new matplotlib figure this draws the micro-averaged
    precision-recall curve, one precision-recall curve per class and
    reference iso-f1 contours. It also prints the micro-averaged average
    precision score, prefixed with ``classifier_name``.

    Code slightly modified, but heavily based on https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html
    """
    class_count = len(class_list)

    # Build one boolean indicator column per class, in class_list order.
    indicator_columns = [
        (y_true_untransformed == class_list[idx]) for idx in range(class_count)
    ]
    y_true = pd.concat(indicator_columns, axis=1).values

    # Per-class curves and scores, keyed by class index.
    precision = {}
    recall = {}
    average_precision = {}
    for idx in range(class_count):
        class_truth = y_true[:, idx]
        class_score = y_score[:, idx]
        precision[idx], recall[idx], _ = precision_recall_curve(
            class_truth, class_score
        )
        average_precision[idx] = average_precision_score(class_truth, class_score)

    # A "micro-average": quantifying score on all classes jointly.
    precision["micro"], recall["micro"], _ = precision_recall_curve(
        y_true.ravel(), y_score.ravel()
    )
    average_precision["micro"] = average_precision_score(
        y_true, y_score, average="micro"
    )
    summary = " - Average precision score over all classes: {0:0.2f}".format(
        average_precision["micro"]
    )
    print(str(classifier_name) + summary)

    # Set up the figure and the handles/labels collected for the legend.
    plt.figure(figsize=(9, 10))
    lines = []
    labels = []

    # Reference iso-f1 contours; only the non-negative part of each is drawn.
    for f_score in np.linspace(0.2, 0.8, num=4):
        x = np.linspace(0.01, 1)
        y = f_score * x / (2 * x - f_score)
        non_negative = y >= 0
        (contour,) = plt.plot(
            x[non_negative], y[non_negative], color="gray", alpha=0.2
        )
        # Label the contour at x=0.9, slightly above its value at sample 45.
        plt.annotate("f1={0:0.1f}".format(f_score), xy=(0.9, y[45] + 0.02))

    # Only the last contour drawn is registered, giving a single legend entry
    # for all iso-f1 curves.
    lines.append(contour)
    labels.append("iso-f1 curves")

    (micro_line,) = plt.plot(recall["micro"], precision["micro"], color="gold", lw=2)
    lines.append(micro_line)
    labels.append(
        "average Precision-recall (auprc = {0:0.2f})".format(
            average_precision["micro"]
        )
    )

    for idx in range(class_count):
        (class_line,) = plt.plot(recall[idx], precision[idx], lw=2)
        lines.append(class_line)
        labels.append(
            "Precision-recall for class `{0}` (auprc = {1:0.2f})".format(
                class_list[idx], average_precision[idx]
            )
        )

    # Adjust the bottom margin, then fix axes, titles and legend.
    plt.gcf().subplots_adjust(bottom=0.25)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"{classifier_name}: Precision-Recall curve for each class")
    plt.legend(lines, labels)