openai/openai-python
Publicmirrored fromhttps://github.com/openai/openai-pythonAvailable
examples/embeddings/utils.py
94lines · modecode
| 1 | import openai |
| 2 | import pandas as pd |
| 3 | import numpy as np |
| 4 | import matplotlib.pyplot as plt |
| 5 | |
| 6 | from tenacity import retry, wait_random_exponential, stop_after_attempt |
| 7 | from sklearn.metrics import precision_recall_curve |
| 8 | from sklearn.metrics import average_precision_score |
| 9 | |
| 10 | |
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embedding(text, engine="davinci-similarity"):
    """
    Return the embedding of ``text`` produced by the given engine.

    Newlines in ``text`` are replaced with spaces before the request is sent.
    The call is wrapped in a retry policy with randomized exponential backoff
    that stops after six attempts.
    """
    # Newlines can negatively affect performance, so flatten them first.
    single_line = text.replace("\n", " ")

    response = openai.Engine(id=engine).embeddings(input=[single_line])
    # Exactly one input was sent, so the embedding is the first data entry.
    return response["data"][0]["embedding"]
| 18 | |
| 19 | |
def cosine_similarity(a, b):
    """
    Return the cosine similarity of vectors ``a`` and ``b``.

    Computed as the dot product of the two vectors divided by the product of
    their Euclidean norms.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    dot_product = np.dot(a, b)
    return dot_product / norm_product
| 22 | |
| 23 | |
def plot_multiclass_precision_recall(
    y_score, y_true_untransformed, class_list, classifier_name
):
    """
    Plot precision-recall curves for a multiclass classifier.

    Draws, on a new matplotlib figure, the micro-averaged precision-recall
    curve, one precision-recall curve per class and reference iso-f1
    contours. Also prints the micro-averaged average precision score.

    Code slightly modified, but heavily based on https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html
    """
    class_indices = range(len(class_list))

    # One boolean indicator column per class, in class_list order.
    indicator_columns = [
        (y_true_untransformed == class_list[k]) for k in class_indices
    ]
    y_true = pd.concat(indicator_columns, axis=1).values

    # Per-class curves and scores, keyed by class position.
    precision = {}
    recall = {}
    average_precision = {}
    for k in class_indices:
        truth_k = y_true[:, k]
        score_k = y_score[:, k]
        precision[k], recall[k], _ = precision_recall_curve(truth_k, score_k)
        average_precision[k] = average_precision_score(truth_k, score_k)

    # A "micro-average": every (sample, class) pair is scored jointly.
    precision["micro"], recall["micro"], _ = precision_recall_curve(
        y_true.ravel(), y_score.ravel()
    )
    micro_ap = average_precision_score(y_true, y_score, average="micro")
    average_precision["micro"] = micro_ap
    summary = " - Average precision score over all classes: {0:0.2f}".format(
        micro_ap
    )
    print(str(classifier_name) + summary)

    # Figure setup and reference iso-f1 contours.
    plt.figure(figsize=(9, 10))
    lines = []
    labels = []
    for f_score in np.linspace(0.2, 0.8, num=4):
        x = np.linspace(0.01, 1)
        y = f_score * x / (2 * x - f_score)
        non_negative = y >= 0
        (iso_line,) = plt.plot(
            x[non_negative], y[non_negative], color="gray", alpha=0.2
        )
        plt.annotate("f1={0:0.1f}".format(f_score), xy=(0.9, y[45] + 0.02))

    # Only the last contour is registered; it stands for all iso-f1 curves
    # in the legend.
    lines.append(iso_line)
    labels.append("iso-f1 curves")

    (micro_line,) = plt.plot(
        recall["micro"], precision["micro"], color="gold", lw=2
    )
    lines.append(micro_line)
    labels.append("average Precision-recall (auprc = {0:0.2f})".format(micro_ap))

    for k in class_indices:
        (class_line,) = plt.plot(recall[k], precision[k], lw=2)
        lines.append(class_line)
        labels.append(
            "Precision-recall for class `{0}` (auprc = {1:0.2f})".format(
                class_list[k], average_precision[k]
            )
        )

    # Extra space is reserved at the bottom of the figure.
    plt.gcf().subplots_adjust(bottom=0.25)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"{classifier_name}: Precision-Recall curve for each class")
    plt.legend(lines, labels)