openai/openai-python

Public

mirrored from https://github.com/openai/openai-pythonAvailable

Code · Commits · Issues · Pull requests · Actions · Insights · Security
v0.11.3

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

examples/embeddings/utils.py

94lines · modeblame

1f324723Boris Power4 years ago1import openai
2import pandas as pd
3import numpy as np
4import matplotlib.pyplot as plt
5
6from tenacity import retry, wait_random_exponential, stop_after_attempt
7from sklearn.metrics import precision_recall_curve
8from sklearn.metrics import average_precision_score
9
10
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embedding(text, engine="davinci-similarity"):
    """Return the embedding of ``text`` produced by the given engine.

    The call is wrapped in a tenacity ``retry`` decorator: failed attempts
    are retried with a randomized exponential wait (``min=1``, ``max=20``)
    and retrying stops after 6 attempts.

    Args:
        text: String to embed. Newlines are replaced with spaces first.
        engine: Engine id passed to ``openai.Engine``.

    Returns:
        The ``["data"][0]["embedding"]`` entry of the embeddings response
        (presumably a list of floats -- confirm against the API response).
    """
    # Replace newlines, which can negatively affect performance.
    text = text.replace("\n", " ")

    response = openai.Engine(id=engine).embeddings(input=[text])
    # A single input was sent, so the result of interest is the first item.
    return response["data"][0]["embedding"]
18
19
def cosine_similarity(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Computed as the dot product of the two vectors divided by the product
    of their Euclidean norms.

    Args:
        a: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        b: Second vector, same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b``.

    Note:
        No guard is applied for zero-norm inputs; in that case the
        denominator is zero and the result is not a meaningful similarity.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return np.dot(a, b) / norm_product
22
23
def plot_multiclass_precision_recall(
    y_score, y_true_untransformed, class_list, classifier_name
):
    """
    Precision-Recall plotting for a multiclass problem. It plots average precision-recall, per class precision recall and reference f1 contours.

    Code slightly modified, but heavily based on https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html

    Args:
        y_score: 2-D array of scores, indexed as ``y_score[:, i]`` for the
            i-th entry of ``class_list`` (column order is assumed to match
            ``class_list`` -- confirm at the call site).
        y_true_untransformed: True labels. Compared with ``==`` against each
            class and combined with ``pd.concat(axis=1)``, so presumably a
            pandas Series -- confirm at the call site.
        class_list: Indexable collection of class labels.
        classifier_name: Name used in the printed summary and plot title.

    Returns:
        None. Prints the micro-averaged average precision and draws onto a
        new matplotlib figure; it does not call ``plt.show()``.
    """
    n_classes = len(class_list)
    # One boolean indicator column per class (one-vs-rest ground truth).
    y_true = pd.concat(
        [(y_true_untransformed == class_list[i]) for i in range(n_classes)], axis=1
    ).values

    # For each class
    precision = {}
    recall = {}
    average_precision = {}
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_true[:, i], y_score[:, i])
        average_precision[i] = average_precision_score(y_true[:, i], y_score[:, i])

    # A "micro-average": quantifying score on all classes jointly
    precision["micro"], recall["micro"], _ = precision_recall_curve(
        y_true.ravel(), y_score.ravel()
    )
    average_precision["micro"] = average_precision_score(
        y_true, y_score, average="micro"
    )
    micro_ap = average_precision["micro"]
    print(
        f"{classifier_name!s} - Average precision score over all classes: {micro_ap:0.2f}"
    )

    # setup plot details
    plt.figure(figsize=(9, 10))
    f_scores = np.linspace(0.2, 0.8, num=4)
    lines = []
    labels = []
    iso_f1_line = None
    for f_score in f_scores:
        x = np.linspace(0.01, 1)
        # Precision as a function of recall along a constant-F1 contour.
        y = f_score * x / (2 * x - f_score)
        # Only the non-negative part of the contour is drawn.
        (iso_f1_line,) = plt.plot(x[y >= 0], y[y >= 0], color="gray", alpha=0.2)
        # y[45] is the contour height near the right edge of the default
        # 50-point grid, used to position the label just above the curve.
        plt.annotate("f1={0:0.1f}".format(f_score), xy=(0.9, y[45] + 0.02))

    # Register a single legend entry for all iso-f1 contours, using the
    # handle of the last contour drawn.
    lines.append(iso_f1_line)
    labels.append("iso-f1 curves")

    (micro_line,) = plt.plot(recall["micro"], precision["micro"], color="gold", lw=2)
    lines.append(micro_line)
    labels.append(
        "average Precision-recall (auprc = {0:0.2f})".format(micro_ap)
    )

    for i in range(n_classes):
        (class_line,) = plt.plot(recall[i], precision[i], lw=2)
        lines.append(class_line)
        labels.append(
            "Precision-recall for class `{0}` (auprc = {1:0.2f})".format(
                class_list[i], average_precision[i]
            )
        )

    fig = plt.gcf()
    fig.subplots_adjust(bottom=0.25)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"{classifier_name}: Precision-Recall curve for each class")
    plt.legend(lines, labels)