openai/openai-python

Public

mirrored from https://github.com/openai/openai-python · Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
v0.11.1

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

examples/embeddings/utils.py

94lines · modecode

1import openai
2import pandas as pd
3import numpy as np
4import matplotlib.pyplot as plt
5
6from tenacity import retry, wait_random_exponential, stop_after_attempt
7from sklearn.metrics import precision_recall_curve
8from sklearn.metrics import average_precision_score
9
10
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embedding(text, engine="davinci-similarity"):
    """Return the embedding vector for ``text`` produced by ``engine``.

    The call is wrapped in a tenacity ``retry`` decorator configured with a
    random exponential wait (``min=1``, ``max=20``) that stops after 6
    attempts.

    Args:
        text: The string to embed. Newlines are replaced by spaces before
            the request is sent.
        engine: Identifier of the engine to query.

    Returns:
        The ``'embedding'`` entry of the first item in the response's
        ``'data'`` list.
    """
    # Newlines can negatively affect performance, so flatten them to spaces.
    single_line = text.replace("\n", " ")

    response = openai.Engine(id=engine).embeddings(input=[single_line])
    return response["data"][0]["embedding"]
18
19
def cosine_similarity(a, b):
    """Return the cosine similarity of ``a`` and ``b``.

    Computed as the dot product of the two inputs divided by the product of
    their norms (``np.linalg.norm`` with default arguments).
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return dot_product / norm_product
22
23
def plot_multiclass_precision_recall(
    y_score, y_true_untransformed, class_list, classifier_name
):
    """
    Plot precision-recall curves for a multiclass classifier.

    Draws the micro-averaged precision-recall curve, one precision-recall
    curve per class, and reference iso-f1 contours on a single matplotlib
    figure, and prints the micro-averaged average precision score.

    Adapted from the scikit-learn example at
    https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html

    Args:
        y_score: Scores indexed as ``y_score[:, i]`` for class ``i`` and
            flattened with ``.ravel()``, i.e. a 2-D array with one column
            per class.
        y_true_untransformed: True labels, compared against each entry of
            ``class_list`` and combined with ``pd.concat`` (presumably a
            pandas Series; verify against callers).
        class_list: Indexable collection of class labels; column ``i`` of
            ``y_score`` is paired with ``class_list[i]``.
        classifier_name: Name used in the printed summary and plot title.
    """
    n_classes = len(class_list)
    class_indices = range(n_classes)

    # One boolean indicator column per class (one-vs-rest ground truth).
    indicator_columns = [
        (y_true_untransformed == class_list[k]) for k in class_indices
    ]
    y_true = pd.concat(indicator_columns, axis=1).values

    # Per-class curves and average precision, keyed by class index.
    precision = {}
    recall = {}
    average_precision = {}
    for k in class_indices:
        truth_k, score_k = y_true[:, k], y_score[:, k]
        precision[k], recall[k], _ = precision_recall_curve(truth_k, score_k)
        average_precision[k] = average_precision_score(truth_k, score_k)

    # A "micro-average": quantifying score on all classes jointly
    micro_curve = precision_recall_curve(y_true.ravel(), y_score.ravel())
    precision["micro"], recall["micro"], _ = micro_curve
    micro_ap = average_precision_score(y_true, y_score, average="micro")
    average_precision["micro"] = micro_ap

    summary = " - Average precision score over all classes: {0:0.2f}".format(
        micro_ap
    )
    print(str(classifier_name) + summary)

    # Set up the figure and the legend bookkeeping.
    plt.figure(figsize=(9, 10))
    handles = []
    legend_labels = []

    # Reference iso-f1 contours: precision as a function of recall at fixed f1.
    recall_grid = np.linspace(0.01, 1)
    for f_score in np.linspace(0.2, 0.8, num=4):
        iso_precision = f_score * recall_grid / (2 * recall_grid - f_score)
        non_negative = iso_precision >= 0
        (curve,) = plt.plot(
            recall_grid[non_negative],
            iso_precision[non_negative],
            color="gray",
            alpha=0.2,
        )
        plt.annotate(
            "f1={0:0.1f}".format(f_score), xy=(0.9, iso_precision[45] + 0.02)
        )

    # Only the last contour is registered, giving a single legend entry.
    handles.append(curve)
    legend_labels.append("iso-f1 curves")

    # Micro-averaged curve.
    (curve,) = plt.plot(recall["micro"], precision["micro"], color="gold", lw=2)
    handles.append(curve)
    legend_labels.append(
        "average Precision-recall (auprc = {0:0.2f})".format(micro_ap)
    )

    # One curve per class.
    for k in class_indices:
        (curve,) = plt.plot(recall[k], precision[k], lw=2)
        handles.append(curve)
        legend_labels.append(
            "Precision-recall for class `{0}` (auprc = {1:0.2f})".format(
                class_list[k], average_precision[k]
            )
        )

    # Leave room below the axes, then finish the axes decoration.
    plt.gcf().subplots_adjust(bottom=0.25)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"{classifier_name}: Precision-Recall curve for each class")
    plt.legend(handles, legend_labels)