import openai
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tenacity import retry, wait_random_exponential, stop_after_attempt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embedding(text, engine="davinci-similarity"):
# replace newlines, which can negatively affect performance.
text = text.replace("\n", " ")
return openai.Engine(id=engine).embeddings(input = [text])['data'][0]['embedding']
def cosine_similarity(a, b):
return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
def plot_multiclass_precision_recall(
y_score, y_true_untransformed, class_list, classifier_name
):
"""
Precision-Recall plotting for a multiclass problem. It plots average precision-recall, per class precision recall and reference f1 contours.
Code slightly modified, but heavily based on https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html
"""
n_classes = len(class_list)
y_true = pd.concat(
[(y_true_untransformed == class_list[i]) for i in range(n_classes)], axis=1
).values
# For each class
precision = dict()
recall = dict()
average_precision = dict()
for i in range(n_classes):
precision[i], recall[i], _ = precision_recall_curve(y_true[:, i], y_score[:, i])
average_precision[i] = average_precision_score(y_true[:, i], y_score[:, i])
# A "micro-average": quantifying score on all classes jointly
precision["micro"], recall["micro"], _ = precision_recall_curve(
y_true.ravel(), y_score.ravel()
)
average_precision["micro"] = average_precision_score(
y_true, y_score, average="micro"
)
print(
str(classifier_name)
+ " - Average precision score over all classes: {0:0.2f}".format(
average_precision["micro"]
)
)
# setup plot details
plt.figure(figsize=(9, 10))
f_scores = np.linspace(0.2, 0.8, num=4)
lines = []
labels = []
for f_score in f_scores:
x = np.linspace(0.01, 1)
y = f_score * x / (2 * x - f_score)
(l,) = plt.plot(x[y >= 0], y[y >= 0], color="gray", alpha=0.2)
plt.annotate("f1={0:0.1f}".format(f_score), xy=(0.9, y[45] + 0.02))
lines.append(l)
labels.append("iso-f1 curves")
(l,) = plt.plot(recall["micro"], precision["micro"], color="gold", lw=2)
lines.append(l)
labels.append(
"average Precision-recall (auprc = {0:0.2f})"
"".format(average_precision["micro"])
)
for i in range(n_classes):
(l,) = plt.plot(recall[i], precision[i], lw=2)
lines.append(l)
labels.append(
"Precision-recall for class `{0}` (auprc = {1:0.2f})"
"".format(class_list[i], average_precision[i])
)
fig = plt.gcf()
fig.subplots_adjust(bottom=0.25)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"{classifier_name}: Precision-Recall curve for each class")
plt.legend(lines, labels)openai/openai-python
Publicmirrored from https://github.com/openai/openai-pythonAvailable
examples/embeddings/utils.py
94lines · modepreview