microsoft/TypeAgent

Public

mirrored from https://github.com/microsoft/TypeAgent · Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
3d6af4fbf8c1941877d71e910be2718ab9fd3bf6

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

python/nprData/embedding.py

42lines · modecode

# Copyright (c) Microsoft Corporation and Henry Lucco.
# Licensed under the MIT License.

import os
from dataclasses import dataclass
from typing import List

from openai import OpenAI

@dataclass
class Embedding:
    """An embedding vector stored together with its dimension.

    Attributes:
        values: The components of the embedding vector.
        dimension: The dimension recorded alongside ``values``.
    """

    values: List[float]
    dimension: int

    @classmethod
    def from_text(cls, text: str) -> "Embedding":
        """Create an embedding for ``text`` via the OpenAI embeddings API.

        The API key is read from the ``OPENAI_API_KEY`` environment variable
        and the model name from ``EMBEDDING_MODEL`` (falling back to
        ``"text-embedding-ada-002"``).

        Args:
            text: The text to embed. Surrounding whitespace is stripped and
                newlines are replaced with spaces before it is sent.

        Returns:
            An ``Embedding`` whose ``dimension`` is the length of the vector
            returned by the API.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty, or if
                ``text`` is empty once surrounding whitespace is stripped.
        """
        openai_api_key = os.environ.get("OPENAI_API_KEY")
        if not openai_api_key:
            raise ValueError("OPENAI_API_KEY environment variable is not set")

        text = text.strip().replace("\n", " ")
        if not text:
            # The embeddings endpoint does not accept an empty string, so
            # fail here with a clear message instead of making a request
            # that can only come back as an API error.
            raise ValueError("Cannot create an embedding from empty text")

        openai_client = OpenAI(api_key=openai_api_key)
        embedding_model = os.environ.get("EMBEDDING_MODEL", "text-embedding-ada-002")

        embedding_value = openai_client.embeddings.create(
            input=[text],
            model=embedding_model,
        ).data[0].embedding

        return cls(embedding_value, len(embedding_value))

    @classmethod
    def from_dict(cls, embedding_dict: dict) -> "Embedding":
        """Build an ``Embedding`` from a dict shaped like ``to_dict()`` output.

        Args:
            embedding_dict: Mapping with ``"values"`` and ``"dimension"``
                keys. Both are used as given; ``"dimension"`` is not
                recomputed from ``"values"``.

        Raises:
            KeyError: If either key is missing.
        """
        return cls(embedding_dict["values"], embedding_dict["dimension"])

    def to_dict(self) -> dict:
        """Return the embedding as a dict with ``"values"`` and ``"dimension"``.

        The ``"values"`` entry refers to the same list object held by this
        instance; it is not copied.
        """
        return {
            "values": self.values,
            "dimension": self.dimension,
        }