openai/openai-python

Public

mirrored from https://github.com/openai/openai-pythonAvailable

Watch0 Fork0 Star0

Code Commits Issues Pull requests Actions Insights Security

v0.10.3

Find a branch or tag

Branches

v0.10.3

Clone

HTTPS

Download ZIP

openai-python/examples/codex

examples/codex/backtranslation.py

187lines · modeblame

Raw Download

Latest commit unavailable.

unknown

`7febb755`Boris Power4 years ago	1	`import openai`
	2	`from smokey import Smokey`
	3	`from typing import List, Union`
	4
	5
	6	`def get_candidates(`
	7	`prompt: str,`
	8	`stop: List[str],`
	9	`temperature: float,`
	10	`priming_prefix: str,`
	11	`engine: str,`
	12	`n: int = 5,`
	13	`) -> List[str]:`
	14	`"""`
	15	`Generate N candidate completions based on the prompt, generated with a specific temperature.`
	16
	17	`:param prompt: The prompt to start the conversation with.`
	18	`:param stop: A list of tokens that indicate the end of the generation.`
	19	`:param temperature: The temperature of the generation.`
	20	`:param priming_prefix: The prefix to use for the priming.`
	21	`:param engine: The engine to use for the generation.`
	22	`:param n: The number of completions to generate.`
	23	`:return: A list of completions.`
	24	`"""`
	25	`response = openai.Completion.create(`
	26	`engine=engine,`
	27	`prompt=prompt,`
	28	`temperature=temperature,`
	29	`max_tokens=150,`
	30	`top_p=1,`
	31	`frequency_penalty=0,`
	32	`presence_penalty=0,`
	33	`stop=stop,`
	34	`n=n,`
	35	`)`
	36	`responses = [priming_prefix + choice.text for choice in response.choices]`
	37	`return responses`
	38
	39
	40	`def rindex(lst: List, value: str) -> int:`
	41	`"""`
	42	`Return the index of the last occurence of a value in a list.`
	43
	44	`:param lst: The list to search in.`
	45	`:param value: The value to search for.`
	46	`:return: The index of the last occurence of the value.`
	47	`"""`
	48	`try:`
	49	`return len(lst) - lst[::-1].index(value) - 1`
	50	`except ValueError:`
	51	raise ValueError(f"Answer start token `{value}` not found in the eval template")
	52
	53
	54	`def eval_candidate(`
	55	`candidate_answer: str,`
	56	`original_instruction: str,`
	57	`eval_template: str,`
	58	`answer_start_token: str,`
	59	`engine: str,`
	60	`) -> float:`
	61	`"""`
	62	`Evaluate a candidate answer by calculating the average log probability`
	63	`of the original instruction, given the candidate answer with a specific`
	64	`evaluation template, aimed at reconstructing the original instruction.`
65
66	`:param candidate_answer: The candidate answer to evaluate.`
67	`:param original_instruction: The original instruction.`
68	`:param eval_template: The template to use for the evaluation.`
69	`:param answer_start_token: The token to use to indicate the start of the answer.`
70	`:param engine: The engine to use for the evaluation.`
71	`:return: The evaluation of the candidate answer.`
72	`"""`
73	`response = openai.Completion.create(`
74	`engine=engine,`
75	`prompt=eval_template.format(candidate_answer, original_instruction),`
76	`temperature=0,`
77	`max_tokens=0,`
78	`top_p=1,`
79	`frequency_penalty=0,`
80	`presence_penalty=0,`
81	`logprobs=1,`
82	`echo=True,`
83	`)`
84
85	`answer_start = rindex(`
86	`response["choices"][0]["logprobs"]["tokens"], answer_start_token`
87	`)`
88	`logprobs = response["choices"][0]["logprobs"]["token_logprobs"][answer_start + 1 :]`
89	`return sum(logprobs) / len(logprobs)`
90
91
92	`def backtranslation(`
93	`prompt_template: str,`
94	`additional_info: str,`
95	`instruction: str,`
96	`eval_template: str,`
97	`priming_prefix: str = "SELECT",`
98	`stop1: List[str] = ["#", ";"],`
99	`answer_start_token: str = "--",`
100	`n: int = 5,`
101	`temperature: float = 0.5,`
102	`return_all_results: bool = False,`
103	`engine: str = "davinci-codex",`
104	`) -> Union[str, List[str, float]]:`
105	`"""`
106	`Generate a number of SQL queries given a natural language instruction,`
107	`and pick the best one based on the average log probability of explaining the`
108	`candidate SQL query with the exact original instruction, when prompted for`
109	`a natural language explanation of the candidate SQL query.`
110
111	`:param prompt_template: The template to use for the prompt to generate SQL.`
112	`:param additional_info: Additional information to include in the prompt`
113	`(SQL Tables, and their properties).`
114	`:param instruction: The instruction in natural language.`
115	`:param eval_template: The template to use for the evaluation.`
116	`:param priming_prefix: The prefix to use for the priming of the SQL query.`
117	`:param stop1: A list of tokens that indicate the end of the generation.`
118	`:param answer_start_token: The token to use to indicate the start of the`
119	`natural answer.`
120	`:param n: The number of candidates to generate.`
121	`:param temperature: The temperature of the generation.`
122	`:param return_all_results: Whether to return all results or just the best one.`
123	`:param engine: The engine to use for the generation and evaluation.`
124	`:return: The best SQL query, or a list of all scored generated SQL queries.`
125	`"""`
126	`prompt_template = prompt_template.format(`
127	`additional_info, instruction, priming_prefix`
128	`)`
129
130	`candidates = []`
131	`responses = get_candidates(`
132	`prompt_template, stop1, temperature, priming_prefix, engine=engine, n=n`
133	`)`
134	`for i in range(n):`
135	`quality = eval_candidate(`
136	`responses[i],`
137	`instruction,`
138	`eval_template,`
139	`answer_start_token,`
140	`engine=engine,`
141	`)`
142	`candidates.append((responses[i], quality))`
143
144	`candidates.sort(key=lambda x: x[1], reverse=True)`
145	`if return_all_results:`
146	`return candidates`
147	`return candidates[0][0]`
148
149
150	`def main(`
151	`nl_query: str = "Return the name of each department that had more than 10 employees in June 2021",`
152	`eval_template: str = "{};\n-- Explanation of the above query in human readable format\n-- {}",`
153	`table_definitions: str = "# Employee(id, name, department_id)\n# Department(id, name, address)\n# Salary_Payments(id, employee_id, amount, date)\n",`
154	`prompt_template: str = "### Postgres SQL tables, with their properties:\n#\n{}#\n### {}\n{}",`
155	`n: int = 3,`
156	`temperature: float = 0.3,`
157	`engine: str = "davinci-codex",`
158	`):`
159	`"""`
160	`Generate a number of SQL queries given a natural language instruction,`
161	`and pick the best one based on the highest backtranslation score.`
162
163	`:param nl_query: The natural language query.`
164	`:param eval_template: The template to use for the evaluation.`
165	`:param table_definitions: The definitions of the tables used in the query.`
166	`:param prompt_template: The template to use for the prompt to generate SQL.`
167	`:param n: The number of candidates to generate.`
168	`:param temperature: The temperature of the generation.`
169	`:param engine: The engine to use for the generation and evaluation.`
170	`:return: The best SQL query, or a list of all scored generated SQL queries.`
171	`"""`
172
173	`result = backtranslation(`
174	`prompt_template,`
175	`table_definitions,`
176	`nl_query,`
177	`eval_template,`
178	`priming_prefix="SELECT",`
179	`temperature=temperature,`
180	`n=n,`
181	`engine=engine,`
182	`)`
183	`print(result)`
184
185
186	`if __name__ == "__main__":`
187	`Smokey(main)`

openai/openai-python

Branches

Tags

Clone