microsoft/TypeAgent

Public

mirrored from https://github.com/microsoft/TypeAgent · Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
8ba3ebd84dd1bb6343ebae028996313cabd70764

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

python/fineTuning/dataset_convert.py

30lines · modecode

1# Copyright (c) Microsoft Corporation and Henry Lucco.
2# Licensed under the MIT License.
3
4import json
5
6from chaparral.models.data import ChapparalDataset
7from chaparral.prompts.knowledge import get_knowledge_prompt
8
if __name__ == "__main__":
    # Script entry point: load the raw training set, turn every
    # (message, knowledge) pair into an instruction/input/output record,
    # and write all records out as one JSON array.

    dataset_file = "./gpt4o_train_200.json"
    output_file = "gpt4o_train_200_converted.json"

    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open(dataset_file, "r", encoding="utf-8") as in_file:
        data = json.load(in_file)

    dataset = ChapparalDataset.from_list(data)

    # One record per info pair. "input" is always the empty string; the
    # prompt built from the message goes in "instruction" and the
    # stringified knowledge goes in "output".
    items = [
        {
            "instruction": get_knowledge_prompt(pair.message),
            "input": "",
            "output": pair.knowledge.to_str(),
        }
        for pair in dataset.info_pairs
    ]

    # Size report for the first record: instruction length, output length,
    # and their sum. Guarded so an empty dataset does not raise IndexError.
    if items:
        instruction_len = len(items[0]["instruction"])
        output_len = len(items[0]["output"])
        print(instruction_len, output_len, instruction_len + output_len)

    # NOTE(review): the original called exit() at this point, which made the
    # write below unreachable. It looked like leftover debugging, so it has
    # been removed and the converted dataset is now actually written.
    with open(output_file, "w", encoding="utf-8") as out_file:
        json.dump(items, out_file)
31