openai/tiktoken

Public

mirrored from https://github.com/openai/tiktoken Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
0.7.0

Branches

Tags

  • No tags available.
0 Branches · 0 Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

tests/test_simple_public.py

42lines · modecode

1import subprocess
2import sys
3
4import tiktoken
5
6
def test_simple():
    """Smoke-test the public encode/decode API against known token ids.

    For the "gpt2" and "cl100k_base" encodings, checks that "hello world"
    encodes to the expected ids, that those ids decode back to the same
    text, and that "<|endoftext|>" maps to its special token id when
    ``allowed_special="all"`` is passed.

    Then, for every encoding reported by ``tiktoken.list_encoding_names()``,
    checks that each of the first 10,000 token ids survives a round trip
    through ``decode_single_token_bytes`` and ``encode_single_token``.
    """
    # Upstream note: more tests exist than this file; they are just not
    # currently public.
    known_cases = (
        # (encoding name, ids for "hello world", ids for "hello <|endoftext|>")
        ("gpt2", [31373, 995], [31373, 220, 50256]),
        ("cl100k_base", [15339, 1917], [15339, 220, 100257]),
    )
    for encoding_name, plain_ids, special_ids in known_cases:
        codec = tiktoken.get_encoding(encoding_name)
        assert codec.encode("hello world") == plain_ids
        assert codec.decode(plain_ids) == "hello world"
        assert codec.encode("hello <|endoftext|>", allowed_special="all") == special_ids

    for encoding_name in tiktoken.list_encoding_names():
        codec = tiktoken.get_encoding(encoding_name)
        for token_id in range(10_000):
            token_bytes = codec.decode_single_token_bytes(token_id)
            assert codec.encode_single_token(token_bytes) == token_id
23
24
def test_encoding_for_model():
    """Check that model names resolve to encodings with the expected names."""
    # (model name passed to encoding_for_model, expected Encoding.name)
    expected_encoding_by_model = (
        ("gpt2", "gpt2"),
        ("text-davinci-003", "p50k_base"),
        ("text-davinci-edit-001", "p50k_edit"),
        ("gpt-3.5-turbo-0301", "cl100k_base"),
    )
    for model_name, encoding_name in expected_encoding_by_model:
        resolved = tiktoken.encoding_for_model(model_name)
        assert resolved.name == encoding_name
34
35
def test_optional_blobfile_dependency():
    """Check that importing tiktoken does not import ``blobfile``.

    The check runs in a separate interpreter (``sys.executable -c``) so that
    modules already loaded in the test process cannot affect the result.
    ``subprocess.check_call`` raises if the child exits with a non-zero
    status, which happens when the child's assertion fails.
    """
    script = (
        "\n"
        "import tiktoken\n"
        "import sys\n"
        'assert "blobfile" not in sys.modules\n'
    )
    command = [sys.executable, "-c", script]
    subprocess.check_call(command)
43