openai/tiktoken

Public

mirrored from https://github.com/openai/tiktoken
Available

CodeCommitsIssuesPull requestsActionsInsightsSecurity
next

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

scripts/redact.py

67lines · modeblame

a1a9f168Shantanu Jain3 years ago1import argparse
2import re
3import subprocess
4from pathlib import Path
5
6
# Marker lines that delimit a redacted region. Each literal includes the
# trailing newline, so a marker only matches when it is followed by one.
_REDACT_MARKERS = (
    "# ===== redact-beg =====\n",
    "# ===== redact-end =====\n",
    "<!--- redact-beg -->\n",
    "<!--- redact-end -->\n",
)

# Matches any marker line together with the spaces that precede it, so an
# indented marker is removed along with its indentation.
_REDACT_MARKER_RE = re.compile("|".join(r" *" + re.escape(m) for m in _REDACT_MARKERS))


def redact_file(path: Path, dry_run: bool) -> None:
    """Delete or strip redacted content from a single file.

    Behaviour, in order:

    * Missing paths, directories and empty files are ignored silently.
    * Files that cannot be decoded as text (e.g. binary assets) are reported
      as skipped instead of aborting the whole run.
    * If the first line contains ``redact``, the whole file is deleted.
    * Otherwise, if the file contains marker lines, the text is split on them
      and every second piece (the text following an odd-numbered marker, i.e.
      between a ``redact-beg`` and the next marker) is dropped along with the
      marker lines themselves.
    * Otherwise the file is reported as skipped.

    Args:
        path: File to process.
        dry_run: When true, only print what would happen; the file system is
            not modified.
    """
    if not path.exists() or path.is_dir():
        return

    try:
        text = path.read_text()
    except UnicodeDecodeError:
        # Not a text file in the default encoding; it cannot carry markers.
        print(f"Skipped {path}")
        return
    if not text:
        return

    first_line = text.splitlines()[0]
    if "redact" in first_line:
        if not dry_run:
            path.unlink()
        print(f"Deleted {path}")
        return

    # The pattern has no capture groups, so split() returns only the text
    # between markers; more than one piece means at least one marker matched.
    pieces = _REDACT_MARKER_RE.split(text)
    if len(pieces) > 1:
        # Even-indexed pieces lie outside beg/end pairs and are kept.
        redacted_text = "".join(pieces[::2])
        if not dry_run:
            path.write_text(redacted_text)
        print(f"Redacted {path}")
        return

    print(f"Skipped {path}")
40
41
def redact(dry_run: bool) -> None:
    """Run ``redact_file`` over every file of the tiktoken checkout.

    The repository root is the parent of the directory containing this script.
    Files are enumerated with ``git ls-files``. If that command fails, or git
    is not installed, every path under the root outside ``.git`` is used
    instead.

    Args:
        dry_run: Forwarded to ``redact_file``; when true nothing is modified.

    Raises:
        AssertionError: If the computed root is not named ``tiktoken`` or does
            not contain ``pyproject.toml``.
    """
    tiktoken_root = Path(__file__).parent.parent
    assert tiktoken_root.name == "tiktoken"
    assert (tiktoken_root / "pyproject.toml").exists()

    try:
        # -z gives NUL-separated, unquoted names, so paths with unusual
        # characters come back exactly as they exist on disk.
        output = subprocess.check_output(
            ["git", "ls-files", "-z"], cwd=tiktoken_root, text=True
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError: the git executable itself is missing.
        # Never descend into .git: its files must not be rewritten or deleted.
        paths = [
            p
            for p in tiktoken_root.glob("**/*")
            if ".git" not in p.relative_to(tiktoken_root).parts
        ]
    else:
        # git reports paths relative to its cwd (the root); anchor them there
        # so the script works no matter where it is launched from.
        paths = [tiktoken_root / name for name in output.split("\0") if name]

    for path in paths:
        redact_file(path, dry_run=dry_run)
55
56
def main() -> None:
    """Parse the command line and run the redaction.

    ``--dry-run`` takes a value and defaults to true. Only a value whose first
    character is ``f`` or ``F`` switches the dry run off.
    """

    def parse_dry_run(raw: str) -> bool:
        # An empty value, or anything not starting with "f"/"F", keeps the
        # dry run enabled.
        return raw[:1].lower() != "f"

    cli = argparse.ArgumentParser()
    cli.add_argument("--dry-run", type=parse_dry_run, default=True)
    dry_run = cli.parse_args().dry_run

    redact(dry_run)
    if dry_run:
        print("Dry run, use --dry-run=false to actually redact files")


if __name__ == "__main__":
    main()