openai/tiktoken

Public

mirrored from https://github.com/openai/tiktoken · Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
5818d566268693d947c06ba76c3be2e48d8c6ded

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

scripts/redact.py

67 lines · mode: code

1import argparse
2import re
3import subprocess
4from pathlib import Path
5
6
def redact_file(path: Path, dry_run: bool) -> None:
    """Delete or strip redacted content from a single file.

    Three outcomes are possible for a non-empty text file:

    * If the first line contains the substring ``"redact"``, the whole file
      is deleted and ``Deleted <path>`` is printed.
    * If the file contains redaction marker lines, the marker lines and
      everything between a ``redact-beg`` / ``redact-end`` pair are removed
      and ``Redacted <path>`` is printed.
    * Otherwise ``Skipped <path>`` is printed.

    Missing paths, directories and empty files are ignored silently. Files
    that cannot be decoded as UTF-8 (e.g. binary assets) are reported as
    skipped instead of aborting the run.

    Args:
        path: File to inspect.
        dry_run: When true, report what would happen but do not modify or
            delete anything on disk.
    """
    if not path.exists() or path.is_dir():
        return

    try:
        # Explicit encoding: the locale default is not reliable across platforms.
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Not UTF-8 text, so it cannot be processed as text; leave it untouched.
        print(f"Skipped {path}")
        return
    if not text:
        return

    # A non-empty string always yields at least one line.
    first_line = text.splitlines()[0]
    if "redact" in first_line:
        if not dry_run:
            path.unlink()
        print(f"Deleted {path}")
        return

    # Marker lines may be indented with spaces and must end with a newline.
    pattern = "|".join(
        r" *" + re.escape(x)
        for x in [
            "# ===== redact-beg =====\n",
            "# ===== redact-end =====\n",
            "<!--- redact-beg -->\n",
            "<!--- redact-end -->\n",
        ]
    )

    if re.search(pattern, text):
        # The pattern has no capture groups, so re.split returns only the text
        # between markers. Assuming beg/end markers alternate, even-indexed
        # pieces lie outside the redacted regions and are the ones to keep.
        redacted_text = "".join(re.split(pattern, text)[::2])
        if not dry_run:
            path.write_text(redacted_text, encoding="utf-8")
        print(f"Redacted {path}")
        return

    print(f"Skipped {path}")
40
41
def redact(dry_run: bool) -> None:
    """Apply ``redact_file`` to every file in the tiktoken checkout.

    The repository root is taken to be the parent of the directory holding
    this script; it must be named ``tiktoken`` and contain a
    ``pyproject.toml``. Files are listed with ``git ls-files``; if that
    command exits with an error, every path under the root is used instead.

    Args:
        dry_run: Passed through to ``redact_file``; when true nothing on disk
            is modified.
    """
    tiktoken_root = Path(__file__).parent.parent
    assert tiktoken_root.name == "tiktoken"
    assert (tiktoken_root / "pyproject.toml").exists()

    try:
        output = subprocess.check_output(["git", "ls-files"], cwd=tiktoken_root, text=True)
    except subprocess.CalledProcessError:
        paths = list(tiktoken_root.glob("**/*"))
    else:
        # git prints paths relative to its cwd (the repo root). Anchor them
        # there so the script works regardless of the caller's working
        # directory, consistent with the glob fallback above.
        paths = [tiktoken_root / p for p in output.splitlines()]

    for path in paths:
        redact_file(path, dry_run=dry_run)
55
56
def main() -> None:
    """Parse command-line arguments and run the redaction pass.

    Dry-run is the default. It is only turned off when ``--dry-run`` is given
    a non-empty value whose first character is ``f`` or ``F`` (for example
    ``--dry-run=false``); any other value keeps dry-run enabled.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dry-run",
        # Deliberately conservative: anything that does not start with "f"
        # is treated as "yes, dry run".
        type=lambda x: not x or x[0].lower() != "f",
        default=True,
        help=(
            "report what would be deleted or redacted without touching files "
            "(default); pass --dry-run=false to actually modify files"
        ),
    )
    args = parser.parse_args()
    redact(args.dry_run)
    if args.dry_run:
        print("Dry run, use --dry-run=false to actually redact files")
64
65
# Run the redaction pass only when executed as a script, not on import.
if __name__ == "__main__":
    main()
68