openai/openai-python
Public · mirrored from https://github.com/openai/openai-python · Available
bin/blacken-docs.py
251 lines · mode: code
| 1 | # fork of https://github.com/asottile/blacken-docs implementing https://github.com/asottile/blacken-docs/issues/170 |
| 2 | from __future__ import annotations |
| 3 | |
| 4 | import re |
| 5 | import argparse |
| 6 | import textwrap |
| 7 | import contextlib |
| 8 | from typing import Match, Optional, Sequence, Generator, NamedTuple, cast |
| 9 | |
| 10 | import black |
| 11 | from black.mode import TargetVersion |
| 12 | from black.const import DEFAULT_LINE_LENGTH |
| 13 | |
# --- Markdown -------------------------------------------------------------
# Fenced ```python block; the closing fence must sit at the same indentation
# as the opening fence.
MD_RE = re.compile(
    r"(?P<before>^(?P<indent> *)```\s*python\n)"
    r"(?P<code>.*?)"
    r"(?P<after>^(?P=indent)```\s*$)",
    flags=re.DOTALL | re.MULTILINE,
)
# Fenced ```pycon block (interactive-session transcript).
MD_PYCON_RE = re.compile(
    r"(?P<before>^(?P<indent> *)```\s*pycon\n)"
    r"(?P<code>.*?)"
    r"(?P<after>^(?P=indent)```.*$)",
    flags=re.DOTALL | re.MULTILINE,
)

# --- reStructuredText -----------------------------------------------------
# Language names after a code directive that are treated as Python.
RST_PY_LANGS = frozenset({"python", "py", "sage", "python3", "py3", "numpy"})
BLOCK_TYPES = "(code|code-block|sourcecode|ipython)"
DOCTEST_TYPES = "(testsetup|testcleanup|testcode)"
# Directive line, optional ":option:" lines, optional blank lines, then a run
# of lines that are either blank or indented deeper than the directive.
RST_RE = re.compile(
    r"(?P<before>"
    r"^(?P<indent> *)\.\. ("
    r"jupyter-execute::|"
    rf"{BLOCK_TYPES}:: (?P<lang>\w+)|"
    rf"{DOCTEST_TYPES}::.*"
    r")\n"
    r"((?P=indent) +:.*\n)*"
    r"\n*"
    r")"
    r"(?P<code>(^((?P=indent) +.*)?\n)+)",
    flags=re.MULTILINE,
)
# Same layout for "code:: pycon" / "code-block:: pycon" / "doctest::"; the
# final code line may end at end-of-input instead of a newline.
RST_PYCON_RE = re.compile(
    r"(?P<before>"
    r"(?P<indent> *)\.\. ((code|code-block):: pycon|doctest::.*)\n"
    r"((?P=indent) +:.*\n)*"
    r"\n*"
    r")"
    r"(?P<code>(^((?P=indent) +.*)?(\n|$))+)",
    flags=re.MULTILINE,
)

# --- Interactive-session prompts -------------------------------------------
PYCON_PREFIX = ">>> "
PYCON_CONTINUATION_PREFIX = "..."
# A continuation prompt is "..." followed by a space or the end of the line.
PYCON_CONTINUATION_RE = re.compile(
    rf"^{re.escape(PYCON_CONTINUATION_PREFIX)}( |$)",
)

# --- LaTeX ----------------------------------------------------------------
# minted environment declared with the "python" language.
LATEX_RE = re.compile(
    r"(?P<before>^(?P<indent> *)\\begin{minted}{python}\n)"
    r"(?P<code>.*?)"
    r"(?P<after>^(?P=indent)\\end{minted}\s*$)",
    flags=re.DOTALL | re.MULTILINE,
)
# minted environment declared with the "pycon" language.
LATEX_PYCON_RE = re.compile(
    r"(?P<before>^(?P<indent> *)\\begin{minted}{pycon}\n)"
    r"(?P<code>.*?)"
    r"(?P<after>^(?P=indent)\\end{minted}\s*$)",
    flags=re.DOTALL | re.MULTILINE,
)
# PythonTeX environments; \end{...} must name the same environment as \begin{...}.
PYTHONTEX_LANG = r"(?P<lang>pyblock|pycode|pyconsole|pyverbatim)"
PYTHONTEX_RE = re.compile(
    rf"(?P<before>^(?P<indent> *)\\begin{{{PYTHONTEX_LANG}}}\n)"
    r"(?P<code>.*?)"
    r"(?P<after>^(?P=indent)\\end{(?P=lang)}\s*$)",
    flags=re.DOTALL | re.MULTILINE,
)

# --- Helpers ----------------------------------------------------------------
# Leading run of spaces on a line, up to the first character that is not a space.
INDENT_RE = re.compile("^ +(?=[^ ])", flags=re.MULTILINE)
# Run of newlines at the very end of a string.
TRAILING_NL_RE = re.compile(r"\n+\Z", flags=re.MULTILINE)
| 71 | |
| 72 | |
class CodeBlockError(NamedTuple):
    """Record of an exception raised while formatting one code block."""

    # Character offset of the start of the regex match for the failing block.
    offset: int
    # The exception that was caught while formatting the block.
    exc: Exception
| 76 | |
| 77 | |
def format_str(
    src: str,
    black_mode: black.FileMode,
) -> tuple[str, Sequence[CodeBlockError]]:
    """Reformat the Python code blocks embedded in ``src`` with Black.

    Seven patterns are applied one after another (Markdown, Markdown pycon,
    reST, reST pycon, LaTeX minted, LaTeX minted pycon, PythonTeX); each pass
    works on the output of the previous pass.

    Args:
        src: Full text of the document to process.
        black_mode: Mode handed to every ``black.format_str`` call.

    Returns:
        A ``(new_src, errors)`` pair. ``errors`` holds one ``CodeBlockError``
        per block (or pycon fragment) whose formatting raised; that code is
        passed through without Black formatting. Each recorded offset is the
        start of the match within the text as it stood when that pass ran.
    """
    errors: list[CodeBlockError] = []

    @contextlib.contextmanager
    def _collect_error(match: Match[str]) -> Generator[None, None, None]:
        # Deliberately broad: any failure while formatting a block is recorded
        # and reported by the caller instead of aborting the whole document.
        try:
            yield
        except Exception as e:
            errors.append(CodeBlockError(match.start(), e))

    def _delimited_match(match: Match[str]) -> str:
        # Blocks with explicit opening/closing delimiters (Markdown fences,
        # LaTeX minted, PythonTeX): dedent, format, re-indent to the delimiter.
        code = textwrap.dedent(match["code"])
        with _collect_error(match):
            code = black.format_str(code, mode=black_mode)
        code = textwrap.indent(code, match["indent"])
        return f'{match["before"]}{code}{match["after"]}'

    def _rst_match(match: Match[str]) -> str:
        lang = match["lang"]
        if lang is not None and lang not in RST_PY_LANGS:
            return match[0]
        # A directive followed only by blank lines has no indentation to
        # measure; min() of an empty list would raise ValueError.
        if not match["code"].strip():
            return match[0]
        min_indent = min(INDENT_RE.findall(match["code"]))
        trailing_ws_match = TRAILING_NL_RE.search(match["code"])
        # RST_RE only captures newline-terminated lines, so this always matches.
        assert trailing_ws_match
        trailing_ws = trailing_ws_match.group()
        code = textwrap.dedent(match["code"])
        with _collect_error(match):
            code = black.format_str(code, mode=black_mode)
        code = textwrap.indent(code, min_indent)
        # Restore the block's original run of trailing newlines.
        return f'{match["before"]}{code.rstrip()}{trailing_ws}'

    def _pycon_match(match: Match[str]) -> str:
        out: list[str] = []
        fragment = cast(Optional[str], None)

        def finish_fragment() -> None:
            # Format the statement accumulated so far and emit it with prompts.
            nonlocal fragment

            if fragment is None:
                return
            with _collect_error(match):
                fragment = black.format_str(fragment, mode=black_mode)
            # splitlines() of an empty string is []; fall back to a single
            # empty line so a bare prompt is re-emitted instead of raising
            # IndexError below.
            fragment_lines = fragment.splitlines() or [""]
            out.append(f"{PYCON_PREFIX}{fragment_lines[0]}\n")
            for line in fragment_lines[1:]:
                # Skip blank lines to handle Black adding a blank above
                # functions within blocks. A blank line would end the REPL
                # continuation prompt.
                #
                # >>> if True:
                # ...     def f():
                # ...         pass
                # ...
                if line:
                    out.append(f"{PYCON_CONTINUATION_PREFIX} {line}\n")
            # An indented last line means the statement was a compound block;
            # close it with a bare continuation prompt.
            if fragment_lines[-1].startswith(" "):
                out.append(f"{PYCON_CONTINUATION_PREFIX}\n")
            fragment = None

        # Indentation of the first non-blank line; stripped from output lines.
        indentation = None
        for line in match["code"].splitlines():
            orig_line, line = line, line.lstrip()
            if indentation is None and line:
                indentation = len(orig_line) - len(line)
            continuation_match = PYCON_CONTINUATION_RE.match(line)
            if continuation_match and fragment is not None:
                fragment += line[continuation_match.end() :] + "\n"
            else:
                finish_fragment()
                if line.startswith(PYCON_PREFIX):
                    fragment = line[len(PYCON_PREFIX) :] + "\n"
                else:
                    # Not a prompt line: copied through unchanged.
                    out.append(orig_line[indentation:] + "\n")
        finish_fragment()
        return "".join(out)

    def _delimited_pycon_match(match: Match[str]) -> str:
        # pycon blocks with explicit delimiters (Markdown fences, LaTeX minted).
        code = _pycon_match(match)
        code = textwrap.indent(code, match["indent"])
        return f'{match["before"]}{code}{match["after"]}'

    def _rst_pycon_match(match: Match[str]) -> str:
        # RST_PYCON_RE can match with an empty or blank code group (e.g. a
        # directive on the last line of the input); min() of an empty list
        # would raise ValueError, so leave such matches untouched.
        if not match["code"].strip():
            return match[0]
        code = _pycon_match(match)
        min_indent = min(INDENT_RE.findall(match["code"]))
        code = textwrap.indent(code, min_indent)
        return f'{match["before"]}{code}'

    src = MD_RE.sub(_delimited_match, src)
    src = MD_PYCON_RE.sub(_delimited_pycon_match, src)
    src = RST_RE.sub(_rst_match, src)
    src = RST_PYCON_RE.sub(_rst_pycon_match, src)
    src = LATEX_RE.sub(_delimited_match, src)
    src = LATEX_PYCON_RE.sub(_delimited_pycon_match, src)
    src = PYTHONTEX_RE.sub(_delimited_match, src)
    return src, errors
| 188 | |
| 189 | |
def format_file(
    filename: str,
    black_mode: black.FileMode,
    skip_errors: bool,
) -> int:
    """Format the code blocks of one file in place.

    Args:
        filename: Path of the file to read and, if its content changed, rewrite.
        black_mode: Black mode forwarded to ``format_str``.
        skip_errors: When true, blocks that failed to format are reported but
            do not prevent the file from being rewritten.

    Returns:
        1 if any block failed and ``skip_errors`` is false (the file is left
        untouched); otherwise 0, whether or not the file was rewritten.
    """
    with open(filename, encoding="UTF-8") as src_file:
        original = src_file.read()

    formatted, problems = format_str(original, black_mode)

    for problem in problems:
        # Turn the character offset into a 1-based line number.
        line_number = 1 + original.count("\n", 0, problem.offset)
        print(f"{filename}:{line_number}: code block parse error {problem.exc}")

    if problems and not skip_errors:
        return 1

    if formatted != original:
        print(f"{filename}: Rewriting...")
        with open(filename, "w", encoding="UTF-8") as dst_file:
            dst_file.write(formatted)
    return 0
| 210 | |
| 211 | |
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: format code blocks in the given files.

    Args:
        argv: Arguments to parse; passed straight to
            ``ArgumentParser.parse_args``.

    Returns:
        The bitwise OR of the ``format_file`` result of every file.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-l", "--line-length", type=int, default=DEFAULT_LINE_LENGTH)
    cli.add_argument(
        "-t",
        "--target-version",
        dest="target_versions",
        action="append",
        default=[],
        # Look the version up by enum member name, case-insensitively.
        type=lambda v: TargetVersion[v.upper()],
        help=f"choices: {[v.name.lower() for v in TargetVersion]}",
    )
    cli.add_argument("-S", "--skip-string-normalization", action="store_true")
    cli.add_argument("-E", "--skip-errors", action="store_true")
    cli.add_argument("filenames", nargs="*")
    opts = cli.parse_args(argv)

    mode = black.FileMode(
        line_length=opts.line_length,
        string_normalization=not opts.skip_string_normalization,
        target_versions=set(opts.target_versions),
    )

    # Every file is processed even after a failure; statuses are OR-ed together.
    status = 0
    for path in opts.filenames:
        status |= format_file(path, mode, skip_errors=opts.skip_errors)
    return status
| 248 | |
| 249 | |
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
| 252 | |