microsoft/TypeAgent
Public · mirrored from https://github.com/microsoft/TypeAgent · Available
python/ta/tools/vizcmp.py
107 lines · mode: code
| 1 | # Copyright (c) Microsoft Corporation. |
| 2 | # Licensed under the MIT License. |
| 3 | |
| 4 | import os |
| 5 | import glob |
| 6 | import re |
| 7 | import statistics |
| 8 | import sys |
| 9 | |
| 10 | from colorama import Back, Fore, Style |
| 11 | |
| 12 | |
def main():
    """Compare per-question scores across eval files and print them as a table.

    Files are taken from the command line, defaulting to ``evals/eval-*.txt``.
    Each usable file becomes one column and each question counter one row.
    Rows are ordered by the score in the first column (highest first; a
    counter missing from that file sorts as 0.0), and every cell is colored
    by score band. Files without a separator line or without any scores are
    reported and left out of the table.
    """
    files = sys.argv[1:] or glob.glob("evals/eval-*.txt")
    table = {}  # {file: {counter: score, ...}, ...}
    questions = {}  # {counter: question, ...}

    # Fill table with scoring data from eval files
    for file in files:
        data = _read_eval_file(file, questions)
        if data is not None:
            table[file] = data

    # Print header
    all_files = sorted(table)
    print_header(all_files)

    # Print data
    first_column = table[all_files[0]] if all_files else {}
    all_counters = sorted(
        {counter for data in table.values() for counter in data},
        key=lambda counter: first_column.get(counter, 0.0),
        reverse=True,
    )
    for counter in all_counters:
        print(f"{counter:>3}:", end="")
        for file in all_files:
            print(_format_score(table[file].get(counter)), end="")
        print(f" {questions.get(counter)}")

    # Print header again
    print_header(all_files)


def _read_eval_file(file, questions):
    """Parse one eval file into ``{counter: score}``; return None if unusable.

    As a side effect, records the text of each question in *questions* the
    first time its counter is seen, and prints a report when this file's
    text for a counter differs from the text already recorded.
    """
    with open(file, "r") as f:
        lines = f.readlines()

    # A line of dashes or stars followed by a number starts a new question;
    # the next "; Question:" line carries that question's text.
    counter = None
    for line in lines:
        if m := re.match(r"^(?:-+|\*+)\s+(\d+)\s+", line):
            counter = int(m.group(1))
        elif m := re.match(r"^.*; Question:\s+(.*)$", line):
            question = m.group(1)
            if counter not in questions:
                questions[counter] = question
            elif questions[counter] != question:
                print(f"File {file} has a different question for {counter}:")
                print(f"< {questions[counter]}")
                print(f"> {question}")

    # list.index() raises ValueError (it never returns a negative index)
    # when the separator is absent, so that is the condition to handle.
    try:
        start = lines.index("==================================================\n")
    except ValueError:
        print(f"File {file} does not contain a separator line")
        return None

    # Scores follow the separator, written as "<d.ddd>(<counter>)".
    text = "".join(lines[start + 1 :])
    matches = re.findall(r"(\d\.\d\d\d)\((\d+)\)", text)
    if not matches:
        print(f"File {file} does not contain any scores")
        return None

    data = {int(counter_text): float(score_text) for score_text, counter_text in matches}
    # Each counter is expected to be scored exactly once per file.
    assert len(data) == len(matches)
    return data


def _format_score(score):
    """Return a 6-character table cell for *score*; None renders as N/A.

    Bands: >= 0.97 green (bright from 0.999), >= 0.9 blue, otherwise red
    (bright when exactly 0.0). The N/A cell is left uncolored.
    """
    if score is None:
        # No score to compare against the bands; pad to the score cell width
        # so the columns to the right stay aligned.
        return " N/A ".rjust(6)

    cell = f"{score:>6.3f}"
    if score >= 0.97:
        cell = Fore.GREEN + cell + Fore.RESET
        if score >= 0.999:
            cell = Style.BRIGHT + cell + Style.RESET_ALL
    elif score >= 0.9:
        cell = Fore.BLUE + cell + Fore.RESET
    else:
        cell = Fore.RED + cell + Fore.RESET
        if score == 0.0:
            cell = Style.BRIGHT + cell + Style.RESET_ALL
    return cell
| 91 | |
| 92 | |
def print_header(all_files):
    """Print the row of column labels for the score table.

    Each file gets a 6-character, right-aligned label: the leading run id
    captured from a basename shaped like ``eval-<digits><word chars>...txt``,
    or ``--`` when the basename does not match that shape.
    """
    # Data rows start with f"{counter:>3}:" (4 characters); indent the header
    # by the same width so every label sits above its own column.
    print(" " * 4, end="")
    for file in all_files:
        m = re.match(r"eval-(\d+\w*).*\.txt", os.path.basename(file))
        label = m.group(1) if m else "--"
        print(f"{label:>6}", end="")
    print()
| 104 | |
| 105 | |
# Entry point: run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()
| 108 | |