microsoft/TypeAgent
Public · mirrored from https://github.com/microsoft/TypeAgent · Available
python/ta/demo.py
75 lines · mode: code
| 1 | # Copyright (c) Microsoft Corporation. |
| 2 | # Licensed under the MIT License. |
| 3 | |
| 4 | import argparse |
| 5 | import asyncio |
| 6 | import os |
| 7 | import textwrap |
| 8 | import time |
| 9 | |
| 10 | from typeagent.aitools import auth |
| 11 | from typeagent.knowpro.importing import ConversationSettings |
| 12 | from typeagent.podcasts import podcast |
| 13 | |
# Command-line interface: one optional positional argument, "filename".
# When it is omitted, the sample index under ~/TypeAgent is used instead.
parser = argparse.ArgumentParser()
parser.add_argument(
    "filename",
    type=str,
    nargs="?",  # zero or one value; fall back to `default` when absent
    default=os.path.expanduser(
        "~/TypeAgent/python/ta/testdata/Episode_53_AdrianTchaikovsky_index"
    ),
)
| 23 | |
| 24 | |
async def main():
    """Run the podcast demo from start to finish.

    Steps, in order:

    1. Call ``auth.load_dotenv()`` and parse the command line.
    2. Read a podcast from ``args.filename`` and report how long that took.
    3. Look up the term "book" in the podcast's semantic ref index and, for
       each hit, print the knowledge it carries plus the text of the messages
       in its range.
    4. Check that serialize -> deserialize -> serialize yields the same data.

    Returns early (after printing a message) if the podcast cannot be read.
    """
    auth.load_dotenv()
    args = parser.parse_args()
    print("Create conversation settings ...")
    settings = ConversationSettings()
    print(f"Loading {args.filename} ...")
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way a time.time() difference can.
    t0 = time.perf_counter()
    pod = podcast.Podcast.read_from_file(args.filename, settings)
    t1 = time.perf_counter()
    print(f"Loading took {t1-t0:.3f} seconds")
    if pod is None:
        print("Failed to read podcast")
        return

    term = "book"
    print(f"\nSearching {pod.name_tag!r} for term {term!r} ...")
    book_list = pod.semantic_ref_index.lookup_term(term)
    if book_list is not None:
        for scored_ord in book_list:
            # Named sref_ord (not "ord") so the builtin ord() is not shadowed.
            sref_ord = scored_ord.semantic_ref_ordinal
            # Sanity checks: the index must point at an existing semantic ref
            # that agrees about its own ordinal.
            assert 0 <= sref_ord < len(pod.semantic_refs)
            sref = pod.semantic_refs[sref_ord]
            assert sref.semantic_ref_ordinal == sref_ord
            print(f"\n{sref_ord}: Term {term!r} has knowledge", end=" ")
            print(f"of type {sref.knowledge_type!r} at {sref.range}:")
            print(" ", sref.knowledge)
            # Now dig up the messages
            start_msg_ord = sref.range.start.message_ordinal
            end_msg_ord = sref.range.end.message_ordinal if sref.range.end else None
            # Without an end location only the starting message is shown;
            # otherwise show messages from start up to (excluding) the end.
            if end_msg_ord is None:
                stop_msg_ord = start_msg_ord + 1
            else:
                stop_msg_ord = end_msg_ord
            for message in pod.messages[start_msg_ord:stop_msg_ord]:
                text = " ".join(message.text_chunks).strip()
                for line in textwrap.wrap(text):
                    print(f" {line}")

    print("\nChecking that serialize -> deserialize -> serialize is 'idempotent' ...")
    ser1 = pod.serialize()
    pod2 = podcast.Podcast(settings=settings)
    pod2.deserialize(ser1)
    # Serialize the *reloaded* podcast. Re-serializing `pod` here would compare
    # the original with itself and could never detect a round-trip mismatch.
    ser2 = pod2.serialize()
    if ser2 != ser1:
        print("Serialized data does not match original")
    else:
        print("Serialized data matches original")
| 72 | |
| 73 | |
# Script entry point: only run the demo when executed directly, not on import.
if __name__ == "__main__":
    # main() is a coroutine function, so an event loop has to drive it.
    asyncio.run(main())
| 76 | |