microsoft/hve-core
Public · mirrored from https://github.com/microsoft/hve-core · Available
.github/skills/experimental/powerpoint/scripts/pptx_fonts.py
133 lines · mode: code
| 1 | """Font normalization, matching, and extraction utilities. |
| 2 | |
| 3 | Centralizes font-related constants and functions used by |
| 4 | build_deck.py, extract_content.py, and validate_deck.py. |
| 5 | """ |
| 6 | |
| 7 | from pptx.enum.text import PP_ALIGN |
| 8 | from pptx_colors import rgb_to_hex |
| 9 | |
# Weight designators that may trail a font family name. Every entry starts
# with a space so that only a whole trailing word matches: " Bold" is not a
# suffix of "ExtraBold". Both capitalizations of "Semibold" are listed because
# matching is case-sensitive.
FONT_WEIGHT_SUFFIXES = tuple(
    f" {weight}"
    for weight in (
        "Semibold",
        "SemiBold",
        "Bold",
        "Light",
        "Thin",
        "Black",
        "Medium",
        "ExtraBold",
        "ExtraLight",
    )
)
| 21 | |
# Alignment keyword -> python-pptx paragraph alignment enum member.
ALIGNMENT_MAP = {
    "left": PP_ALIGN.LEFT,
    "center": PP_ALIGN.CENTER,
    "right": PP_ALIGN.RIGHT,
    "justify": PP_ALIGN.JUSTIFY,
}

# Integer alignment value -> keyword. Derived from ALIGNMENT_MAP instead of
# repeating the enum's integer values by hand, so the two tables cannot drift
# apart when an alignment is added or changed.
ALIGNMENT_REVERSE_MAP = {
    int(member): keyword for keyword, member in ALIGNMENT_MAP.items()
}
| 30 | |
| 31 | |
def normalize_font_family(name: str) -> str:
    """Return ``name`` without its trailing weight designator, if it has one.

    The first entry of ``FONT_WEIGHT_SUFFIXES`` that ``name`` ends with is
    removed. A name that ends with none of them is returned unchanged.
    """
    matching = (sfx for sfx in FONT_WEIGHT_SUFFIXES if name.endswith(sfx))
    found = next(matching, None)
    if found is None:
        return name
    return name[: -len(found)]
| 38 | |
| 39 | |
def font_family_matches(font_name: str, expected_fonts: set[str]) -> bool:
    """Check if a font matches expected fonts.

    Weight variants (e.g. Segoe UI Semibold) are treated as
    compatible with the base family.

    Args:
        font_name: Font name to test.
        expected_fonts: Acceptable font names; entries may themselves carry
            a weight suffix.

    Returns:
        True when ``font_name`` is listed verbatim, or when its base family
        equals the base family of any expected font; otherwise False.
    """
    # Fast path: exact name is listed, no normalization needed.
    if font_name in expected_fonts:
        return True
    # Compare base families on both sides using the shared normalizer rather
    # than re-implementing the suffix-stripping loop here.
    base = normalize_font_family(font_name)
    return any(
        normalize_font_family(expected) == base for expected in expected_fonts
    )
| 62 | |
| 63 | |
| 64 | def extract_font_info(font) -> dict: |
| 65 | """Extract font information from a python-pptx font object.""" |
| 66 | info = {} |
| 67 | if font.name: |
| 68 | info["font"] = font.name |
| 69 | if font.size: |
| 70 | info["size"] = int(font.size.pt) |
| 71 | try: |
| 72 | if font.color and font.color.rgb: |
| 73 | info["color"] = rgb_to_hex(font.color.rgb) |
| 74 | except (AttributeError, TypeError): |
| 75 | pass |
| 76 | if font.bold: |
| 77 | info["bold"] = True |
| 78 | if font.italic: |
| 79 | info["italic"] = True |
| 80 | if font.underline: |
| 81 | info["underline"] = True |
| 82 | # Character spacing (spc attribute in hundredths of a point) |
| 83 | spc = _extract_char_spacing(font) |
| 84 | if spc is not None: |
| 85 | info["char_spacing"] = spc |
| 86 | return info |
| 87 | |
| 88 | |
| 89 | def _extract_char_spacing(font) -> float | None: |
| 90 | """Extract character spacing from font's underlying XML (a:rPr spc attribute). |
| 91 | |
| 92 | Returns spacing in points (spc is stored in hundredths of a point). |
| 93 | """ |
| 94 | try: |
| 95 | rpr = font._element |
| 96 | spc_val = rpr.get("spc") |
| 97 | if spc_val is not None: |
| 98 | return int(spc_val) / 100.0 |
| 99 | except (AttributeError, TypeError): |
| 100 | pass |
| 101 | return None |
| 102 | |
| 103 | |
def extract_paragraph_font(paragraph) -> dict:
    """Collect the font properties set on a paragraph's default run properties.

    python-pptx surfaces paragraph-level defaults through ``paragraph.font``,
    and many PPTX files keep their styling there instead of on individual
    runs.

    Returns:
        A dict with any of the keys ``font``, ``size``, ``color``, ``bold``
        and ``italic``. ``bold`` and ``italic`` are only reported when the
        property is exactly ``True``.
    """
    font = paragraph.font
    props = {}
    if font.name:
        props["font"] = font.name
    if font.size:
        props["size"] = int(font.size.pt)
    try:
        has_rgb = bool(font.color and font.color.rgb)
        if has_rgb:
            props["color"] = rgb_to_hex(font.color.rgb)
    except (AttributeError, TypeError):
        # Best effort: skip the color when no usable rgb value is available.
        pass
    # Identity check on purpose: anything other than True is not reported.
    for flag in ("bold", "italic"):
        if getattr(font, flag) is True:
            props[flag] = True
    return props
| 126 | |
| 127 | |
def extract_alignment(paragraph) -> str | None:
    """Translate a paragraph's alignment into its keyword string.

    Returns ``None`` when the paragraph has no alignment set, or when the
    alignment's integer value has no entry in ``ALIGNMENT_REVERSE_MAP``.
    """
    alignment = paragraph.alignment
    return None if alignment is None else ALIGNMENT_REVERSE_MAP.get(int(alignment))
| 134 | |