microsoft/qdk
Public mirrored from https://github.com/microsoft/qdk Available
source/npm/qsharp/markdown_latex_plugin.js
277lines · modecode
| 1 | // Copyright (c) Microsoft Corporation. |
| 2 | // Licensed under the MIT License. |
| 3 | |
| 4 | // @ts-check |
| 5 | /// <reference lib="es2022"/> |
| 6 | |
| 7 | /***** LaTeX passthrough plug-in for the markdown-it parser ***** |
| 8 | |
| 9 | When converting Markdown to HTML that may contain LaTeX, but wanting to |
| 10 | leave the LaTeX untouched, there are a number of hazards. These include: |
| 11 | |
| 12 | - Markdown escapes in the LaTeX, such as double escapes ("\\" and lines |
| 13 | ending with a "\"), will be processed, corrupting the LaTeX. This sequence |
| 14 | is common in \begin{bmatrix} structures (which is common in Quantum). |
| 15 | |
| 16 | - The LaTeX may be processed for Markdown sequences. Common examples |
| 17 | here include finding two "_" chars in the LaTeX and converting this to |
| 18 | emphasis "<em>" tags around the content. |
| 19 | |
| 20 | Disabling Markdown escapes can avoid the former, but not the latter. Avoiding |
| 21 | the latter means rewriting the LaTeX (where that is even possible), and the corruption can be subtle to catch. |
| 22 | |
| 23 | To avoid these issues, this plug-in detects $..$ and $$..$$ content in |
| 24 | raw Markdown and inline HTML, and passes it through the Markdown parser as-is. |
| 25 | |
| 26 | */ |
| 27 | |
| 28 | // To help make type checking annotations cleaner |
| 29 | /** @typedef {import("markdown-it/dist/markdown-it.js")} MarkdownIt */ |
| 30 | /** @typedef {import("markdown-it/dist/markdown-it.js").StateInline} StateInline */ |
| 31 | |
| 32 | // Below code to locate LaTeX blocks largely taken from @vscode/markdown-it-katex |
| 33 | // See https://github.com/microsoft/vscode-markdown-it-katex/blob/9f3e1dff0fa2e011c63cb6a05fa6e80b7624538f/src/index.ts |
| 34 | |
/**
 * markdown-it plug-in entry point: registers the two inline rules that
 * recognise `$..$` and `$$..$$`, plus the renderers that write the captured
 * LaTeX back out unchanged (apart from HTML escaping).
 *
 * Both rules are registered relative to the built-in "escape" rule. Assuming
 * `ruler.after` inserts directly after the named rule, the `$$` rule
 * (registered second) ends up ahead of the `$` rule.
 *
 * @param {MarkdownIt} md
 */
function plugin(md) {
  // Rules that extract the LaTeX. Registration order is significant.
  const { ruler } = md.inline;
  ruler.after("escape", "math_inline", inlineMath);
  ruler.after("escape", "math_inline_block", inlineMathBlock);

  // Renderers: emit the token content between the delimiter it was found in.
  const delimiterByTokenType = [
    ["math_inline", "$"],
    ["math_block", "$$"],
  ];
  for (const [tokenType, delimiter] of delimiterByTokenType) {
    md.renderer.rules[tokenType] = (tokens, idx) => {
      return delimiter + escapeHtml(tokens[idx].content) + delimiter;
    };
  }
}
| 52 | |
/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
 * the text can be placed in HTML output without being read as markup.
 *
 * `&` is handled first; otherwise the ampersands introduced by the later
 * replacements would themselves be escaped a second time.
 *
 * @param {string} unsafe Text to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(unsafe) {
  return unsafe
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;")
    .replaceAll("'", "&#039;");
}
| 64 | |
/**
 * Reports whether the argument is exactly one whitespace character.
 * Empty strings and strings longer than one character yield `false`.
 *
 * @param {string} char
 * @returns {boolean}
 */
function isWhitespace(char) {
  const singleWhitespace = /^\s$/u;
  return singleWhitespace.test(char);
}
| 72 | |
/**
 * Reports whether the argument is exactly one character matched by the
 * regex classes `\w` or `\d`.
 * Empty strings and strings longer than one character yield `false`.
 *
 * @param {string} char
 * @returns {boolean}
 */
function isWordCharacterOrNumber(char) {
  const singleWordOrDigit = /^[\w\d]$/u;
  return singleWordOrDigit.test(char);
}
| 80 | |
/**
 * Decides whether the `$` at `pos` may open and/or close an inline math span.
 *
 * - It can open when the preceding character is neither `$` nor `\`, and is
 *   absent, whitespace, or not a word/number character.
 * - It can close when the following character is not `$`, and is absent,
 *   whitespace, or not a word/number character.
 *
 * If the character at `pos` is not `$`, both flags are `false`.
 *
 * @param {StateInline} state
 * @param {number} pos Index into `state.src` of the candidate delimiter.
 * @returns {{ can_open: boolean, can_close: boolean }}
 */
function isValidInlineDelim(state, pos) {
  if (state.src[pos] !== "$") {
    return { can_open: false, can_close: false };
  }

  // Indexing past either end of the string yields `undefined`.
  const prevChar = state.src[pos - 1];
  const nextChar = state.src[pos + 1];

  /**
   * True when a neighbouring character does not glue the `$` to a word.
   * @param {string | undefined} neighbour
   */
  const isBoundary = (neighbour) =>
    neighbour === undefined ||
    isWhitespace(neighbour) ||
    !isWordCharacterOrNumber(neighbour);

  const canOpen =
    prevChar !== "$" && prevChar !== "\\" && isBoundary(prevChar);
  const canClose = nextChar !== "$" && isBoundary(nextChar);

  return { can_open: canOpen, can_close: canClose };
}
| 113 | |
/**
 * Decides whether a `$$` delimiter starts at `pos`.
 *
 * The delimiter is accepted only when `pos` and `pos + 1` both hold `$`,
 * the character before `pos` is neither `$` nor `\`, and the character at
 * `pos + 2` is not a third `$`. Opening and closing use the same test, so
 * both flags always carry the same value.
 *
 * @param {*} state
 * @param {number} pos Index into `state.src` of the first `$`.
 * @returns {{ can_open: boolean, can_close: boolean }}
 */
function isValidBlockDelim(state, pos) {
  const { src } = state;
  const before = src[pos - 1];

  const isDollarPair = src[pos] === "$" && src[pos + 1] === "$";
  const isPrecededByDollarOrEscape = before === "$" || before === "\\";
  const isFollowedByThirdDollar = src[pos + 2] === "$";

  const isDelimiter =
    isDollarPair && !isPrecededByDollarOrEscape && !isFollowedByThirdDollar;

  return { can_open: isDelimiter, can_close: isDelimiter };
}
| 134 | |
/**
 * Inline rule for `$..$` math.
 *
 * Returns `false` (rule does not apply) when the character at `state.pos`
 * is not `$`. Otherwise it always returns `true` and advances `state.pos`:
 * either past a complete `$..$` span, for which a `math_inline` token is
 * pushed, or past the `$` character(s) that are passed through as literal
 * text via `state.pending`.
 *
 * When `silent` is true, `state.pos` is still advanced but no token is pushed
 * and nothing is appended to `state.pending`.
 *
 * @param {*} state
 * @param {boolean} silent
 * @returns {boolean}
 */
function inlineMath(state, silent) {
  if (state.src[state.pos] !== "$") {
    return false;
  }

  /**
   * Treats `text` as ordinary output and resumes parsing at `nextPos`.
   * @param {string} text
   * @param {number} nextPos
   */
  const passThrough = (text, nextPos) => {
    if (!silent) {
      state.pending += text;
    }
    state.pos = nextPos;
    return true;
  };

  if (!isValidInlineDelim(state, state.pos).can_open) {
    return passThrough("$", state.pos + 1);
  }

  // Search for the first "$" after the opener that is not backslash-escaped.
  // The backwards scan cannot run off the front of state.src: the opening
  // "$" sits before every candidate and is not a backslash.
  const start = state.pos + 1;
  let match = start;
  while ((match = state.src.indexOf("$", match)) !== -1) {
    // Walk back over the backslashes directly before the candidate; `pos`
    // ends on the first character that is not a backslash.
    let pos = match - 1;
    while (state.src[pos] === "\\") {
      pos -= 1;
    }
    // (match - pos - 1) backslashes precede the "$". An even count (odd
    // difference) means the "$" itself is not escaped.
    if ((match - pos) % 2 === 1) {
      break;
    }
    match += 1;
  }

  // No closing delimiter found. Consume the opening "$" and continue.
  if (match === -1) {
    return passThrough("$", start);
  }
  // Empty content, i.e. "$$". Do not parse; emit both characters.
  if (match === start) {
    return passThrough("$$", start + 1);
  }
  // The candidate is not a valid closing delimiter.
  if (!isValidInlineDelim(state, match).can_close) {
    return passThrough("$", start);
  }

  if (!silent) {
    const token = state.push("math_inline", "math", 0);
    token.markup = "$";
    token.content = state.src.slice(start, match);
  }
  state.pos = match + 1;
  return true;
}
| 205 | |
/**
 * Inline rule for `$$..$$` math.
 *
 * Returns `false` (rule does not apply) when the two characters at
 * `state.pos` are not `$$`. Otherwise it always returns `true` and advances
 * `state.pos`: either past a complete `$$..$$` span, for which a
 * `math_block` token is pushed, or past the `$$` that is passed through as
 * literal text via `state.pending`.
 *
 * When `silent` is true, `state.pos` is still advanced but no token is pushed
 * and nothing is appended to `state.pending`.
 *
 * @param {StateInline} state
 * @param {boolean} silent
 * @returns {boolean}
 */
function inlineMathBlock(state, silent) {
  if (!state.src.startsWith("$$", state.pos)) {
    return false;
  }

  /**
   * Treats `text` as ordinary output and resumes parsing at `nextPos`.
   * @param {string} text
   * @param {number} nextPos
   */
  const passThrough = (text, nextPos) => {
    if (!silent) {
      state.pending += text;
    }
    state.pos = nextPos;
    return true;
  };

  if (!isValidBlockDelim(state, state.pos).can_open) {
    return passThrough("$$", state.pos + 2);
  }

  // Search for the first "$$" after the opener that is not backslash-escaped.
  // The backwards scan cannot run off the front of state.src: the opening
  // "$$" sits before every candidate and is not a backslash.
  const start = state.pos + 2;
  let match = start;
  while ((match = state.src.indexOf("$$", match)) !== -1) {
    // Walk back over the backslashes directly before the candidate; `pos`
    // ends on the first character that is not a backslash.
    let pos = match - 1;
    while (state.src[pos] === "\\") {
      pos -= 1;
    }
    // (match - pos - 1) backslashes precede the "$$". An even count (odd
    // difference) means the delimiter is not escaped.
    if ((match - pos) % 2 === 1) {
      break;
    }
    match += 2;
  }

  // No closing delimiter found. Consume the opening "$$" and continue.
  if (match === -1) {
    return passThrough("$$", start);
  }
  // Empty content, i.e. "$$$$". Do not parse. With the current
  // isValidBlockDelim (which rejects an opener followed by a third "$")
  // this cannot be reached; it is kept as a guard.
  if (match === start) {
    return passThrough("$$$$", start + 2);
  }
  // The candidate is not a valid closing delimiter.
  if (!isValidBlockDelim(state, match).can_close) {
    return passThrough("$$", start);
  }

  if (!silent) {
    const token = state.push("math_block", "math", 0);
    token.block = true;
    token.markup = "$$";
    token.content = state.src.slice(start, match);
  }
  state.pos = match + 2;
  return true;
}
| 276 | |
| 277 | export { plugin }; |
| 278 | |