microsoft/qdk

Public

mirrored from https://github.com/microsoft/qdk Available

Code Commits Issues Pull requests Actions Insights Security
v1.17.0

Branches

Tags

  • No tags available.
0 Branches 0 Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

compiler/qsc_data_structures/src/line_column/tests.rs

398 lines · mode code

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4use crate::span::Span;
5
6use super::{Encoding, Position, Range};
7use expect_test::expect;
8use std::fmt::Write;
9
// An empty string has only byte offset 0; it is expected at line 0, column 0.
10#[test]
11fn empty_string() {
12 let contents = "";
13 let pos = Position::from_utf8_byte_offset(Encoding::Utf8, contents, 0);
14 expect![[r"
15 Position {
16 line: 0,
17 column: 0,
18 }
19 "]]
20 .assert_debug_eq(&pos);
21}
22
// Byte offset 10 lies past the end of the 5-byte string "hello".
23#[test]
24fn offset_out_of_bounds() {
25 let contents = "hello";
26 let pos = Position::from_utf8_byte_offset(Encoding::Utf8, contents, 10);
27 // Should return the <eof> position (line 0, column 5 for this input)
28 expect![[r"
29 Position {
30 line: 0,
31 column: 5,
32 }
33 "]]
34 .assert_debug_eq(&pos);
35}
36
// A line/column position beyond the end of the string must map back to the
// <eof> byte offset, i.e. the length of the string in bytes.
#[test]
fn position_out_of_bounds() {
    let contents = "hello";
    // A position that is out of range for the given string
    let pos = Position {
        line: 10,
        column: 10,
    };
    let offset = pos.to_utf8_byte_offset(Encoding::Utf8, contents);
    // Should return the <eof> offset.
    // `try_from` replaces the truncating `as` cast, and `assert_eq!` prints
    // both values if the check fails.
    let eof = u32::try_from(contents.len()).expect("length should fit in u32");
    assert_eq!(offset, eof);
}
50
// Walks every byte offset of a single-line ASCII string. With no newlines and
// no multi-byte chars, the column equals the byte offset in both encodings.
51#[test]
52fn one_line() {
53 let contents = "Hello, world!";
54 check_all_offsets(
55 contents,
56 &expect![[r"
57 byte | utf-8 | utf-16 | char
58 0 | 0, 0 | 0, 0 | 'H'
59 1 | 0, 1 | 0, 1 | 'e'
60 2 | 0, 2 | 0, 2 | 'l'
61 3 | 0, 3 | 0, 3 | 'l'
62 4 | 0, 4 | 0, 4 | 'o'
63 5 | 0, 5 | 0, 5 | ','
64 6 | 0, 6 | 0, 6 | ' '
65 7 | 0, 7 | 0, 7 | 'w'
66 8 | 0, 8 | 0, 8 | 'o'
67 9 | 0, 9 | 0, 9 | 'r'
68 10 | 0,10 | 0,10 | 'l'
69 11 | 0,11 | 0,11 | 'd'
70 12 | 0,12 | 0,12 | '!'
71 13 | 0,13 | 0,13 | <eof>
72 "]],
73 );
74}
75
// Three lines separated by '\n'. Each '\n' is reported on the line it ends
// (column 5 here); the offset right after it starts the next line at column 0.
76#[test]
77fn lines() {
78 let contents = "line1\nline2\nline3";
79 check_all_offsets(
80 contents,
81 &expect![[r"
82 byte | utf-8 | utf-16 | char
83 0 | 0, 0 | 0, 0 | 'l'
84 1 | 0, 1 | 0, 1 | 'i'
85 2 | 0, 2 | 0, 2 | 'n'
86 3 | 0, 3 | 0, 3 | 'e'
87 4 | 0, 4 | 0, 4 | '1'
88 5 | 0, 5 | 0, 5 | '\n'
89 6 | 1, 0 | 1, 0 | 'l'
90 7 | 1, 1 | 1, 1 | 'i'
91 8 | 1, 2 | 1, 2 | 'n'
92 9 | 1, 3 | 1, 3 | 'e'
93 10 | 1, 4 | 1, 4 | '2'
94 11 | 1, 5 | 1, 5 | '\n'
95 12 | 2, 0 | 2, 0 | 'l'
96 13 | 2, 1 | 2, 1 | 'i'
97 14 | 2, 2 | 2, 2 | 'n'
98 15 | 2, 3 | 2, 3 | 'e'
99 16 | 2, 4 | 2, 4 | '3'
100 17 | 2, 5 | 2, 5 | <eof>
101 "]],
102 );
103}
104
// The string ends with '\n', so the <eof> offset is expected on a new line
// (line 1, column 0) rather than at the end of line 0.
105#[test]
106fn newline_at_end() {
107 let contents = "Hello, world!\n";
108 check_all_offsets(
109 contents,
110 &expect![[r"
111 byte | utf-8 | utf-16 | char
112 0 | 0, 0 | 0, 0 | 'H'
113 1 | 0, 1 | 0, 1 | 'e'
114 2 | 0, 2 | 0, 2 | 'l'
115 3 | 0, 3 | 0, 3 | 'l'
116 4 | 0, 4 | 0, 4 | 'o'
117 5 | 0, 5 | 0, 5 | ','
118 6 | 0, 6 | 0, 6 | ' '
119 7 | 0, 7 | 0, 7 | 'w'
120 8 | 0, 8 | 0, 8 | 'o'
121 9 | 0, 9 | 0, 9 | 'r'
122 10 | 0,10 | 0,10 | 'l'
123 11 | 0,11 | 0,11 | 'd'
124 12 | 0,12 | 0,12 | '!'
125 13 | 0,13 | 0,13 | '\n'
126 14 | 1, 0 | 1, 0 | <eof>
127 "]],
128 );
129}
130
// CRLF line endings: '\r' and '\n' each take a column on the line they end
// (columns 5 and 6); the next line begins only after the '\n'.
131#[test]
132fn windows_crlf_line_breaks() {
133 let contents = "line1\r\nline2\r\n";
134 check_all_offsets(
135 contents,
136 &expect![[r"
137 byte | utf-8 | utf-16 | char
138 0 | 0, 0 | 0, 0 | 'l'
139 1 | 0, 1 | 0, 1 | 'i'
140 2 | 0, 2 | 0, 2 | 'n'
141 3 | 0, 3 | 0, 3 | 'e'
142 4 | 0, 4 | 0, 4 | '1'
143 5 | 0, 5 | 0, 5 | '\r'
144 6 | 0, 6 | 0, 6 | '\n'
145 7 | 1, 0 | 1, 0 | 'l'
146 8 | 1, 1 | 1, 1 | 'i'
147 9 | 1, 2 | 1, 2 | 'n'
148 10 | 1, 3 | 1, 3 | 'e'
149 11 | 1, 4 | 1, 4 | '2'
150 12 | 1, 5 | 1, 5 | '\r'
151 13 | 1, 6 | 1, 6 | '\n'
152 14 | 2, 0 | 2, 0 | <eof>
153 "]],
154 );
155}
156
// Chars that take two utf-8 bytes but one utf-16 unit. Rows with no char shown
// are offsets inside a char; they are expected to report the same position as
// the next char boundary.
157#[test]
158fn utf_8_multibyte() {
159 // utf-8 encoding has multi-unit characters, utf-16 doesn't
160 // string | ççç
161 // chars | ç ç ç
162 // code points | e7 e7 e7
163 // utf-8 units | c3a7 c3a7 c3a7
164 // utf-16 units | 00e7 00e7 00e7
165 let contents = "ççç\nççç";
166 check_all_offsets(
167 contents,
168 &expect![[r"
169 byte | utf-8 | utf-16 | char
170 0 | 0, 0 | 0, 0 | 'ç'
171 1 | 0, 2 | 0, 1 |
172 2 | 0, 2 | 0, 1 | 'ç'
173 3 | 0, 4 | 0, 2 |
174 4 | 0, 4 | 0, 2 | 'ç'
175 5 | 0, 6 | 0, 3 |
176 6 | 0, 6 | 0, 3 | '\n'
177 7 | 1, 0 | 1, 0 | 'ç'
178 8 | 1, 2 | 1, 1 |
179 9 | 1, 2 | 1, 1 | 'ç'
180 10 | 1, 4 | 1, 2 |
181 11 | 1, 4 | 1, 2 | 'ç'
182 12 | 1, 6 | 1, 3 |
183 13 | 1, 6 | 1, 3 | <eof>
184 "]],
185 );
186}
187
// Chars that take four utf-8 bytes and two utf-16 units (a surrogate pair).
// Rows with no char shown are offsets inside a char; they are expected to
// report the same position as the next char boundary.
188#[test]
189fn utf_8_multibyte_utf_16_surrogate() {
190 // both encodings have multi-unit characters
191 // string | 𝑓𝑓
192 // chars | 𝑓 𝑓
193 // code points | 1d453 1d453
194 // utf-8 units | f09d9193 f09d9193
195 // utf-16 units | d835 dc53 d835 dc53
196
197 let contents = "𝑓𝑓\n𝑓𝑓";
198 check_all_offsets(
199 contents,
200 &expect![[r"
201 byte | utf-8 | utf-16 | char
202 0 | 0, 0 | 0, 0 | '𝑓'
203 1 | 0, 4 | 0, 2 |
204 2 | 0, 4 | 0, 2 |
205 3 | 0, 4 | 0, 2 |
206 4 | 0, 4 | 0, 2 | '𝑓'
207 5 | 0, 8 | 0, 4 |
208 6 | 0, 8 | 0, 4 |
209 7 | 0, 8 | 0, 4 |
210 8 | 0, 8 | 0, 4 | '\n'
211 9 | 1, 0 | 1, 0 | '𝑓'
212 10 | 1, 4 | 1, 2 |
213 11 | 1, 4 | 1, 2 |
214 12 | 1, 4 | 1, 2 |
215 13 | 1, 4 | 1, 2 | '𝑓'
216 14 | 1, 8 | 1, 4 |
217 15 | 1, 8 | 1, 4 |
218 16 | 1, 8 | 1, 4 |
219 17 | 1, 8 | 1, 4 | <eof>
220 "]],
221 );
222}
223
// A single-line string mixing 1-, 2-, 3- and 4-byte chars, including a
// combining mark ('\u{20d7}'). The table lists each char separately: the
// combining mark gets its own row and its own column in both encodings.
224#[test]
225fn grapheme_clusters() {
226 // grapheme clusters, both encodings have multi-unit characters
227 // string | 𝑓(𝑥⃗) ≔ Σᵢ 𝑥ᵢ 𝑟ᵢ
228 // chars | 𝑓 ( 𝑥 ⃗ ) ≔ Σ ᵢ 𝑥 ᵢ 𝑟 ᵢ
229 // code points | 1d453 28 1d465 20d7 29 20 2254 20 3a3 1d62 20 1d465 1d62 20 1d45f 1d62
230 // utf-8 units | f09d9193 28 f09d91a5 e28397 29 20 e28994 20 cea3 e1b5a2 20 f09d91a5 e1b5a2 20 f09d919f e1b5a2
231 // utf-16 units | d835 dc53 0028 d835 dc65 20d7 0029 0020 2254 0020 03a3 1d62 0020 d835 dc65 1d62 0020 d835 dc5f 1d62
232
233 let contents = "𝑓(𝑥⃗) ≔ Σᵢ 𝑥ᵢ 𝑟ᵢ";
234 check_all_offsets(
235 contents,
236 &expect![[r"
237 byte | utf-8 | utf-16 | char
238 0 | 0, 0 | 0, 0 | '𝑓'
239 1 | 0, 4 | 0, 2 |
240 2 | 0, 4 | 0, 2 |
241 3 | 0, 4 | 0, 2 |
242 4 | 0, 4 | 0, 2 | '('
243 5 | 0, 5 | 0, 3 | '𝑥'
244 6 | 0, 9 | 0, 5 |
245 7 | 0, 9 | 0, 5 |
246 8 | 0, 9 | 0, 5 |
247 9 | 0, 9 | 0, 5 | '\u{20d7}'
248 10 | 0,12 | 0, 6 |
249 11 | 0,12 | 0, 6 |
250 12 | 0,12 | 0, 6 | ')'
251 13 | 0,13 | 0, 7 | ' '
252 14 | 0,14 | 0, 8 | '≔'
253 15 | 0,17 | 0, 9 |
254 16 | 0,17 | 0, 9 |
255 17 | 0,17 | 0, 9 | ' '
256 18 | 0,18 | 0,10 | 'Σ'
257 19 | 0,20 | 0,11 |
258 20 | 0,20 | 0,11 | 'ᵢ'
259 21 | 0,23 | 0,12 |
260 22 | 0,23 | 0,12 |
261 23 | 0,23 | 0,12 | ' '
262 24 | 0,24 | 0,13 | '𝑥'
263 25 | 0,28 | 0,15 |
264 26 | 0,28 | 0,15 |
265 27 | 0,28 | 0,15 |
266 28 | 0,28 | 0,15 | 'ᵢ'
267 29 | 0,31 | 0,16 |
268 30 | 0,31 | 0,16 |
269 31 | 0,31 | 0,16 | ' '
270 32 | 0,32 | 0,17 | '𝑟'
271 33 | 0,36 | 0,19 |
272 34 | 0,36 | 0,19 |
273 35 | 0,36 | 0,19 |
274 36 | 0,36 | 0,19 | 'ᵢ'
275 37 | 0,39 | 0,20 |
276 38 | 0,39 | 0,20 |
277 39 | 0,39 | 0,20 | <eof>
278 "]],
279 );
280}
281
// A zero-length span (lo == hi == 1) is expected to produce a range whose
// start and end are the same position: line 0, column 1.
282#[test]
283fn empty_range() {
284 let contents = "hello";
285 let span = Span { lo: 1, hi: 1 };
286 let range = Range::from_span(Encoding::Utf8, contents, &span);
287 expect![[r"
288 Range {
289 start: Position {
290 line: 0,
291 column: 1,
292 },
293 end: Position {
294 line: 0,
295 column: 1,
296 },
297 }
298 "]]
299 .assert_debug_eq(&range);
300}
301
// A span covering byte offsets 0..10 of a two-line string: it starts at
// line 0, column 0 and is expected to end on the second line at column 4.
302#[test]
303fn range_across_lines() {
304 let contents = "line1\nline2";
305 let span = Span { lo: 0, hi: 10 };
306 let range = Range::from_span(Encoding::Utf8, contents, &span);
307 expect![[r"
308 Range {
309 start: Position {
310 line: 0,
311 column: 0,
312 },
313 end: Position {
314 line: 1,
315 column: 4,
316 },
317 }
318 "]]
319 .assert_debug_eq(&range);
320}
321
// Both ends of the span (6 and 10) lie past the end of the 5-byte string;
// both are expected at the <eof> position: line 0, column 5.
322#[test]
323fn range_out_of_bounds() {
324 let contents = "hello";
325 let span = Span { lo: 6, hi: 10 };
326 let range = Range::from_span(Encoding::Utf8, contents, &span);
327 expect![[r"
328 Range {
329 start: Position {
330 line: 0,
331 column: 5,
332 },
333 end: Position {
334 line: 0,
335 column: 5,
336 },
337 }
338 "]]
339 .assert_debug_eq(&range);
340}
341
342#[allow(clippy::cast_possible_truncation)]
343fn check_all_offsets(contents: &str, expected: &expect_test::Expect) {
344 let byte_offsets = 0..=contents.len();
345 let positions = byte_offsets
346 .map(|offset| {
347 (
348 offset,
349 Position::from_utf8_byte_offset(
350 Encoding::Utf8,
351 contents,
352 u32::try_from(offset).expect("offset should fit in u32"),
353 ),
354 Position::from_utf8_byte_offset(
355 Encoding::Utf16,
356 contents,
357 u32::try_from(offset).expect("offset should fit in u32"),
358 ),
359 )
360 })
361 .collect::<Vec<_>>();
362
363 // Generate a table for visual validation
364 let mut string = String::new();
365 let _ = writeln!(string, "byte | utf-8 | utf-16 | char");
366 for (offset, utf8pos, utf16pos) in &positions {
367 let char = if *offset == contents.len() {
368 " <eof>".to_string()
369 } else {
370 contents
371 .char_indices()
372 .find_map(|(i, c)| {
373 if i == *offset {
374 Some(format!(" {c:?}"))
375 } else {
376 None
377 }
378 })
379 .unwrap_or(String::new())
380 };
381
382 let _ = writeln!(
383 string,
384 "{offset: >4} | {: >2},{: >2} | {: >2},{: >2} |{}",
385 utf8pos.line, utf8pos.column, utf16pos.line, utf16pos.column, char
386 );
387 }
388
389 expected.assert_eq(&string);
390
391 // also validate that we correctly map back to the original utf-8 byte offset
392 for (offset, utf8pos, utf16pos) in positions {
393 if contents.is_char_boundary(offset) {
394 assert!(utf8pos.to_utf8_byte_offset(Encoding::Utf8, contents) == offset as u32);
395 assert!(utf16pos.to_utf8_byte_offset(Encoding::Utf16, contents) == offset as u32);
396 }
397 }
398}
399