microsoft/onnxruntime-extensions
Public · mirrored from https://github.com/microsoft/onnxruntime-extensions · Available
test/static_test/test_strings.cc
38 lines · mode: code
| 1 | // Copyright (c) Microsoft Corporation. All rights reserved. |
| 2 | // Licensed under the MIT License. |
| 3 | |
| 4 | #include "gtest/gtest.h" |
| 5 | #include "string_utils.h" |
| 6 | #include "text/string_regex_split_re.hpp" |
| 7 | |
| 8 | TEST(strings, regex_split) { |
| 9 | std::string input = "hello world"; |
| 10 | re2::RE2 reg("(\\s)"); |
| 11 | re2::RE2 keep_reg("\\s"); |
| 12 | std::vector<std::string_view> tokens; |
| 13 | std::vector<int64_t> begin_offsets; |
| 14 | std::vector<int64_t> end_offsets; |
| 15 | RegexSplitImpl(input, reg, true, keep_reg, tokens, begin_offsets, end_offsets); |
| 16 | std::vector<std::string_view> expected_tokens{"hello", " ", " ", "world"}; |
| 17 | std::vector<int64_t> expected_begin_offsets{0, 5, 6, 7}; |
| 18 | std::vector<int64_t> expected_end_offsets{5, 6, 7, 12}; |
| 19 | EXPECT_EQ(expected_tokens, tokens); |
| 20 | EXPECT_EQ(expected_begin_offsets, begin_offsets); |
| 21 | EXPECT_EQ(expected_end_offsets, end_offsets); |
| 22 | } |
| 23 | |
| 24 | TEST(strings, regex_split_skip) { |
| 25 | std::string input = "hello world"; |
| 26 | re2::RE2 reg("(\\s)"); |
| 27 | re2::RE2 keep_reg(""); |
| 28 | std::vector<std::string_view> tokens; |
| 29 | std::vector<int64_t> begin_offsets; |
| 30 | std::vector<int64_t> end_offsets; |
| 31 | RegexSplitImpl(input, reg, true, keep_reg, tokens, begin_offsets, end_offsets); |
| 32 | std::vector<std::string_view> expected_tokens{"hello", "world"}; |
| 33 | std::vector<int64_t> expected_begin_offsets{0, 6}; |
| 34 | std::vector<int64_t> expected_end_offsets{5, 11}; |
| 35 | EXPECT_EQ(expected_tokens, tokens); |
| 36 | EXPECT_EQ(expected_begin_offsets, begin_offsets); |
| 37 | EXPECT_EQ(expected_end_offsets, end_offsets); |
| 38 | } |
| 39 | |