microsoft/onnxruntime-extensions

Public

mirrored from https://github.com/microsoft/onnxruntime-extensions Available

CodeCommitsIssuesPull requestsActionsInsightsSecurity
ba200b4a0e391b45c0df4f9b1a506f0a9f574dd4

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

test/static_test/test_strings.cc

38lines · modecode

1// Copyright (c) Microsoft Corporation. All rights reserved.
2// Licensed under the MIT License.
3
4#include "gtest/gtest.h"
5#include "string_utils.h"
6#include "text/string_regex_split_re.hpp"
7
8TEST(strings, regex_split) {
9 std::string input = "hello world";
10 re2::RE2 reg("(\\s)");
11 re2::RE2 keep_reg("\\s");
12 std::vector<std::string_view> tokens;
13 std::vector<int64_t> begin_offsets;
14 std::vector<int64_t> end_offsets;
15 RegexSplitImpl(input, reg, true, keep_reg, tokens, begin_offsets, end_offsets);
16 std::vector<std::string_view> expected_tokens{"hello", " ", " ", "world"};
17 std::vector<int64_t> expected_begin_offsets{0, 5, 6, 7};
18 std::vector<int64_t> expected_end_offsets{5, 6, 7, 12};
19 EXPECT_EQ(expected_tokens, tokens);
20 EXPECT_EQ(expected_begin_offsets, begin_offsets);
21 EXPECT_EQ(expected_end_offsets, end_offsets);
22}
23
24TEST(strings, regex_split_skip) {
25 std::string input = "hello world";
26 re2::RE2 reg("(\\s)");
27 re2::RE2 keep_reg("");
28 std::vector<std::string_view> tokens;
29 std::vector<int64_t> begin_offsets;
30 std::vector<int64_t> end_offsets;
31 RegexSplitImpl(input, reg, true, keep_reg, tokens, begin_offsets, end_offsets);
32 std::vector<std::string_view> expected_tokens{"hello", "world"};
33 std::vector<int64_t> expected_begin_offsets{0, 6};
34 std::vector<int64_t> expected_end_offsets{5, 11};
35 EXPECT_EQ(expected_tokens, tokens);
36 EXPECT_EQ(expected_begin_offsets, begin_offsets);
37 EXPECT_EQ(expected_end_offsets, end_offsets);
38}
39