microsoft/onnxruntime-extensions

Public

mirrored from https://github.com/microsoft/onnxruntime-extensions · Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
1ae69c0f7aeaab9911cf8ebf86ee92b34dadd26e

Branches

Tags

  • No tags available.
0 Branches · 0 Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

operators/string_utils.cc

103lines · modecode

1#ifdef ENABLE_TF_STRING
2#include "farmhash.h"
3#endif
4
5#include "string_utils.h"
6
// Splits `str` at every character that appears in `seps`.
//
// The returned views point into the memory referenced by `str`; no characters
// are copied.
//
// str:                  text to split.
// seps:                 set of single-character separators.
// remove_empty_entries: when true, empty tokens found before a separator
//                       (leading or consecutive separators) are skipped.
//
// An empty token at the very end of `str` (e.g. after a trailing separator)
// is never emitted, regardless of `remove_empty_entries`.
std::vector<std::string_view> SplitString(const std::string_view& str, const std::string_view& seps, bool remove_empty_entries) {
  std::vector<std::string_view> tokens;
  std::string::size_type token_begin = 0;

  // Walk from separator to separator; each iteration handles the token that
  // ends at `sep_pos`.
  for (auto sep_pos = str.find_first_of(seps, token_begin);
       sep_pos != std::string::npos;
       sep_pos = str.find_first_of(seps, token_begin)) {
    const bool token_is_empty = (sep_pos == token_begin);
    if (!(token_is_empty && remove_empty_entries)) {
      tokens.push_back(str.substr(token_begin, sep_pos - token_begin));
    }
    token_begin = sep_pos + 1;
  }

  // Whatever follows the last separator; empty when the string ends with a
  // separator (or is empty), in which case nothing is added.
  const auto tail = str.substr(token_begin, std::string::npos);
  if (!tail.empty()) {
    tokens.push_back(tail);
  }

  return tokens;
}
34
35#ifdef ENABLE_TF_STRING
// Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hash.cc#L28
// Widens a single char to 64 bits, keeping only its low 8 bits so that a
// negative (signed) char does not sign-extend into the upper bytes.
static inline uint64_t ByteAs64(char c) {
  constexpr uint64_t kLowByteMask = 0xff;
  return static_cast<uint64_t>(c) & kLowByteMask;
}
38
// Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/raw_coding.h#L41
// Reads the four bytes at ptr[0..3] as a little-endian unsigned value
// (ptr[0] is the least significant byte) and returns it widened to 64 bits.
// Each byte is treated as unsigned, so no sign extension occurs.
uint64_t DecodeFixed32(const char* ptr) {
  uint64_t value = 0;
  for (int i = 0; i < 4; ++i) {
    const uint64_t byte = static_cast<unsigned char>(ptr[i]);
    value |= byte << (8 * i);
  }
  return value;
}
46
47// Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/raw_coding.h#L55
48static uint64_t DecodeFixed64(const char* ptr) {
49 uint64_t lo = DecodeFixed32(ptr);
50 uint64_t hi = DecodeFixed32(ptr + 4);
51 return (hi << 32) | lo;
52}
53
54// Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hash.cc#L79
55uint64_t Hash64(const char* data, size_t n, uint64_t seed) {
56 const uint64_t m = 0xc6a4a7935bd1e995;
57 const int r = 47;
58
59 uint64_t h = seed ^ (n * m);
60
61 while (n >= 8) {
62 uint64_t k = DecodeFixed64(data);
63 data += 8;
64 n -= 8;
65
66 k *= m;
67 k ^= k >> r;
68 k *= m;
69
70 h ^= k;
71 h *= m;
72 }
73
74 switch (n) {
75 case 7:
76 h ^= ByteAs64(data[6]) << 48;
77 case 6:
78 h ^= ByteAs64(data[5]) << 40;
79 case 5:
80 h ^= ByteAs64(data[4]) << 32;
81 case 4:
82 h ^= ByteAs64(data[3]) << 24;
83 case 3:
84 h ^= ByteAs64(data[2]) << 16;
85 case 2:
86 h ^= ByteAs64(data[1]) << 8;
87 case 1:
88 h ^= ByteAs64(data[0]);
89 h *= m;
90 }
91
92 h ^= h >> r;
93 h *= m;
94 h ^= h >> r;
95
96 return h;
97}
98
99uint64_t Hash64Fast(const char* data, size_t n) {
100 return static_cast<int64_t>(util::Fingerprint64(data, n));
101}
102
103#endif // ENABLE_TF_STRING
104