microsoft/onnxruntime-extensions
Public · mirrored from https://github.com/microsoft/onnxruntime-extensions · Available
operators/string_utils.cc
97lines · modecode
| 1 | #include "farmhash.h" |
| 2 | #include "string_utils.h" |
| 3 | |
// Splits `str` at every character that appears in `seps`.
//
// Each returned view is a sub-range of `str`, so the result is only valid
// while the buffer behind `str` is alive.
//
// When `remove_empty_entries` is true, empty fields produced by leading or
// adjacent separators are skipped. An empty field at the very end of the
// input (the string is empty or ends with a separator) is never emitted,
// whatever the value of `remove_empty_entries`.
std::vector<std::string_view> SplitString(const std::string_view& str, const std::string_view& seps, bool remove_empty_entries) {
  std::vector<std::string_view> tokens;
  std::string_view::size_type field_begin = 0;

  // Walk from separator to separator; each iteration handles the field that
  // ends at `sep_pos`.
  for (auto sep_pos = str.find_first_of(seps, field_begin);
       sep_pos != std::string_view::npos;
       sep_pos = str.find_first_of(seps, field_begin)) {
    const bool field_is_empty = (sep_pos == field_begin);
    if (!(field_is_empty && remove_empty_entries)) {
      tokens.push_back(str.substr(field_begin, sep_pos - field_begin));
    }
    field_begin = sep_pos + 1;
  }

  // Whatever follows the last separator is the final field; it is empty when
  // the last separator sits at the end of the string, and is then dropped.
  const std::string_view tail = str.substr(field_begin);
  if (!tail.empty()) {
    tokens.push_back(tail);
  }

  return tokens;
}
| 31 | |
// Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hash.cc#L28
// Widens `c` to 64 bits and keeps only the low eight bits, so a negative
// `char` does not leak sign-extension bits into the result.
static inline uint64_t ByteAs64(char c) {
  constexpr uint64_t kLowByteMask = 0xff;
  return kLowByteMask & static_cast<uint64_t>(c);
}
| 34 | |
// Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/raw_coding.h#L41
// Assembles the four bytes at `ptr` into an integer, with ptr[0] as the least
// significant byte and ptr[3] as the most significant one. The result is
// returned widened to 64 bits; only the low 32 bits can be set.
uint64_t DecodeFixed32(const char* ptr) {
  uint64_t value = 0;
  // Start from the most significant byte so each step shifts the bytes
  // gathered so far up by one position.
  for (int index = 3; index >= 0; --index) {
    const auto byte = static_cast<unsigned char>(ptr[index]);
    value = (value << 8) | static_cast<uint64_t>(byte);
  }
  return value;
}
| 42 | |
| 43 | // Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/raw_coding.h#L55 |
| 44 | static uint64_t DecodeFixed64(const char* ptr) { |
| 45 | uint64_t lo = DecodeFixed32(ptr); |
| 46 | uint64_t hi = DecodeFixed32(ptr + 4); |
| 47 | return (hi << 32) | lo; |
| 48 | } |
| 49 | |
| 50 | // Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hash.cc#L79 |
| 51 | uint64_t Hash64(const char* data, size_t n, uint64_t seed) { |
| 52 | const uint64_t m = 0xc6a4a7935bd1e995; |
| 53 | const int r = 47; |
| 54 | |
| 55 | uint64_t h = seed ^ (n * m); |
| 56 | |
| 57 | while (n >= 8) { |
| 58 | uint64_t k = DecodeFixed64(data); |
| 59 | data += 8; |
| 60 | n -= 8; |
| 61 | |
| 62 | k *= m; |
| 63 | k ^= k >> r; |
| 64 | k *= m; |
| 65 | |
| 66 | h ^= k; |
| 67 | h *= m; |
| 68 | } |
| 69 | |
| 70 | switch (n) { |
| 71 | case 7: |
| 72 | h ^= ByteAs64(data[6]) << 48; |
| 73 | case 6: |
| 74 | h ^= ByteAs64(data[5]) << 40; |
| 75 | case 5: |
| 76 | h ^= ByteAs64(data[4]) << 32; |
| 77 | case 4: |
| 78 | h ^= ByteAs64(data[3]) << 24; |
| 79 | case 3: |
| 80 | h ^= ByteAs64(data[2]) << 16; |
| 81 | case 2: |
| 82 | h ^= ByteAs64(data[1]) << 8; |
| 83 | case 1: |
| 84 | h ^= ByteAs64(data[0]); |
| 85 | h *= m; |
| 86 | } |
| 87 | |
| 88 | h ^= h >> r; |
| 89 | h *= m; |
| 90 | h ^= h >> r; |
| 91 | |
| 92 | return h; |
| 93 | } |
| 94 | |
| 95 | uint64_t Hash64Fast(const char* data, size_t n) { |
| 96 | return static_cast<int64_t>(util::Fingerprint64(data, n)); |
| 97 | } |
| 98 | |