microsoft/onnxruntime-extensions

Public

mirrored from https://github.com/microsoft/onnxruntime-extensions Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
zhanxi/debug_linux_wheel

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

cmake/externals/farmhash/dev/builder.cc

480lines · modecode

1// Copyright (c) 2014 Google, Inc.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20//
21// FarmHash, by Geoff Pike
22
23#include <cstdio>
24#include <unordered_map>
25#include <unordered_set>
26#include <iostream>
27#include <fstream>
28#include <string>
29#include <vector>
30#include <assert.h>
31#include <stdlib.h>
32#include <string.h>
33
// When true, System() echoes every shell command to stderr before running it.
static bool debug = false;

using namespace std;

// Location of the local SMHasher checkout that each test run copies from.
const char* smhasher_dir = "/var/tmp/smhasher";
// Command Test() uses to fetch SMHasher when smhasher_dir does not exist yet.
const char* get_smhasher = "svn checkout -r151 http://smhasher.googlecode.com/svn/trunk/";
// Value used for `parallelism` when the PARALLELISM variable is not set.
const char* default_parallelism = "16";

// Working directory ("/tmp/" + $DIR); assigned in main().
string dir;
// Job count passed to `make -j` and `./do-in-parallel -k`; assigned in main().
string parallelism;
44
45int System(const string& cmd) {
46 if (debug) cerr << cmd << '\n';
47 return system(cmd.c_str());
48}
49
// Returns the decimal representation of `n` (with a leading '-' when
// negative). Uses std::to_string so no fixed-size scratch buffer is involved.
string itoa(int n) {
  return to_string(n);
}
55
// Returns a copy of `s` in which every character that islower() reports as
// lowercase is shifted by 'A' - 'a'. All other characters are left untouched.
string ToUpper(const string& s) {
  string result(s);
  for (char& ch : result) {
    // The <cctype> classifiers require a value representable as unsigned
    // char; passing a negative plain char is undefined behaviour.
    if (islower(static_cast<unsigned char>(ch))) ch += 'A' - 'a';
  }
  return result;
}
63
64void DoWithNoOutputExpected(const string& s) {
65 static int counter = 0;
66 cout << s << '\n';
67 const string o = dir + "/e_" + itoa(counter++);
68 string cmd = "(" + s + ")>&" + o;
69 const int r = System(cmd);
70 const bool success = r == 0;
71 cmd = "cmp -s /dev/null " + o;
72 const bool no_output = System(cmd) == 0;
73 if (no_output && success) return;
74 if (!no_output) {
75 cerr << "Expected no output from '" << s << "' but got:";
76 cmd = "cat " + o;
77 const int q = System(cmd);
78 assert(q == 0);
79 }
80 if (r != 0) {
81 cerr << "Expected exit code 0 from '" << s << "' but got " << r << '\n';
82 }
83 abort();
84}
85
// Reads the file at `path` and returns its contents split on '\n', one
// element per line (newlines are not included). A trailing newline does not
// produce a final empty element. A file that cannot be opened, or that is
// empty, yields an empty vector.
vector<string> FileContents(const string& path) {
  vector<string> lines;
  ifstream in(path.c_str());
  string line;
  while (getline(in, line)) {
    lines.push_back(line);
  }
  return lines;
}
103
104vector<string> DoAndCollectOutput(const string& s) {
105 static int counter = 0;
106 cout << s << '\n';
107 string o = dir + "/o_" + itoa(counter++);
108 string cmd = "(" + s + ")>&" + o;
109 int r = System(cmd);
110 if (r != 0) cout << "exit code: " << r << '\n';
111 return FileContents(o);
112}
113
114void DoAndIgnoreOutput(const string& s) {
115 static int counter = 0;
116 cout << s << '\n';
117 string o = dir + "/i_" + itoa(counter++);
118 string cmd = "(" + s + ")>&" + o;
119 int r = System(cmd);
120 if (r != 0) cout << "exit code: " << r << '\n';
121}
122
123void DoAndShowOutput(const string& s) {
124 for (const string& l : DoAndCollectOutput(s)) cout << l << '\n';
125}
126
// Extracts a namespace name of the form "farmhashXY" (X and Y alphabetic)
// from `s`, but only if `s` also contains both "decl" and " not ". main()
// feeds it lines of g++ output. Returns the first such name, or "" if the
// line does not qualify.
string FindFarm(const string& s) {
  if (s.find("decl") == string::npos) return "";
  if (s.find(" not ") == string::npos) return "";

  const string::size_type kPrefixLen = 8;   // strlen("farmhash")
  const string::size_type kNameLen = 10;    // prefix plus two letters
  for (string::size_type pos = s.find("farmhash"); pos != string::npos;
       pos = s.find("farmhash", pos + 1)) {
    // Written as an addition so the bound cannot wrap around, and so that a
    // name ending exactly at the end of the string is still considered.
    if (pos + kNameLen > s.size()) break;
    if (isalpha(static_cast<unsigned char>(s[pos + kPrefixLen])) &&
        isalpha(static_cast<unsigned char>(s[pos + kPrefixLen + 1]))) {
      return s.substr(pos, kNameLen);
    }
  }
  return "";
}
140
141void AppendFileToFile(const string& f0, const string& f1) {
142 DoWithNoOutputExpected("cat " + f0 + " >> " + f1);
143}
144
// Appends the text `s` to the file named `filename`, opening it in append
// mode. The stream is closed when it goes out of scope.
void AppendToFile(const string& s, const string& filename) {
  ofstream out(filename.c_str(), ios::out | ios::app);
  out << s;
}
151
152void CreateTestBoilerplate(const string& inputfile, const string& boilerplate) {
153 vector<string> files_to_use;
154 static const char* consider[] = {
155 "Hash32", "Hash32WithSeed",
156 "Hash64", "Hash64WithSeed", "Hash64WithSeeds" };
157 for (const char* c : consider) {
158 string cmd = "fgrep '" + string(c) + "(const char' " + inputfile +
159 " > /dev/null";
160 if (System(cmd) == 0) files_to_use.push_back(c);
161 }
162 DoWithNoOutputExpected("touch " + boilerplate);
163 if (!files_to_use.empty()) {
164 AppendToFile("#if FARMHASH_TEST\n", boilerplate);
165 for (const auto& f : files_to_use) {
166 AppendFileToFile("TESTBOILERPLATE" + f, boilerplate);
167 }
168 AppendToFile("#endif // FARMHASH_TEST\n", boilerplate);
169 }
170}
171
// Maps a source name (a .cc file name without the extension) to the
// farmhashXY namespaces referenced in that file. Allocated and filled by
// ComputeDeps(); read by IncludeCode().
static unordered_map<string, vector<string>>* deps = nullptr;

// Number of namespaces IncludeCode() has emitted so far.
static int files_included = 0;
175
176// We need the code in the given namespace. If it depends on code
177// in other namespaces then process those first. Do nothing if
178// the given namespace has already been handled by this function.
179void IncludeCode(const string& name) {
180 static unordered_set<string>* started = NULL;
181 static unordered_set<string>* finished = NULL;
182 if (finished == NULL) {
183 finished = new unordered_set<string>();
184 started = new unordered_set<string>();
185 }
186 if (started->count(name) > finished->count(name)) {
187 cerr << "ERROR: Circular dependence involving " << name << "!\n";
188 abort();
189 }
190 if (started->insert(name).second) {
191 for (const string& dep : (*deps)[name]) IncludeCode(dep);
192 cout << "Include " << name << '\n';
193 string inputfile = name + ".cc";
194 string outputfile = dir + "/" + name + "_gen.cc";
195 string boilerplate = dir + "/" + name + "_gen_test_boilerplate.cc";
196 CreateTestBoilerplate(inputfile, boilerplate);
197 DoWithNoOutputExpected("(echo 'namespace " + name + " {'; "
198 "cat " + inputfile + " " + boilerplate + "; "
199 "echo '} // namespace " + name + "')>&" +
200 outputfile + " && chmod 444 " + outputfile);
201 finished->insert(name);
202 ++files_included;
203 }
204}
205
// Splits `s` at every occurrence of `delim` and returns the pieces in order.
// Empty pieces are kept, except that a single empty piece at the very end
// (which includes the case of an empty `s`) is dropped.
vector<string> Split(const string& s, char delim) {
  vector<string> pieces;
  string::size_type start = 0;
  for (;;) {
    const string::size_type hit = s.find(delim, start);
    if (hit == string::npos) {
      pieces.push_back(s.substr(start));
      break;
    }
    pieces.push_back(s.substr(start, hit - start));
    start = hit + 1;
  }
  if (pieces.back().empty()) pieces.pop_back();
  return pieces;
}
219
220void CheckForDir(const string& dir) {
221 DoWithNoOutputExpected("test -d " + dir + " || echo missing directory");
222}
223
// Returns the first maximal run of decimal digits in `s`.
// For example, from "xu32_871q:" the result is "32".
// If `s` contains no digit at all, an error is printed and the program
// aborts. This check is explicit so it also holds in NDEBUG builds.
string GetFirstNumeral(const string& s) {
  static const char kDigits[] = "0123456789";
  const string::size_type first = s.find_first_of(kDigits);
  if (first == string::npos) {
    cerr << "ERROR: no numeral found in '" << s << "'\n";
    abort();
  }
  string::size_type last = s.find_first_not_of(kDigits, first);
  if (last == string::npos) last = s.size();
  return s.substr(first, last - first);
}
235
236void ModifySMHasherForTest(const string& nspace,
237 const string& fn,
238 const string& testdir) {
239 IncludeCode(nspace);
240 // For the calculation of v, use an explicit temporary file to avoid
241 // "fgrep: write error: Broken pipe" errors.
242 const string tmp = testdir + "/tmp";
243 vector<string> v =
244 DoAndCollectOutput("cat " + dir + "/" + nspace + "*.cc | "
245 "fgrep -v -i static | "
246 "fgrep '{' > " + tmp +
247 " && "
248 "fgrep --max-count=1 ' " + fn + "(const char' " + tmp +
249 " && " +
250 "rm " + tmp);
251 assert(v.size() == 1);
252 const string defline = v[0];
253 if (defline[defline.size() - 1] != '{') {
254 cerr << "Line defining " << fn << " should end in '{'" << '\n';
255 abort();
256 }
257 // Get summary of declared return type and args
258 v = DoAndCollectOutput("sed s/" + fn + "//<<<'" + defline + "' "
259 "| tr -cd a-z0-9");
260 assert(v.size() == 1);
261 const string bits = GetFirstNumeral(v[0]);
262 // Create wrapper function
263 const string wrapper = testdir + "/wrapper.cc";
264 DoWithNoOutputExpected("sed s/FUNCTION/" + nspace + "::" + fn + "/g"
265 " < WRAPPER" + v[0] + " > " + wrapper);
266 const string testfile = nspace + "_test.cc";
267 // Put relevant farmhash code and wrapper function together in one file, and
268 // include that file in SMHasher's main.cpp.
269 DoWithNoOutputExpected("echo '#define FARMHASH_TEST 1' | "
270 "cat - farmhash.h platform.cc basics.cc > " +
271 testdir + "/" + testfile);
272 DoWithNoOutputExpected("ls -tr " + dir + "/*_gen.cc | "
273 "xargs cat >> " + testdir + "/" + testfile);
274 DoWithNoOutputExpected("cat f.cc " + wrapper + " >> " + testdir + "/" + testfile);
275 DoWithNoOutputExpected("cd " + testdir + " && "
276 "sed -i '/include \"Platform.h\"/a "
277 "#include \"" + testfile + "\"' main.cpp");
278 // Hook up wrapper function so main.cpp can invoke it.
279 const string q = "\"" + nspace + "_" + fn + "\"";
280 DoWithNoOutputExpected("cd " + testdir + " && "
281 "sed -i '/3719DB20/a "
282 " { WRAPPER, " + bits + ", 0/*verification code*/, " +
283 q + ", " + q + " },"
284 "' main.cpp");
285 // Add a special-case to SMHasher for when the expected
286 // "verification value" is zero: If the computed verification value is
287 // non-zero but the expected value is zero then don't report an error.
288 DoWithNoOutputExpected("cd " + testdir + " && "
289 "sed -i 's/\\(.*print.*Verification value.*Failed\\)/"
290 " if(expected == 0) return true;\\1/' KeysetTest.cpp");
291 // Modify CMakeLists.txt if needed
292 char* t = getenv("CMAKE32");
293 if (t != NULL && t[0] != '\0' && t[0] != '0') {
294 string f = testdir + "/CMakeLists.txt";
295 string ftmp = f + ".tmp";
296 DoWithNoOutputExpected("cat cmake_m32 " + f + " > " + ftmp + " && "
297 "mv " + ftmp + " " + f);
298 }
299}
300
301void CreateTestList(const string& test, const string& path) {
302 assert(test.size() > 12);
303 assert(string(test.c_str(), test.c_str() + 8) == "farmhash");
304 assert(isalpha(test[8]) && isalpha(test[9]));
305 assert(test[10] == ':' && test[11] == ':');
306 const string nspace(test.c_str(), test.c_str() + 10);
307 const string fn(test.c_str() + 12);
308
309 static int counter = 0;
310 const string testdir = path + itoa(counter++);
311 DoWithNoOutputExpected("cp -R " + string(smhasher_dir) + " " + dir);
312 DoWithNoOutputExpected("cd " + dir + " && "
313 "mv `basename " + smhasher_dir + "` " + testdir);
314 DoAndShowOutput("cat PATCH | (cd " + testdir + " && patch) "
315 "|| echo patch failed");
316 DoAndShowOutput("cd " + testdir + " && "
317 "for i in *.cpp *.h; "
318 " do sed -i s/uint128_t/blob128/g $i; "
319 "done");
320 ModifySMHasherForTest(nspace, fn, testdir);
321 DoAndShowOutput("(cd " + testdir + " && cmake . && make -j" + parallelism + " VERBOSE=1) "
322 "|| echo building smhasher failed");
323 const int kParts = 10;
324 fstream f;
325 f.open(path.c_str(), std::fstream::out | std::fstream::app);
326 for (int i = 0; i < kParts; i++) {
327 f << testdir << "/SMHasher --noaffinity --part" << i
328 << " " << nspace << "_" << fn << " >& "
329 << testdir << "/part" << i << '\n';
330 }
331 f.close();
332}
333
334void Test(const string& tests) {
335 string parent = string("`dirname ") + smhasher_dir + "`";
336 string basename = string("`basename ") + smhasher_dir + "`";
337 CheckForDir(parent);
338 DoAndIgnoreOutput("cd " + parent + "; test -d " + smhasher_dir + " || " +
339 get_smhasher + " " + basename);
340 CheckForDir(smhasher_dir);
341 vector<string> v = Split(tests, ',');
342 string testsfile = dir + "/tests";
343 for (const string& s : v) CreateTestList(s, testsfile);
344 DoAndShowOutput(string("./do-in-parallel -k ") + parallelism +
345 " " + testsfile + " || echo FAILED");
346 DoAndShowOutput("grep -B 9 -i fail " + dir + "/test*/part* || echo nothing");
347 cout << "\nSummary of '!!!!!' and tests with expected number of collisions in [0.1, 1):\n\n";
348 DoAndShowOutput("egrep 'collisions.*Expected.* 0[.][1-9].*, actual|!!!!!' " + dir + "/test*/part*");
349}
350
351// Note the cases where, for example something in farmhashxy calls something in
352// farmhashqq. Circular deps are disallowed.
353void ComputeDeps() {
354 assert(deps == NULL);
355 deps = new unordered_map<string, vector<string>>(5);
356 for (const string& line : DoAndCollectOutput("grep farmhash..:: *.cc")) {
357 assert(line.find(":") != string::npos); // line should begin with filename
358 if (line.find("::") == string::npos) continue;
359 int c = line.find(":");
360 for (int i = c; i < line.size() - 12; i++) {
361 if (string(line.data() + i, line.data() + i + 8) == "farmhash" &&
362 isalpha(line[i+8]) && isalpha(line[i+9]) &&
363 line[i+10] == ':' && line[i+11] == ':') {
364 assert(string(line.data() + c - 3, line.data() + c) == ".cc");
365 string from(line.data(), line.data() + c - 3);
366 string to(line.data() + i, line.data() + i + 10);
367 cout << "Found usage of " << to << " in " << from << '\n';
368 (*deps)[from].push_back(to);
369 }
370 }
371 }
372}
373
374int main(int argc, char** argv) {
375 // Step 1: What directory are we going to use? Assume it is relative to /tmp.
376 cout << "Step 1\n";
377 const char* d = getenv("DIR");
378 assert(d != NULL && d[0] != '\0' && d[0] != '/');
379 dir = string("/tmp/") + d;
380 assert(dir.find(" ") == string::npos);
381 string cmd = "rm -rf " + dir + " && mkdir " + dir;
382 int r = System(cmd);
383 assert(r == 0);
384 const char* p = getenv("PARALLELISM");
385 parallelism = string(p == NULL ? default_parallelism : p);
386
387 // Step 2: Create naive version of the code to which farmhash functions need
388 // to be added.
389 cout << "Step 2\n";
390 string src = dir + "/a.cc";
391 DoWithNoOutputExpected("cat - platform.cc basics.cc f.cc <<<"
392 "'#include \"farmhash.h\"' > " + src);
393 DoWithNoOutputExpected("cp farmhash.h " + dir);
394 DoWithNoOutputExpected("cd " + dir + " && chmod 444 farmhash.h " + src);
395 vector<string> v =
396 DoAndCollectOutput("cd " + dir + " && g++ -c -fmax-errors=9999 " + src);
397
398 // Step 3: Create final version of the code.
399 cout << "Step 3\n";
400 //for (const string& s : v) cout << s << '\n';
401
402 ComputeDeps();
403 for (const string& s : v) {
404 string name = FindFarm(s);
405 if (!name.empty()) {
406 IncludeCode(name);
407 }
408 }
409 src = dir + "/b.cc";
410 DoWithNoOutputExpected("cat <<<'#include \"farmhash.h\"' > " + src);
411 DoWithNoOutputExpected("ls -tr " + dir + "/*_gen.cc | "
412 "xargs cat platform.cc basics.cc >> " + src);
413 AppendFileToFile("f.cc", src);
414
415 DoWithNoOutputExpected("cd " + dir + " && g++ -c " + src);
416 DoWithNoOutputExpected("cd " + dir + " && g++ -m32 -c " + src);
417 DoWithNoOutputExpected("cd " + dir + " && g++ -O3 -c " + src);
418 DoWithNoOutputExpected("cd " + dir + " && g++ -m32 -O3 -c " + src);
419 const char* build_flag_tests = getenv("BUILD_FLAG_TESTS");
420 if (build_flag_tests != NULL) {
421 string z = build_flag_tests;
422 int b = 0, e = z.size();
423 while (b < e) {
424 int c = b;
425 while (c < e && z[c] != '|') {
426 ++c;
427 }
428 string flags(z.data() + b, z.data() + c);
429 DoWithNoOutputExpected("cd " + dir + " && g++ " + flags + " -c " + src);
430 b = c + 1;
431 }
432 }
433 // Copy files to ../src
434 DoWithNoOutputExpected("cp -f farmhash.h " + src + " ../src && "
435 "mv ../src/$(basename " + src + ") ../src/farmhash.cc");
436 // Strip #if FARMHASH_TEST stuff
437 assert(files_included > 0);
438 for (int i = 0; i < files_included; i++) {
439 DoWithNoOutputExpected("./remove-from-to 'if FARMHASH_TEST' 'endif.*FARMHASH_TEST' ../src/farmhash.cc");
440 }
441 // Fix copyright notices.
442 DoAndShowOutput("./fix-copyright ../src/farmhash.h");
443 DoAndShowOutput("./fix-copyright ../src/farmhash.cc");
444
445 // Step 4: Generate self-test code
446 cout << "Step 4\n";
447 // TODO: what if I need multiple machines to run the various bits of platform-specific code?
448 string m = dir + "/m.cc";
449 DoAndShowOutput("count=$(ls " + dir + "/*_gen.cc | wc -l); "
450 "for i in " + dir + "/*_gen.cc; "
451 "do"
452 " f=${i%_gen.cc}_selftest0.cc; ./create-self-test $i $f &&"
453 " pushd " + dir + " && echo $i &&"
454 " cat " + src + " $f > " + m + " &&"
455 " g++ -maes -msse4.2 -msse4.1 -mssse3 " + m + " && ./a.out > tmp.cc && popd &&"
456 " l=$(fgrep -n 'if TESTING' $f | head -1 | cut -f 1 -d :) &&"
457 " (head -n $l $f | sed -e 's/define TESTING 0/define TESTING 1/' -e \"s/define NUM_SELF_TESTS 0/define NUM_SELF_TESTS $count/\"; cat " + dir + "/tmp.cc;"
458 " tail -n +$((l + 1)) $f) > ${i%_gen.cc}_selftest1.cc; "
459 "done");
460 DoWithNoOutputExpected("(echo; echo '#if FARMHASHSELFTEST'; echo;"
461 " cat " + dir + "/*_selftest1.cc;"
462 " echo; echo 'int main() {';"
463 " for i in " + dir + "/*_gen.cc; "
464 " do"
465 " namespace=$(basename ${i%_gen.cc});"
466 " echo ' '${namespace}'Test::RunTest();';"
467 " done;"
468 " echo ' __builtin_unreachable();';"
469 " echo '}';"
470 " echo; echo '#endif // FARMHASHSELFTEST') "
471 ">> ../src/farmhash.cc");
472
473 // Step 5: Quality testing
474 d = getenv("TEST");
475 if (d != NULL && d[0] != '\0') {
476 cout << "Step 5\n";
477 Test(d);
478 }
479 return 0;
480}
481