[libFuzzer] add collect_data_flow.py, which runs the data-flow tracer several times on subsets of the input bytes to overcome DFSan out-of-labels failures

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@333616 91177308-0d34-0410-b5e6-96231b3b80d8
author: Kostya Serebryany <kcc@google.com> 2018-05-31 01:27:07 +0000
committer: Kostya Serebryany <kcc@google.com> 2018-05-31 01:27:07 +0000
commit: a5cc9fe6d4150629fc29768a039b0f2839a87f00 (patch)
tree: 5943da0f31ede55f0c7c011b52e787992b46e56e
parent: ae960e3cf76fbee7d18b428572d162a643036c0e (diff)
2 files changed, 61 insertions, 0 deletions
diff --git a/lib/fuzzer/scripts/collect_data_flow.py b/lib/fuzzer/scripts/collect_data_flow.py
new file mode 100755
index 000000000..d13f6dcc4
--- /dev/null
+++ b/lib/fuzzer/scripts/collect_data_flow.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+# Runs the data-flow tracer several times on the same input in order to collect
+# the complete trace for all input bytes (running it on all bytes at once
+# may fail if DFSan runs out of labels).
+# Usage:
+#   collect_data_flow.py BINARY INPUT [RESULT]
+#===------------------------------------------------------------------------===#
+import atexit
+import sys
+import os
+import subprocess
+import tempfile
+import shutil
+
+tmpdir = ""  # NOTE(review): main() assigns its own local `tmpdir`; this module-level value is not read in the visible code
+
+def cleanup(d):  # Print the path d, then recursively delete that directory tree.
+  print "removing: ", d
+  shutil.rmtree(d)  # main() registers this with atexit for its temporary directory
+
+def main(argv):  # argv = [script, BINARY, INPUT, optional RESULT]: run BINARY on byte ranges of INPUT, then merge the per-range outputs.
+  exe = argv[1]
+  inp = argv[2]
+  size = os.path.getsize(inp)
+  q = [[0, size]]  # stack of [begin, end] offset pairs still to try; starts with the whole input
+  tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
+  atexit.register(cleanup, tmpdir)  # delete the temporary directory when the interpreter exits
+  print "tmpdir: ", tmpdir
+  outputs = []  # per-range output files handed to the merge script below
+  while len(q):
+    r = q.pop()
+    print "******* Trying:  ", r
+    tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))  # file name encodes the range, e.g. "0-8"
+    ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
+    if ret and r[1] - r[0] >= 2:  # non-zero exit and the range can still be halved: retry each half
+      q.append([r[0], (r[1] + r[0]) / 2])  # NOTE(review): relies on Python 2 integer `/` so str() yields whole offsets
+      q.append([(r[1] + r[0]) / 2, r[1]])
+    else:  # NOTE(review): also reached when the run failed on a range shorter than 2; it is still reported as success
+      outputs.append(tmpfile)
+      print "******* Success: ", r
+  f = sys.stdout  # merged result goes to stdout unless a RESULT path was given
+  if len(argv) >= 4:
+    f = open(argv[3], "w")  # NOTE(review): this handle is never explicitly closed
+  merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")  # merge script is looked up in the directory of argv[0]
+  subprocess.call([merge] + outputs, stdout=f)  # the merge script's exit status is not checked
+
+if __name__ == '__main__':  # run only when executed as a script, not when imported
+  main(sys.argv)  # full argv is passed; main() indexes argv[0] for the script's own location
diff --git a/test/fuzzer/dataflow.test b/test/fuzzer/dataflow.test
index 46e5c5d3d..7162b06f6 100644
--- a/test/fuzzer/dataflow.test
+++ b/test/fuzzer/dataflow.test
@@ -54,6 +54,9 @@ RUN:%t-ThreeFunctionsTestDF 2 4 %t/IN/FUZZMU > %t-merge-2
 RUN:%t-ThreeFunctionsTestDF 4 6 %t/IN/FUZZMU > %t-merge-3
 RUN:%libfuzzer_src/scripts/merge_data_flow.py  %t-merge-* | FileCheck %s --check-prefix=IN_FUZZMU
 
+# Test collect_data_flow
+RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN/FUZZMU | FileCheck %s --check-prefix=IN_FUZZMU
+
 IN_FUZZMU-DAG: F{{[012]}} 0000100
 IN_FUZZMU-DAG: F{{[012]}} 1111001
 IN_FUZZMU-DAG: F{{[012]}} 0000011
@@ -65,3 +68,5 @@ OUT_OF_LABELS: ==FATAL: DataFlowSanitizer: out of labels
 RUN: %t-ExplodeDFSanLabelsTestDF 0 2  %t/IN/1234567890123456
 RUN: %t-ExplodeDFSanLabelsTestDF 2 4  %t/IN/1234567890123456
 RUN: %t-ExplodeDFSanLabelsTestDF 4 6  %t/IN/1234567890123456
+# Or we can use collect_data_flow
+RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ExplodeDFSanLabelsTestDF %t/IN/1234567890123456
author	Kostya Serebryany <kcc@google.com>	2018-05-31 01:27:07 +0000
committer	Kostya Serebryany <kcc@google.com>	2018-05-31 01:27:07 +0000
commit	a5cc9fe6d4150629fc29768a039b0f2839a87f00 (patch)
tree	5943da0f31ede55f0c7c011b52e787992b46e56e
parent	ae960e3cf76fbee7d18b428572d162a643036c0e (diff)