summaryrefslogtreecommitdiff
path: root/scripts/filter-known-issues.py
blob: 1aa77726cf79b852420033e75d9801f41102429d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#! /usr/bin/python
import argparse
import logging
import mmap
import os
import re
import sys

# Global list of (compiled regex, origin string) tuples. It is filled in
# by config_import_file() and read by the filtering code further below.
exclude_regexs = list()

# Matches a run of lines that start with a space or a tab and contain
# no '#' right after it.
# NOTE(review): nothing in the visible part of this file references
# this regex -- confirm it is still needed before removing it.
noncomment_regex = re.compile("(^[ \t][^#]+.*\n)+", flags=re.MULTILINE)

def config_import_file(filename):
    """
    Imports the regular expressions defined in the given file

    The file follows the format::

      #
      # Comments for multiline regex 1...
      #
      multilineregex
      multilineregex
      multilineregex
      #
      # Comments for multiline regex 2...
      #
      multilineregex
      multilineregex
      multilineregex

    etc.

    Each block of consecutive non-comment lines is compiled (with
    re.MULTILINE) as one regex over bytes and appended to the global
    list exclude_regexs as a tuple (regex, origin), where origin is the
    string "FILENAME:STARTBYTE-ENDBYTE" locating the block in the file.

    :param filename: path of the configuration file to load
    :raises Exception: if the file cannot be read or one of its blocks
      is not a valid regular expression
    """
    try:
        # The file is read rather than mmap'ed: an empty file cannot be
        # mmap'ed, and this way there is no mapping left to close.
        with open(filename, "rb") as f:
            data = f.read()
        # That regex basically selects any block of lines that is not
        # a comment block. It is a raw *bytes* pattern because the
        # file contents are bytes (a str pattern cannot be applied to
        # bytes in Python 3).
        #
        # Note \s also matches newlines, so a blank line right before
        # a comment line pulls that comment line into the block.
        for m in re.finditer(br"(^\s*[^#].*\n)+", data, re.MULTILINE):
            start, end = m.span()
            origin = "%s:%s-%s" % (filename, start, end)
            pattern = m.group(0)
            try:
                r = re.compile(pattern, re.MULTILINE)
            except Exception as e:
                logging.error("%s: bytes %d-%d: bad regex: %s",
                              filename, start, end, e)
                raise
            logging.debug("%s: found regex at bytes %d-%d: %s",
                          filename, start, end, pattern)
            exclude_regexs.append((r, origin))
        logging.debug("%s: loaded", filename)
    except Exception as e:
        raise Exception("E: %s: can't load config file: %s" % (filename, e))

def config_import_path(path):
    """
    Imports regular expressions from any file *.conf in the given path

    The directory tree rooted at path is walked recursively and every
    file whose name ends in ".conf" is handed to config_import_file();
    any other file is ignored. Directories and files are visited in
    sorted order so the load order is reproducible.

    os.walk() is used with its default error handling, so a path that
    does not exist or cannot be listed yields no files and no error.

    :param path: top directory to scan for *.conf files
    :raises Exception: if any of the configuration files fails to load
    """
    try:
        for dirpath, dirnames, filenames in os.walk(path):
            # Sorting in place makes os.walk() descend into the
            # subdirectories in a deterministic order
            dirnames.sort()
            for _filename in sorted(filenames):
                filename = os.path.join(dirpath, _filename)
                if not _filename.endswith(".conf"):
                    logging.debug("%s: ignored", filename)
                    continue
                config_import_file(filename)
    except Exception as e:
        raise Exception("E: %s: can't load config files: %s" % (path, e))

def config_import(paths):
    """
    Imports regular expressions from any file *.conf in the list of paths.

    If a path is "" or None, the list of paths until then is flushed
    and only the new ones are considered.

    :param paths: iterable of directory names (or "" / None markers)
    :raises Exception: if config_import_path() fails on any of the
      remaining paths
    """
    _paths = []
    # Go over the list, flush it if the user gave an empty path ("")
    for path in paths:
        if path is None or path == "":
            logging.debug("flushing current config list: %s", _paths)
            _paths = []
        else:
            _paths.append(path)
    logging.debug("config list: %s", _paths)
    for path in _paths:
        config_import_path(path)

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("-v", "--verbosity", action="count", default=0,
                        help="increase verbosity")
arg_parser.add_argument("-q", "--quiet", action="count", default=0,
                        help="decrease verbosity")
arg_parser.add_argument("-c", "--config-dir", action="append", nargs="?",
                        default=[".known-issues/"],
                        help="configuration directory (multiple can be "
                        "given; if none given, clears the current list) "
                        "%(default)s")
arg_parser.add_argument("FILENAMEs", nargs="+",
                        help="files to filter")


def _find_exclude_ranges(filename, data, regexs):
    """
    Returns the sorted list of (start, end) byte ranges of data that
    are matched by any of the regexes

    :param filename: name of the file being filtered (for logging only)
    :param data: bytes-like contents of the file
    :param regexs: list of (compiled regex, origin) tuples
    """
    ranges = []
    for ex, origin in regexs:
        logging.info("%s: searching from %s: %s",
                     filename, origin, ex.pattern)
        # Use the compiled regex directly instead of recompiling its
        # pattern for every file
        for m in ex.finditer(data):
            logging.debug("%s: %s-%s: match from from %s",
                          filename, m.start(), m.end(), origin)
            ranges.append((m.start(), m.end()))
    ranges.sort()
    return ranges


def _write_unfiltered(filename, data, ranges, out):
    """
    Writes to out the parts of data not covered by the sorted ranges

    :param filename: name of the file being filtered (for logging only)
    :param data: bytes-like contents of the file
    :param ranges: (start, end) tuples sorted by start; may overlap
    :param out: binary stream the surviving bytes are written to
    """
    offset = 0
    for b, e in ranges:
        logging.debug("%s: exclude range (%d, %d), from %d %dB",
                      filename, b, e, offset, b - offset)
        if b > offset:
            out.write(data[offset:b])
        # Never move backwards: a range nested in (or overlapping) a
        # previous one must not re-expose already excluded bytes
        offset = max(offset, e)
    if offset < len(data):
        out.write(data[offset:])


def _filter_file(filename, regexs, out):
    """
    Writes to out the contents of filename minus whatever is matched
    by the regexes

    :param filename: path of the file to filter
    :param regexs: list of (compiled regex, origin) tuples
    :param out: binary stream the surviving bytes are written to
    """
    # The file is only read, so neither it nor the mapping need to be
    # writable
    with open(filename, "rb") as f:
        logging.info("%s: filtering", filename)
        if os.fstat(f.fileno()).st_size == 0:
            # An empty file cannot be mmap'ed and has nothing to print
            return
        mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            # The ranges are computed per file, so matches found in
            # one file never affect the next one
            ranges = _find_exclude_ranges(filename, mm, regexs)
            logging.warning("%s: ranges excluded: %s", filename, ranges)
            _write_unfiltered(filename, mm, ranges, out)
        finally:
            mm.close()


def main(argv=None):
    """
    Filters each file given in the command line, printing to standard
    output whatever is not matched by the known-issues regexes

    A file that cannot be processed is reported with logging.error()
    and the remaining files are still processed.

    :param argv: list of command line arguments; None means sys.argv[1:]
    """
    args = arg_parser.parse_args(argv)

    logging.basicConfig(level=40 - 10 * (args.verbosity - args.quiet),
                        format="%(levelname)s: %(message)s")

    logging.debug("Reading configuration from directories `%s`",
                  args.config_dir)
    config_import(args.config_dir)

    # The files are handled as bytes; in Python 3 those have to go to
    # the binary buffer under sys.stdout, in Python 2 sys.stdout takes
    # them directly.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for filename in args.FILENAMEs:
        try:
            _filter_file(filename, exclude_regexs, out)
        except Exception as e:
            logging.error("%s: cannot load: %s", filename, e)
    out.flush()


if __name__ == "__main__":
    main()