1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
#! /usr/bin/python
import argparse
import logging
import mmap
import os
import re
import sys
# Regular expressions loaded from the configuration files. Each entry is
# a (compiled_regex, origin) tuple appended by config_import_file();
# origin is a "<filename>:<start>-<end>" string locating the regex.
exclude_regexs = []

# NOTE(review): not referenced by any of the code visible in this file;
# kept unchanged in case something else relies on it.
noncomment_regex = re.compile(
    "(^[ \t][^#]+.*\n)+"
    , re.MULTILINE)


def config_import_file(filename):
    """
    Import the regular expressions found in one configuration file.

    Each file follows the format::

      #
      # Comments for multiline regex 1...
      #
      multilineregex
      multilineregex
      multilineregex
      #
      # Comments for multiline regex 2...
      #
      multilineregex
      multilineregex
      multilineregex

    etc.

    Every block of lines that is not a comment block is compiled as a
    single multi-line regular expression (a bytes pattern compiled with
    ``re.MULTILINE``) and appended to the module-level
    ``exclude_regexs`` list as a ``(compiled_regex, origin)`` tuple,
    where ``origin`` is ``"<filename>:<start>-<end>"`` (byte offsets of
    the block inside the file).

    An empty file, or one holding only comments, adds nothing.

    :param filename: path of the configuration file to read
    :raises Exception: if the file cannot be read or one of its blocks
      is not a valid regular expression; regexes found before the bad
      block stay in ``exclude_regexs``.
    """
    try:
        # Read the file as bytes: the regexes are later run against
        # memory-mapped (bytes) files, so the patterns must be bytes
        # too. Reading, unlike mmap(), also works for empty files.
        with open(filename, "rb") as f:
            data = f.read()
        # That regex basically selects any block of lines that is
        # not a comment block. The finditer() finds all the blocks
        # and we compile each one into a regex and append it.
        for m in re.finditer(br"(^\s*[^#].*\n)+", data, re.MULTILINE):
            start, end = m.start(), m.end()
            origin = "%s:%s-%s" % (filename, start, end)
            block = m.group(0)
            try:
                r = re.compile(block, re.MULTILINE)
            except re.error as e:
                logging.error("%s: bytes %d-%d: bad regex: %s",
                              filename, start, end, e)
                raise
            logging.debug("%s: found regex at bytes %d-%d: %s",
                          filename, start, end, block)
            exclude_regexs.append((r, origin))
        logging.debug("%s: loaded", filename)
    except Exception as e:
        raise Exception("E: %s: can't load config file: %s" % (filename, e))
def config_import_path(path):
    """
    Import regular expressions from every file *.conf under the given path.

    The directory tree rooted at ``path`` is walked with os.walk();
    directories and files are visited in sorted order so the regexes
    are always loaded in the same order. Files whose name does not end
    in ``.conf`` are ignored; the others are handed to
    config_import_file(). A ``path`` that os.walk() cannot list yields
    nothing and is therefore silently skipped.

    :param path: directory to scan for configuration files
    :raises Exception: if any of the configuration files fails to load
    """
    file_regex = re.compile(r".*\.conf$")
    try:
        for dirpath, dirnames, filenames in os.walk(path):
            # os.walk() is top-down by default, so sorting the list in
            # place fixes the order in which subdirectories are visited
            dirnames.sort()
            for _filename in sorted(filenames):
                filename = os.path.join(dirpath, _filename)
                if not file_regex.search(_filename):
                    logging.debug("%s: ignored", filename)
                    continue
                config_import_file(filename)
    except Exception as e:
        raise Exception("E: %s: can't load config files: %s" % (path, e))
def config_import(paths):
    """
    Import regular expressions from any file *.conf in the list of paths.

    If a path is "" or None, the list of paths until then is flushed
    and only the new ones are considered. (A None entry is what the
    ``-c`` command line option produces when given without a value.)

    :param paths: iterable of directory names; "" and None entries act
      as flush markers
    :raises Exception: whatever config_import_path() raises for a path
      that fails to load
    """
    _paths = []
    # Go over the list, flush it if the user gave an empty path ("")
    for path in paths:
        if path is None or path == "":
            logging.debug("flushing current config list: %s", _paths)
            _paths = []
        else:
            _paths.append(path)
    logging.debug("config list: %s", _paths)
    for path in _paths:
        config_import_path(path)
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("-v", "--verbosity", action="count", default=0,
                        help="increase verbosity")
arg_parser.add_argument("-q", "--quiet", action="count", default=0,
                        help="decrease verbosity")
arg_parser.add_argument("-c", "--config-dir", action="append", nargs="?",
                        default=[".known-issues/"],
                        help="configuration directory (multiple can be "
                        "given; if none given, clears the current list) "
                        "%(default)s")
arg_parser.add_argument("FILENAMEs", nargs="+",
                        help="files to filter")


def find_exclude_ranges(data, regexs, filename="<data>"):
    """
    Return the sorted list of (start, end) ranges of data matched by regexs.

    :param data: bytes-like object to search (bytes or an mmap)
    :param regexs: iterable of (compiled_regex, origin) tuples; each
      pattern is searched with re.MULTILINE
    :param filename: name used in the log messages only
    :returns: list of (start, end) offsets, sorted; ranges may overlap
    """
    ranges = []
    for ex, origin in regexs:
        logging.info("%s: searching from %s: %s",
                     filename, origin, ex.pattern)
        for m in re.finditer(ex.pattern, data, re.MULTILINE):
            logging.debug("%s: %s-%s: match from from %s",
                          filename, m.start(), m.end(), origin)
            ranges.append((m.start(), m.end()))
    ranges.sort()
    return ranges


def unfiltered_chunks(data, ranges):
    """
    Yield, in order, the pieces of data not covered by any range.

    :param data: sliceable bytes-like object (bytes or an mmap)
    :param ranges: iterable of (start, end) offsets to leave out; they
      may come in any order and may overlap or nest
    """
    offset = 0
    for begin, end in sorted(ranges):
        if begin > offset:
            yield data[offset:begin]
        # Never move backwards: a range nested in (or overlapping) a
        # previous one must not re-expose text already excluded
        offset = max(offset, end)
    if offset < len(data):
        yield data[offset:]


def write_unfiltered(data, ranges, out):
    """
    Write to out everything in data that is not in the excluded ranges.

    Each piece is written whole; a newline is added after a piece that
    does not already end in one, so pieces never run into each other
    and no byte of the input is dropped.

    :param data: sliceable bytes-like object (bytes or an mmap)
    :param ranges: iterable of (start, end) offsets to leave out
    :param out: binary stream, anything with a write() taking bytes
    """
    for chunk in unfiltered_chunks(data, ranges):
        out.write(chunk)
        if not chunk.endswith(b"\n"):
            out.write(b"\n")


def filter_file(filename, regexs, out):
    """
    Write to out the parts of a file that no regex in regexs matches.

    The file is memory-mapped read-only, so it needs no write
    permission. An empty file produces no output.

    :param filename: path of the file to filter
    :param regexs: iterable of (compiled_regex, origin) tuples
    :param out: binary stream the unfiltered text is written to
    :raises Exception: any error opening, mapping or reading the file
      is left for the caller to handle
    """
    with open(filename, "rb") as f:
        logging.info("%s: filtering", filename)
        if os.fstat(f.fileno()).st_size == 0:
            # mmap() refuses to map an empty file, and there is
            # nothing to filter in it anyway
            logging.debug("%s: empty file, nothing to filter", filename)
            return
        mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            # The ranges are computed afresh for each file; offsets
            # found in one file mean nothing in another
            ranges = find_exclude_ranges(mm, regexs, filename)
            logging.warning("%s: ranges excluded: %s", filename, ranges)
            # Print what has not been filtered
            write_unfiltered(mm, ranges, out)
        finally:
            mm.close()


def main(argv=None):
    """
    Filter the files named on the command line.

    Loads the exclusion regexes from the configuration directories and
    writes to standard output whatever part of each file matches none
    of them. A file that cannot be processed is reported with
    logging.error() and the remaining files are still processed.

    :param argv: argument list to parse; defaults to sys.argv[1:]
    """
    args = arg_parser.parse_args(argv)
    logging.basicConfig(level=40 - 10 * (args.verbosity - args.quiet),
                        format="%(levelname)s: %(message)s")
    logging.debug("Reading configuration from directory `%s`",
                  args.config_dir)
    config_import(args.config_dir)
    # The data is bytes: on Python 3 it has to go to the underlying
    # binary stream; Python 2's sys.stdout takes it directly
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for filename in args.FILENAMEs:
        try:
            filter_file(filename, exclude_regexs, out)
        except Exception as e:
            logging.error("%s: cannot load: %s", filename, e)
    out.flush()


if __name__ == "__main__":
    main()
|