# testing/check_fc_files.py

#!/usr/bin/env python3
# -*- coding:UTF-8 -*-
# Copyright (C) 2019 Nicolas Iooss
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
"""Check the .fc files for some common errors

@author: Nicolas Iooss
@license: GPLv2
"""
import argparse
from pathlib import Path
import re


# Common patterns for ending a file pattern, each one associated with the text
# which replaces it during the checks: every entry is (ending, substitute).
# The endings are compared literally with str.endswith() in analyze_fc_file(),
# they are not interpreted as regular expressions.
# Order matters as the first ones are tried before the last ones.
#
# Use the infinity symbol to replace "any character", in order to keep a meaning
# of any possible character in the pattern after the reduction operations.
COMMON_FILE_END_PATTERNS = (
    ('/.+', ''),  # Match the children of a directory
    ('/.*', ''),  # Like /.+, but it could be empty
    ('(/.*)', ''),  # Like /.*, but with useless parentheses
    ('/[^/]+', ''),  # Match any filename
    ('/[^/]*', ''),  # Match any filename, but it could be empty
    ('(/[^/]*)', ''),  # Like /[^/]*, but with useless parentheses
    ('(/[^/]*)?', ''),  # Match a directory and its direct children
    ('(/.+)?', ''),  # Match a directory and its children
    ('(/.*)?', ''),  # Match a directory and its children
    ('/(sbin/)?.*', ''),  # Weird pattern for postfix, which would better be (/sbin)?(/.*)?
    ('\\.so(\\.[^/]*)*', '\\.so'),  # Match a .so extension, which is really weird because [^/] matches a dot too, so the final star can be replaced with '?'  # noqa
    ('\\.db(\\.[^/]*)*', '\\.db'),  # Match a .db extension, which is really weird because [^/] matches a dot too, so the final star can be replaced with '?'  # noqa
    ('(\\.[^/]+)?', ''),  # Match a possible file extension
    ('(\\..+)', '\\.∞'),  # Match a dot and at least one character after
    ('(\\..*)?', '\\.∞'),  # Match a dot and anything (possibly nothing) after, or nothing at all
    ('\\..*', '\\.'),  # Match a dot and anything (possibly nothing) after
    ('.*', ''),  # Match anything after
    ('.+', '∞'),  # Match anything after, but at least one character
    ('[^/]+', '∞'),  # Match anything after which does not create a new directory level
    ('[^/]*', ''),  # Like [^/]+, but may be empty
    ('[^/-]*', ''),  # Like [^/]*, but do not match files with dashes in their names
    ('[a-z]?', ''),  # Match a possible letter
    ('[a-z]*', ''),  # Match some letters
    ('[0-9]?', ''),  # Match a possible digit
    ('[0-9]*', ''),  # Match some digits
    ('[0-9]+', '0'),  # Match at least one digit
    ('(\\.bin)?', ''),  # Match an optional extension
    ('(-.*)?', ''),  # Match an optional suffix with a minus sign
)

# File type specifiers accepted in an entry of a .fc file.
# None is the value obtained when the entry does not specify any file type.
FILE_TYPES = (None, '--', '-b', '-c', '-d', '-l', '-p', '-s')

# Special context which is accepted as-is, without any gen_context(...) validation
NONE_CONTEXT = '<<none>>'

# MLS labels accepted as the second argument of gen_context(...)
MLS_LEVELS = ('s0', 'mls_systemhigh', 's0-mls_systemhigh')


def analyze_fc_file(fc_path):
    """Analyze a .fc file and print a warning for every suspicious entry

    Every line which is not empty and does not start with '#', "'", 'ifdef('
    or 'ifndef(' is parsed as "<path pattern> [<file type>] <context>".
    The file type and the SELinux context are validated, then the path pattern
    is progressively "reduced" (common endings are truncated, escape sequences
    and variables are replaced with place-holders, optional parts are removed)
    so that the special characters which remain reveal a probable mistake.

    The order of the reduction steps matters, as each one works on the result
    of the previous ones.

    fc_path is the path of the file: its open() method is used to read it and
    its string form is used as a prefix of the warnings.

    Return True if no warning has been printed, False otherwise
    """
    # retval is shared by all the lines of the file: it becomes False on the first warning
    retval = True

    with fc_path.open('r') as fd:
        for lineno, line in enumerate(fd, start=1):
            line = line.strip()

            # Ignore lines that cannot contain a file context
            if not line or line.startswith(('#', "'", 'ifdef(', 'ifndef(')):
                continue

            # Every warning starts with "file:line: "
            prefix = f"{fc_path}:{lineno}: "

            # An entry is a path pattern without spaces, an optional two-character
            # file type starting with a dash, and a context
            matches = re.match(r'^(?P<path>\S+)\s+(?P<ftype>-.)?\s*(?P<context>.+)$', line)
            if matches is None:
                print(f"{prefix}unable to parse a file context file: {repr(line)}")
                retval = False
                continue

            # ftype is None when the entry does not specify a file type
            path, ftype, context = matches.group('path', 'ftype', 'context')

            # Check the file type of the pattern
            if ftype not in FILE_TYPES:
                print(f"{prefix}unexpected file type for {path}: {repr(ftype)}")
                retval = False

            # Check the SELinux context
            if context != NONE_CONTEXT:
                matches = re.match(r'^gen_context\((\S*), ?(\S*)\)$', context)
                if not matches:
                    print(f"{prefix}unknown SELinux context format for {path}: {context}")
                    retval = False
                else:
                    # Only the first failing check of the context is reported
                    context_label, context_mls = matches.groups()
                    if not context_label.startswith('system_u:object_r:'):
                        print(f"{prefix}SELinux context does not begin with 'system_u:object_r:' for {path}: {context}")  # noqa
                        retval = False
                    elif not re.match(r'^system_u:object_r:[0-9A-Za-z_]+$', context_label):
                        print(f"{prefix}SELinux context type uses unexpected characters for {path}: {context}")  # noqa
                        retval = False
                    elif context_mls not in MLS_LEVELS:
                        print(f"{prefix}SELinux context uses an unexpected MLS label for {path}: {context_mls} (in {context})")  # noqa
                        retval = False

            # Find obviously-wrong patterns
            if '(.*)?' in path:
                # As (.*) can match the empty string, the question mark is redundant
                print(f"{prefix}pattern (.*)? is very likely to be a misspelling for .* or (/.*)?, for {path}")
                retval = False

            if '\\d' in path:
                # In the past, "resource.\d" has already been introduced instead of "resource\.d".
                # Detect such bugs by forbidding the use of \d
                print(f"{prefix}escaping d could be a bug in {path}, please use [0-9] instead")
                retval = False

            # The next three checks detect an optional directory group which directly
            # follows a character which is not a slash
            if re.search(r'[^/]\(\.\*/\)\?', path):
                print(f"{prefix}using (.*/)? without a previous slash could be a bug in {path} as it can match the empty string, please use /(.*/)? instead")  # noqa
                retval = False

            if re.search(r'[^/]\(\[\^/\]\+/\)\?', path):
                print(f"{prefix}using ([^/]+/)? without a previous slash could be a bug in {path} as it can match the empty string, please use /([^/]+/)? instead")  # noqa
                retval = False

            if re.search(r'[^/]\(\.\*/\)\*', path):
                print(f"{prefix}using (.*/)* without a previous slash could be a bug in {path} as it can match the empty string, please use /(.*/)* instead")  # noqa
                retval = False

            # Detect a (.*/) group which is followed by a character other than ?, * or +
            if re.search(r'\(\.\*/\)[^?*+]', path):
                print(f"{prefix}using (.*/) without a ?, * or + symbol could be a bug in {path} as it misleads readers into thinking that this part is optional, please use .*/ instead")  # noqa
                retval = False

            # From now on, reduced_path is transformed step by step,
            # while path is kept for the messages
            reduced_path = path

            # Using "index`'(...)" is a way to prevent an error message from m4:
            # https://github.com/SELinuxProject/refpolicy/commit/cc1eee120263523c7b79ac16acc698c537a4d25e
            # Let's replace the symbols in the path, for the checks,
            # while keeping "path" untouched in the warning messages
            reduced_path = reduced_path.replace("index`'(", 'index(')
            reduced_path = reduced_path.replace("inde(x)(", 'index(')
            reduced_path = reduced_path.replace("include`'(", 'include(')

            # Check the character set of the path
            invalid_characters = set(re.findall(r'[^-0-9A-Za-z_@./()?+*%{}\[\]^|:~\\]', reduced_path))
            if invalid_characters:
                print(f"{prefix}unexpected characters {' '.join(sorted(invalid_characters))} in {path}")
                retval = False

            # Check the start of the path
            if not path.startswith(('/', 'HOME_DIR/', 'HOME_ROOT/')) and path not in ('HOME_DIR', 'HOME_ROOT'):
                print(f"{prefix}unexpected start of file pattern for {path}")
                retval = False

            # Reduce the path in order to check its sanity (like using a non-buggy end pattern):
            # * Truncating common endings
            #   Each pass applies the first matching ending only, and the passes are
            #   repeated until no ending matches any more
            while True:
                has_truncated = False
                for (common_end, substitute) in COMMON_FILE_END_PATTERNS:
                    if reduced_path.endswith(common_end):
                        reduced_path = f"{reduced_path[:-len(common_end)]}{substitute}"
                        has_truncated = True
                        break
                if not has_truncated:
                    break

            # * Replace back-slashed characters with special ones
            #   The first backslash is handled at each iteration. An unknown escape sequence
            #   stops the loop and leaves the remaining backslashes in the reduced path
            while '\\' in reduced_path:
                backslash_index = reduced_path.index('\\')
                esc_sequence = reduced_path[backslash_index:backslash_index + 2]
                if esc_sequence == '\\.':
                    # Replace \. with U+00B7 middle dot
                    reduced_path = f"{reduced_path[:backslash_index]}\u00b7{reduced_path[backslash_index + 2:]}"
                elif esc_sequence == '\\+':
                    # Replace \+ with U+2020 dagger
                    reduced_path = f"{reduced_path[:backslash_index]}\u2020{reduced_path[backslash_index + 2:]}"
                elif esc_sequence == '\\d':
                    # Replace \d with U+03B4 delta
                    reduced_path = f"{reduced_path[:backslash_index]}\u03b4{reduced_path[backslash_index + 2:]}"
                else:
                    print(f"{prefix}unexpected escape sequence {esc_sequence} in {path} (reduced to {reduced_path})")
                    retval = False
                    break

            # * Replace variables with place-holders
            if '%' in reduced_path:
                reduced_path = reduced_path.replace('%{USERID}', '_USERID_')
                reduced_path = reduced_path.replace('%{USERNAME}', '_USERNAME_')
                if '%{' in reduced_path:
                    print(f"{prefix}unexpected '%{{' in {path} after reduction to {reduced_path}")
                    retval = False

            # * Detect "??", "**", etc. before more reductions occur
            for bad_pattern in sorted(set(re.findall(r'[?*+][?*+]', reduced_path))):
                print(f"{prefix}unexpected pattern {repr(bad_pattern)} in {path} after reduction to {reduced_path}")
                retval = False

            # Remove optional directories and filename parts
            reduced_path = reduced_path.replace('/([^/]+/)?', '/')
            reduced_path = reduced_path.replace('(/[^/]+)?/', '/')
            reduced_path = reduced_path.replace('[^/]*', '')
            reduced_path = reduced_path.replace('[^/]+', '∞')
            reduced_path = reduced_path.replace('[^/-]+', '∞')
            reduced_path = reduced_path.replace('/(.*/)?', '/')
            reduced_path = reduced_path.replace('(/.*)?', '')
            reduced_path = reduced_path.replace('/.*/', '/')

            # * Remove parentheses around choices options: ((www)|(web)|(public_html)) -> (www|web|public_html)
            #   The substitution is repeated until the path no longer changes
            while True:
                new_reduced_path = re.sub(r'([|(])\(([-0-9A-Za-z_]+)\)([|)])', r'\1\2\3', reduced_path)
                if new_reduced_path == reduced_path:
                    break
                reduced_path = new_reduced_path

            # * Detect a pipe directly inside a parenthesis, which is wrong (use symbol ? instead)
            if '|)' in reduced_path or '(|' in reduced_path:
                print(f"{prefix}unexpected pipe-parenthesis pattern in {path} after reduction to {reduced_path}")
                retval = False

            # * Remove optional choices (...|...)?
            while True:
                new_reduced_path = re.sub(r'\([-0-9A-Za-z_|/·]+\)\?', '', reduced_path)
                if new_reduced_path == reduced_path:
                    break
                reduced_path = new_reduced_path

            # * Replace mandatory choices (...|...) with the first option
            while True:
                new_reduced_path = re.sub(r'\(([-0-9A-Za-z_/]+)\|[-0-9A-Za-z_|/]+\)', r'\1', reduced_path)
                if new_reduced_path == reduced_path:
                    break
                reduced_path = new_reduced_path

            # * Remove optional characters like c?
            reduced_path = re.sub(r'[-0-9A-Za-z_@·]\?', '', reduced_path)

            # If the reduced path still ends with a special character, something went wrong.
            # Instead of guessing the possible buggy characters, list the allowed ones.
            # The root directory pattern "/" is the only exception.
            if reduced_path and not re.match(r'[-0-9A-Za-z_@\]~·†∞]', reduced_path[-1]):
                if path != '/':
                    if reduced_path == path:
                        print(f"{prefix}unexpected end of file pattern for {path}")
                    else:
                        print(f"{prefix}unexpected end of file pattern for {path} after being reduced to {reduced_path}")  # noqa
                    retval = False

            # Now remove and replace some matching patterns from the path, in order to catch invalid characters
            reduced_path = reduced_path.replace('(-.*)?', '')
            reduced_path = reduced_path.replace('(.*-)?', '')
            reduced_path = reduced_path.replace('(-[0-9])?', '')
            reduced_path = reduced_path.replace('(.*)', '')
            reduced_path = reduced_path.replace('.*', '')
            reduced_path = reduced_path.replace('.+', '∞')
            # Character sets followed by ? or * may match nothing, so they are removed;
            # the other ones (possibly followed by +) match at least one character
            reduced_path = re.sub(r'\[[-0-9A-Za-z_.^]+\][?*]', '', reduced_path)
            reduced_path = re.sub(r'\[[-0-9A-Za-z_.^]+\](\+)?', '∞', reduced_path)
            reduced_path = reduced_path.replace('/[^/]+/', '/∞/')

            # Escaped dots have been replaced with a middle dot, so a remaining dot is an unescaped one
            if '.' in reduced_path:
                print(f"{prefix}unescaped dot still present {path} after being reduced to {reduced_path} (suggestion: use \\. to match a dot, or a charset like [^/])")  # noqa
                retval = False

            # Check the remaining symbols in the reduced path.
            # Only show a warning if no other ones were reported, in order to reduce the probability of false-positive.
            # NOTE(review): retval is shared by all the lines of the file, so this warning is also
            # skipped when the only previous warnings came from earlier lines - confirm this is intended
            invalid_symbols = set(re.findall(r'[^-0-9A-Za-z_@~:·†∞/]', reduced_path))
            if retval and invalid_symbols:
                print(f"{prefix}unexpected symbols {' '.join(sorted(invalid_symbols))} in {path} after being reduced to {reduced_path}. This could be due to an error in the pattern or a missing reduction rule in the checker")  # noqa
                retval = False

    return retval


def analyze_all_fc(policy_dirpath):
    """Check every .fc file found, recursively, under the given directory

    The files are visited in sorted order and all of them are analyzed, even
    once a warning has been reported for one of them.

    Return True only if no file produced a warning
    """
    # Build the complete list of results before calling all(), so that
    # short-circuiting never skips the analysis of a file
    outcomes = [analyze_fc_file(fc_file) for fc_file in sorted(policy_dirpath.glob('**/*.fc'))]
    return all(outcomes)


def main(argv=None):
    """Run the checker from the command line

    When .fc files are given as arguments, only these files are analyzed.
    Otherwise every .fc file of the policy directory is analyzed.

    argv is the list of command-line arguments, or None to let argparse use
    the ones of the current process.

    Return the exit status of the program: 0 if no warning has been printed,
    1 otherwise
    """
    policy_dir = (Path(__file__).parent / '..' / 'policy').resolve()

    parser = argparse.ArgumentParser(description="Find missing file contexts")
    parser.add_argument('fc_files', metavar='FC_FILES', nargs='*', type=Path,
                        help=".fc files to analyze (by default: all in the policy)")
    parser.add_argument('-p', '--policy', metavar='POLICY_DIR', type=Path, default=policy_dir,
                        help=f"path to the policy directory [{policy_dir}]")
    args = parser.parse_args(argv)

    if args.fc_files:
        # Analyze every given file, even after a failure, in order to report all the warnings
        success = all([analyze_fc_file(file_path) for file_path in args.fc_files])
    else:
        success = analyze_all_fc(args.policy)

    # The result is given to sys.exit(), where True would mean "exit status 1" and
    # False "exit status 0": always convert the boolean to a proper exit status
    return 0 if success else 1


if __name__ == '__main__':
    # Use the value returned by main() as the exit status of the process
    raise SystemExit(main())