2 # SPDX-License-Identifier: GPL-2.0
3 # Copyright Thomas Gleixner <tglx@linutronix.de>
5 from argparse
import ArgumentParser
6 from ply
import lex
, yacc
15 class ParserException(Exception):
16 def __init__(self
, tok
, txt
):
20 class SPDXException(Exception):
21 def __init__(self
, el
, txt
):
25 class SPDXdata(object):
27 self
.license_files
= 0
28 self
.exception_files
= 0
32 class dirinfo(object):
38 def update(self
, fname
, basedir
, miss
):
43 bdir
= os
.path
.dirname(fname
)
44 if bdir
== basedir
.rstrip('/'):
45 self
.files
.append(fname
)
47 # Read the spdx data from the LICENSES directory
48 def read_spdxdata(repo
):
50 # The subdirectories of LICENSES in the kernel source
# Note: the "exceptions" directory needs to be parsed last.
52 # OpenOCD specific: Begin
53 license_dirs
= [ "preferred", "stand-alone", "exceptions" ]
54 # OpenOCD specific: End
55 lictree
= repo
.head
.commit
.tree
['LICENSES']
59 for d
in license_dirs
:
60 for el
in lictree
[d
].traverse():
61 if not os
.path
.isfile(el
.path
):
65 for l
in open(el
.path
, encoding
="utf-8").readlines():
66 if l
.startswith('Valid-License-Identifier:'):
67 lid
= l
.split(':')[1].strip().upper()
68 if lid
in spdx
.licenses
:
69 raise SPDXException(el
, 'Duplicate License Identifier: %s' %lid
)
71 spdx
.licenses
.append(lid
)
73 elif l
.startswith('SPDX-Exception-Identifier:'):
74 exception
= l
.split(':')[1].strip().upper()
75 spdx
.exceptions
[exception
] = []
77 elif l
.startswith('SPDX-Licenses:'):
78 for lic
in l
.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
79 if not lic
in spdx
.licenses
:
80 raise SPDXException(None, 'Exception %s missing license %s' %(exception
, lic
))
81 spdx
.exceptions
[exception
].append(lic
)
83 elif l
.startswith("License-Text:"):
85 if not len(spdx
.exceptions
[exception
]):
86 raise SPDXException(el
, 'Exception %s is missing SPDX-Licenses' %exception
)
87 spdx
.exception_files
+= 1
89 spdx
.license_files
+= 1
93 class id_parser(object):
95 reserved
= [ 'AND', 'OR', 'WITH' ]
96 tokens
= [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved
98 precedence
= ( ('nonassoc', 'AND', 'OR'), )
102 def __init__(self
, spdx
):
106 self
.lexer
= lex
.lex(module
= self
, reflags
= re
.UNICODE
)
107 # Initialize the parser. No debug file and no parser rules stored on disk
108 # The rules are small enough to be generated on the fly
109 self
.parser
= yacc
.yacc(module
= self
, write_tables
= False, debug
= False)
110 self
.lines_checked
= 0
121 def set_dirinfo(self
, basedir
, dirdepth
):
123 self
.basedir
= basedir
124 bdir
= basedir
.lstrip('./').rstrip('/')
126 parts
= bdir
.split('/')
129 self
.dirdepth
= dirdepth
+ len(parts
)
131 # Validate License and Exception IDs
132 def validate(self
, tok
):
133 id = tok
.value
.upper()
135 if not id in self
.spdx
.licenses
:
136 raise ParserException(tok
, 'Invalid License ID')
138 elif tok
.type == 'EXC':
139 if id not in self
.spdx
.exceptions
:
140 raise ParserException(tok
, 'Invalid Exception ID')
141 if self
.lastid
not in self
.spdx
.exceptions
[id]:
142 raise ParserException(tok
, 'Exception not valid for license %s' %self
.lastid
)
144 elif tok
.type != 'WITH':
148 def t_RPAR(self
, tok
):
150 self
.lasttok
= tok
.type
153 def t_LPAR(self
, tok
):
155 self
.lasttok
= tok
.type
161 if self
.lasttok
== 'EXC':
163 raise ParserException(tok
, 'Missing parentheses')
165 tok
.value
= tok
.value
.strip()
166 val
= tok
.value
.upper()
168 if val
in self
.reserved
:
170 elif self
.lasttok
== 'WITH':
173 self
.lasttok
= tok
.type
def t_error(self, tok):
    # Lexer error hook: reject the offending token by raising a
    # ParserException that carries the token and a fixed message.
    # NOTE(review): documented with comments rather than a docstring on
    # purpose -- PLY gives docstrings of t_* rule functions a meaning of
    # their own (token patterns); confirm before converting to a docstring.
    failure = ParserException(tok, 'Invalid token')
    raise failure
188 def p_error(self
, p
):
190 raise ParserException(None, 'Unfinished license expression')
192 raise ParserException(p
, 'Syntax error')
194 def parse(self
, expr
):
197 self
.parser
.parse(expr
, lexer
= self
.lexer
)
199 def parse_lines(self
, fd
, maxlines
, fname
):
205 line
= line
.decode(locale
.getpreferredencoding(False), errors
='ignore')
207 if self
.curline
> maxlines
:
209 self
.lines_checked
+= 1
210 if line
.find("SPDX-License-Identifier:") < 0:
212 expr
= line
.split(':')[1].strip()
213 # Remove trailing comment closure
214 if line
.strip().endswith('*/'):
215 expr
= expr
.rstrip('*/').strip()
216 # Remove trailing xml comment closure
217 if line
.strip().endswith('-->'):
218 expr
= expr
.rstrip('-->').strip()
219 # Special case for SH magic boot code files
220 if line
.startswith('LIST \"'):
221 expr
= expr
.rstrip('\"').strip()
225 # Should we check for more SPDX ids in the same file and
226 # complain if there are any?
231 except ParserException
as pe
:
233 col
= line
.find(expr
) + pe
.tok
.lexpos
235 sys
.stdout
.write('%s: %d:%d %s: %s\n' %(fname
, self
.curline
, col
, pe
.txt
, tok
))
237 sys
.stdout
.write('%s: %d:0 %s\n' %(fname
, self
.curline
, pe
.txt
))
238 self
.spdx_errors
+= 1
243 base
= os
.path
.dirname(fname
)
244 if self
.dirdepth
> 0:
245 parts
= base
.split('/')
248 while i
< self
.dirdepth
and i
< len(parts
) and len(parts
[i
]):
249 base
+= '/' + parts
[i
]
251 elif self
.dirdepth
== 0:
254 base
= './' + base
.rstrip('/')
257 di
= self
.spdx_dirs
.get(base
, dirinfo())
258 di
.update(fname
, base
, fail
)
259 self
.spdx_dirs
[base
] = di
261 class pattern(object):
262 def __init__(self
, line
):
264 self
.match
= self
.match_file
266 self
.match
= self
.match_dot
267 elif line
.endswith('/'):
268 self
.pattern
= line
[:-1]
269 self
.match
= self
.match_dir
270 elif line
.startswith('/'):
271 self
.pattern
= line
[1:]
272 self
.match
= self
.match_fn
def match_dot(self, fpath):
    """Return True when the last path component of *fpath* starts with '.'.

    Only the base name is inspected; dots in leading directory
    components do not count.
    """
    leaf = os.path.basename(fpath)
    return leaf.startswith('.')
def match_file(self, fpath):
    """Return True when the base name of *fpath* equals ``self.pattern``.

    The comparison is an exact, case-sensitive string equality on the
    final path component only.
    """
    leaf = os.path.basename(fpath)
    return leaf == self.pattern
def match_fn(self, fpath):
    """Return True when the whole of *fpath* matches the glob ``self.pattern``.

    Uses ``fnmatch.fnmatchcase``, so the match is case-sensitive
    regardless of platform.
    """
    glob = self.pattern
    return fnmatch.fnmatchcase(fpath, glob)
283 def match_dir(self
, fpath
):
284 if self
.match_fn(os
.path
.dirname(fpath
)):
286 return fpath
.startswith(self
.pattern
)
288 def exclude_file(fpath
):
289 for rule
in exclude_rules
:
290 if rule
.match(fpath
):
294 def scan_git_tree(tree
, basedir
, dirdepth
):
295 parser
.set_dirinfo(basedir
, dirdepth
)
296 for el
in tree
.traverse():
297 if not os
.path
.isfile(el
.path
):
299 if exclude_file(el
.path
):
302 with
open(el
.path
, 'rb') as fd
:
303 parser
.parse_lines(fd
, args
.maxlines
, el
.path
)
305 def scan_git_subtree(tree
, path
, dirdepth
):
306 for p
in path
.strip('/').split('/'):
308 scan_git_tree(tree
, path
.strip('/'), dirdepth
)
310 def read_exclude_file(fname
):
314 with
open(fname
) as fd
:
317 if line
.startswith('#'):
321 rules
.append(pattern(line
))
324 if __name__
== '__main__':
326 ap
= ArgumentParser(description
='SPDX expression checker')
327 ap
.add_argument('path', nargs
='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
328 ap
.add_argument('-d', '--dirs', action
='store_true',
329 help='Show [sub]directory statistics.')
330 ap
.add_argument('-D', '--depth', type=int, default
=-1,
331 help='Directory depth for -d statistics. Default: unlimited')
332 ap
.add_argument('-e', '--exclude',
333 help='File containing file patterns to exclude. Default: scripts/spdxexclude')
334 ap
.add_argument('-f', '--files', action
='store_true',
335 help='Show files without SPDX.')
336 ap
.add_argument('-m', '--maxlines', type=int, default
=15,
337 help='Maximum number of lines to scan in a file. Default 15')
338 ap
.add_argument('-v', '--verbose', action
='store_true', help='Verbose statistics output')
339 args
= ap
.parse_args()
341 # Sanity check path arguments
342 if '-' in args
.path
and len(args
.path
) > 1:
343 sys
.stderr
.write('stdin input "-" must be the only path argument\n')
347 # Use git to get the valid license expressions
348 repo
= git
.Repo(os
.getcwd())
351 # Initialize SPDX data
352 spdx
= read_spdxdata(repo
)
354 # Initialize the parser
355 parser
= id_parser(spdx
)
357 except SPDXException
as se
:
359 sys
.stderr
.write('%s: %s\n' %(se
.el
.path
, se
.txt
))
361 sys
.stderr
.write('%s\n' %se.txt
)
364 except Exception as ex
:
365 sys
.stderr
.write('FAIL: %s\n' %ex)
366 sys
.stderr
.write('%s\n' %traceback
.format_exc())
372 fname
= os
.path
.join(os
.path
.dirname(__file__
), 'spdxexclude')
373 exclude_rules
= read_exclude_file(fname
)
374 except Exception as ex
:
375 sys
.stderr
.write('FAIL: Reading exclude file %s: %s\n' %(fname
, ex
))
379 if len(args
.path
) and args
.path
[0] == '-':
380 stdin
= os
.fdopen(sys
.stdin
.fileno(), 'rb')
381 parser
.parse_lines(stdin
, args
.maxlines
, '-')
385 if os
.path
.isfile(p
):
386 parser
.parse_lines(open(p
, 'rb'), args
.maxlines
, p
)
387 elif os
.path
.isdir(p
):
388 scan_git_subtree(repo
.head
.reference
.commit
.tree
, p
,
391 sys
.stderr
.write('path %s does not exist\n' %p
)
395 scan_git_tree(repo
.head
.commit
.tree
, '.', args
.depth
)
397 ndirs
= len(parser
.spdx_dirs
)
400 for di
in parser
.spdx_dirs
.values():
405 sys
.stderr
.write('\n')
406 sys
.stderr
.write('License files: %12d\n' %spdx
.license_files
)
407 sys
.stderr
.write('Exception files: %12d\n' %spdx
.exception_files
)
408 sys
.stderr
.write('License IDs %12d\n' %len(spdx
.licenses
))
409 sys
.stderr
.write('Exception IDs %12d\n' %len(spdx
.exceptions
))
410 sys
.stderr
.write('\n')
411 sys
.stderr
.write('Files excluded: %12d\n' %parser
.excluded
)
412 sys
.stderr
.write('Files checked: %12d\n' %parser
.checked
)
413 sys
.stderr
.write('Lines checked: %12d\n' %parser
.lines_checked
)
415 pc
= int(100 * parser
.spdx_valid
/ parser
.checked
)
416 sys
.stderr
.write('Files with SPDX: %12d %3d%%\n' %(parser
.spdx_valid
, pc
))
417 sys
.stderr
.write('Files with errors: %12d\n' %parser
.spdx_errors
)
419 sys
.stderr
.write('\n')
420 sys
.stderr
.write('Directories accounted: %8d\n' %ndirs
)
421 pc
= int(100 * dirsok
/ ndirs
)
422 sys
.stderr
.write('Directories complete: %8d %3d%%\n' %(dirsok
, pc
))
424 if ndirs
and ndirs
!= dirsok
and args
.dirs
:
426 sys
.stderr
.write('\n')
427 sys
.stderr
.write('Incomplete directories: SPDX in Files\n')
428 for f
in sorted(parser
.spdx_dirs
.keys()):
429 di
= parser
.spdx_dirs
[f
]
431 valid
= di
.total
- di
.missing
432 pc
= int(100 * valid
/ di
.total
)
433 sys
.stderr
.write(' %-80s: %5d of %5d %3d%%\n' %(f
, valid
, di
.total
, pc
))
435 if ndirs
and ndirs
!= dirsok
and args
.files
:
436 if args
.verbose
or args
.dirs
:
437 sys
.stderr
.write('\n')
438 sys
.stderr
.write('Files without SPDX:\n')
439 for f
in sorted(parser
.spdx_dirs
.keys()):
440 di
= parser
.spdx_dirs
[f
]
441 for f
in sorted(di
.files
):
442 sys
.stderr
.write(' %s\n' %f)
446 except Exception as ex
:
447 sys
.stderr
.write('FAIL: %s\n' %ex)
448 sys
.stderr
.write('%s\n' %traceback
.format_exc())