Merge branch 'tca-do-not-fail-if-image-does-not-exist' into 'stable'
[tails.git] / bin / doc-impacted-by
blob 84f1d246fe09ac9ae6ebaebcea963595b7a37cf4
1 #!/usr/bin/env ruby
2 require 'deep_merge'
3 require 'English'
4 require 'git'
5 require 'optparse'
6 require 'yaml'
8 require 'test/unit'
# NOTE(review): presumably this marks the test run as already done so
# that test-unit does not try to run a test suite when the script
# exits; we only load test-unit for its assertions -- confirm.
Test::Unit.run = true
# Make all the assert_* methods easily accessible.
include Test::Unit::Assertions # rubocop:disable Style/MixinUsage

# The Ruby Git module we use has to be given the Git root directory
# before it can do anything, so it cannot be used to find that
# directory; ask the git command-line tool instead.
GIT_DIR = `git rev-parse --show-toplevel`.chomp
# Fail early, with an explanation, when we are not inside a Git work tree.
assert_equal(0, $CHILD_STATUS.exitstatus, 'Error: `git rev-parse` failed')
# Relationship description file used unless --relationship-file is given.
DEFAULT_RELATIONSHIP_FILE = "#{GIT_DIR}/doc-source-relationships.yml".freeze
class Object
  # Returns the receiver unchanged when it is exactly an Array (not a
  # subclass of Array); any other object, including nil and Hashes, is
  # wrapped in a new one-element Array.
  def arrayify
    return self if instance_of?(Array)

    [self]
  end
end
class Array
  # Returns the elements that are exactly Strings and match the
  # shell-style pattern `glob`. File::FNM_EXTGLOB is passed so brace
  # alternatives such as `{html,mdwn}` are understood. Elements of any
  # other class are dropped.
  def glob(glob)
    select do |element|
      next false unless element.instance_of?(String)

      File.fnmatch?(glob, element, File::FNM_EXTGLOB)
    end
  end
end
# Parses the command line found in ARGV.
#
# Returns `[options, parameters]`: `options` is a Hash that may hold
# the keys 'relationship-file' (a path) and 'skip-packages' (true);
# `parameters` is the Array of remaining positional arguments.
# Exactly 4 positional arguments are required, or exactly 2 when
# --skip-packages is given; otherwise the assertion fails.
# --help prints the usage text and exits.
def parse_argv!
  options = {}
  opt_parser = OptionParser.new

  description = 'Produces a list of documentation pages that might need ' \
                'attention due to the changes from COMMITISH1 to ' \
                'COMMITISH2. The corresponding .build-manifest files must ' \
                'be passed as MANIFEST1 and MANIFEST2.'
  example = ' bin/doc-impacted-by 3.0 3.2 ' \
            'tails-amd64-3.0.build-manifest ' \
            'tails-amd64-3.2.build-manifest'

  opt_parser.banner =
    'Usage: [OPTION]... COMMITISH1 COMMITISH2 MANIFEST1 MANIFEST2'
  opt_parser.separator ''
  opt_parser.separator description
  opt_parser.separator ''
  opt_parser.separator 'Example:'
  opt_parser.separator example
  opt_parser.separator ''
  opt_parser.separator 'Options:'

  opt_parser.on('-h', '--help', 'Show this message') do
    puts opt_parser
    exit
  end

  relationship_file_help =
    'Use a custom PATH for the doc-source relationship description ' \
    "file (default: #{File.basename(DEFAULT_RELATIONSHIP_FILE)} in " \
    'the Git root)'
  opt_parser.on('-f PATH', '--relationship-file=PATH',
                relationship_file_help) do |path|
    options['relationship-file'] = path
  end

  skip_packages_help = 'Skip looking at packages, ' \
                       'only look at Git'
  opt_parser.on('-s', '--skip-packages', skip_packages_help) do
    options['skip-packages'] = true
  end

  parameters = opt_parser.parse(ARGV)
  # Without packages to compare, the two manifest paths are not needed.
  req_nr_parameters = if options['skip-packages']
                        2
                      else
                        4
                      end
  assert_equal(req_nr_parameters, parameters.size,
               "You must pass exactly #{req_nr_parameters} parameters")
  [options, parameters]
end
# From a .build-manifest, from its list of packages, generate a
# Hash mapping `package` to a Hash containing the remaining package
# fields from the .build-manifest (e.g. `arch`, `version`).
#
# `path` is the .build-manifest file to read; it is parsed as YAML and
# must contain the lists `packages.binary` and `packages.source`.
# Binary entries are processed before source entries, so when the same
# package name occurs more than once the last entry wins.
def read_package_manifest_file_as_package_map(path)
  package_manifest = YAML.safe_load(File.read(path))
  packages = package_manifest['packages']['binary'] +
             package_manifest['packages']['source']
  packages.each_with_object({}) do |entry, package_map|
    # `reject` returns a new Hash, so the parsed entry is left untouched.
    package_map[entry['package']] = entry.reject { |k, _| k == 'package' }
  end
end
# Validates one entry of the relationship file and returns a
# canonicalized copy of it; `orig_entry` itself is not modified.
#
# In the copy every abbreviated field name ('file', 'package', 'page',
# 'test') is replaced by its plural form, and the value of every known
# field is turned into an Array via `arrayify`.
#
# An assertion fails when the entry contains both a field and its
# abbreviation, lacks 'pages', consists of 'pages' only, or contains
# unknown fields. On any error the offending entry is dumped to
# stderr before the error is re-raised.
def canonicalize_relationship(orig_entry)
  entry = orig_entry.clone
  field_abbreviations = {
    'file'    => 'files',
    'package' => 'packages',
    'page'    => 'pages',
    'test'    => 'tests',
  }
  fields = field_abbreviations.values
  field_abbreviations.each do |short, long|
    next unless entry.key?(short)

    assert(!entry.key?(long),
           "contains both '#{long}' and its abbreviation '#{short}'")
    entry[long] = entry.delete(short)
  end
  assert(entry.key?('pages'),
         "lacks the obligatory 'pages' field")
  assert(entry.keys.size > 1,
         "entries with only a 'pages' field are meaningless")
  # Note: `(a - b).empty?` <==> "a is a subset of b?"
  assert((entry.keys - fields).empty?,
         "contains invalid fields: #{entry.keys - fields}")
  fields.each do |field|
    next unless entry.key?(field)

    entry[field] = entry[field].arrayify
  end
  entry
rescue StandardError
  # test-unit's assertion failures are StandardErrors, so they end up
  # here too; signals and `exit` pass through without the dump.
  warn 'Problematic entry:'
  warn YAML.dump([orig_entry])
  $stderr.puts
  raise
end
# Reads the `relationship_file` and returns a "documentation impact
# map", a Hash which maps each documentation page to the sources it is
# impacted by.
#
# Each value is itself a Hash holding whichever of the canonical
# fields other than 'pages' the relationship entries for that page
# contain. A page listed in several entries gets those entries'
# sources merged. An empty relationship file yields an empty map.
def read_relationship_file_as_impact_map(relationship_file)
  impact_map = {}
  # safe_load gives nil/false for an empty document; treat that as
  # "no relationships" instead of crashing on it.
  relationships = YAML.safe_load(File.read(relationship_file)) || []
  relationships.map { |e| canonicalize_relationship(e) }.each do |entry|
    source_files = entry.reject { |k, _| k == 'pages' }
    entry['pages'].each do |page|
      # The result of deep_merge is discarded: this relies on the
      # deep_merge gem's Hash#deep_merge updating `impact_map` in place.
      impact_map.deep_merge({ page => source_files.clone })
    end
  end
  impact_map
end
# Given the "documentation impact map" and the "old" and "new" state,
# look at the changes between "old" and "new" and find which
# documentation pages are impacted. The return value is a mapping
# from each affected documentation page to the list of "reasons",
# explanations of how the sources impact the page.
#
# - `impact_map`: Hash mapping a page to a Hash with any of the keys
#   'files', 'tests' and 'packages' (Arrays); any other key raises.
# - `old_commit`, `new_commit`: commit-ishes passed to Git for the diff.
# - `old_manifest`, `new_manifest`: Hashes mapping a package name to
#   its Hash of fields; both may be empty.
#
# The keys of the returned Hash are paths below wiki/src as listed by
# `git ls-tree` for `new_commit`.
def find_impacted_docs(impact_map,
                       old_commit, new_commit,
                       old_manifest, new_manifest)
  git = Git.open(GIT_DIR)
  git_diff = git.diff(old_commit, new_commit)
  # Create the list of all wiki files, and use it as an approximation
  # of all documentation pages. It's a superset, so it only impacts
  # performance when we search in it later. Ideally we'd like to do
  # something like `git.object(new_commit).path('wiki/src')` but the
  # Git module we use seems to not support listing files at a certain
  # commit.
  #
  # The command is given as an argument vector so that no shell is
  # involved and the commit-ish reaches git verbatim.
  git_cmd_wiki_files = ['git', 'ls-tree', '-r', '--full-tree',
                        '--name-only', new_commit, '--', 'wiki/src']
  doc_pages = IO.popen(git_cmd_wiki_files, &:read).chomp.split("\n")
  assert_equal(0, $CHILD_STATUS.exitstatus, 'Error: `git ls-tree` failed')

  old_packages = old_manifest.keys
  new_packages = new_manifest.keys
  removed_packages = old_packages - new_packages
  introduced_packages = new_packages - old_packages
  updated_packages = (new_packages & old_packages).reject do |package|
    old_manifest[package] == new_manifest[package]
  end

  impacted_docs = {}
  impact_map.each do |page, sources|
    file_paths = []
    package_globs = []
    test_paths = []
    sources.each do |type, source|
      case type
      when 'packages'
        package_globs = source
      when 'tests'
        test_paths = source.map { |path| "features/#{path}" }
      when 'files'
        file_paths = source
      else
        raise "Unknown field '#{type}' in impact map; this should not " \
              'happen, and probably means canonicalize_relationship() ' \
              'is buggy'
      end
    end
    all_source_file_paths = file_paths + test_paths
    doc_pages.glob("wiki/src/#{page}.{html,mdwn}").each do |page_path|
      all_source_file_paths.each do |source_path|
        # Git::Diff#path() alters the object so it cannot be used for a
        # successive call for another path.
        source_path_diff = git_diff.clone.path(source_path)
        next if source_path_diff.empty?

        changed_files = source_path_diff.map(&:path)
        reasons = changed_files.map do |path|
          "Changes in source file: #{path}"
        end
        # deep_merge's result is discarded: `impacted_docs` is expected
        # to be updated in place.
        impacted_docs.deep_merge({ page_path => reasons })
      end
      package_globs.each do |package_glob|
        removed = removed_packages.glob(package_glob).map do |package|
          "Removed package: #{package}"
        end
        introduced = introduced_packages.glob(package_glob).map do |package|
          "Introduced package: #{package}"
        end
        updated = updated_packages.glob(package_glob).map do |package|
          old = old_manifest[package]
          new = new_manifest[package]
          assert_not_equal(
            old, new,
            "'#{package}' has identical data in both manifests so it is " \
            'a bug that we ended up here'
          )
          package_changes = describe_package_changes(old, new)
          "Updated package: #{package} (#{package_changes})"
        end
        reasons = removed + introduced + updated
        impacted_docs.deep_merge({ page_path => reasons }) unless reasons.empty?
      end
    end
  end
  impacted_docs
end

# Describes how the manifest fields of one package differ between `old`
# and `new` (both Hashes of field name to value) as a comma-separated
# list of "old → new" pairs, ordered by field name. Fields present in
# only one of the two Hashes are included, with the missing side
# rendered as an empty string.
def describe_package_changes(old, new)
  changes = (old.keys | new.keys).sort.map do |key|
    old_val = old[key]
    new_val = new[key]
    old_val != new_val ? "#{old_val} → #{new_val}" : nil
  end
  changes.compact.join(', ')
end
# Main

options, parameters = parse_argv!
relationship_file = options['relationship-file'] || DEFAULT_RELATIONSHIP_FILE
# With --skip-packages only the two commit-ishes are passed, so both
# manifest paths are nil here.
old_commit, new_commit, old_manifest_path, new_manifest_path = parameters

impact_map = read_relationship_file_as_impact_map(relationship_file)
if options['skip-packages']
  # Empty manifests mean no package is seen as removed, introduced or
  # updated, so only changes in Git are reported.
  old_manifest = {}
  new_manifest = {}
else
  old_manifest = read_package_manifest_file_as_package_map(old_manifest_path)
  new_manifest = read_package_manifest_file_as_package_map(new_manifest_path)
end
impacted_docs = find_impacted_docs(
  impact_map,
  old_commit, new_commit,
  old_manifest, new_manifest
)
261 unless impacted_docs.empty?
262 result =
263 impacted_docs
264 .sort
265 .map do |page, reasons|
266 "#{page}\n" +
267 reasons
268 .sort.map do |reason|
269 "- #{reason}"
271 .join("\n")
273 .join("\n\n")
275 puts 'The following documentation pages need investigation:'
276 puts
277 puts result
279 if options['skip-packages']
280 warn 'Warning! The --skip-packages option makes this ' \
281 'report incomplete!'