1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# Treewide grep for references to files under Documentation, and report
5# non-existing files in stderr.
6
7use warnings;
8use strict;
9use Getopt::Long qw(:config no_auto_abbrev);
10
11my $scriptname = $0;
12$scriptname =~ s,.*/([^/]+/),$1,;
13
14# Parse arguments
15my $help = 0;
16my $fix = 0;
17
18GetOptions(
19	'fix' => \$fix,
20	'h|help|usage' => \$help,
21);
22
23if ($help != 0) {
24    print "$scriptname [--help] [--fix]\n";
25    exit -1;
26}
27
28# Step 1: find broken references
29print "Finding broken references. This may take a while...  " if ($fix);
30
31my %broken_ref;
32
33my $doc_fix = 0;
34
35open IN, "git grep ':doc:\`' Documentation/|"
36     or die "Failed to run git grep";
37while (<IN>) {
38	next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,);
39
40	my $d = $1;
41	my $doc_ref = $2;
42
43	my $f = $doc_ref;
44
45	$d =~ s,(.*/).*,$1,;
46	$f =~ s,.*\<([^\>]+)\>,$1,;
47
48	$f ="$d$f.rst";
49
50	next if (grep -e, glob("$f"));
51
52	if ($fix && !$doc_fix) {
53		print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n";
54	}
55	$doc_fix++;
56
57	print STDERR "$f: :doc:`$doc_ref`\n";
58}
59close IN;
60
61open IN, "git grep 'Documentation/'|"
62     or die "Failed to run git grep";
63while (<IN>) {
64	next if (!m/^([^:]+):(.*)/);
65
66	my $f = $1;
67	my $ln = $2;
68
69	# On linux-next, discard the Next/ directory
70	next if ($f =~ m,^Next/,);
71
72	# Makefiles and scripts contain nasty expressions to parse docs
73	next if ($f =~ m/Makefile/ || $f =~ m/\.sh$/);
74
75	# Skip this script
76	next if ($f eq $scriptname);
77
78	if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),) {
79		my $prefix = $1;
80		my $ref = $2;
81		my $base = $2;
82		my $extra = $3;
83
84		# some file references are like:
85		# /usr/src/linux/Documentation/DMA-{API,mapping}.txt
86		# For now, ignore them
87		next if ($extra =~ m/^{/);
88
89		# Remove footnotes at the end like:
90		# Documentation/devicetree/dt-object-internal.txt[1]
91		$ref =~ s/(txt|rst)\[\d+]$/$1/;
92
93		# Remove ending ']' without any '['
94		$ref =~ s/\].*// if (!($ref =~ m/\[/));
95
96		# Remove puntuation marks at the end
97		$ref =~ s/[\,\.]+$//;
98
99		my $fulref = "$prefix$ref";
100
101		$fulref =~ s/^(\<file|ref)://;
102		$fulref =~ s/^[\'\`]+//;
103		$fulref =~ s,^\$\(.*\)/,,;
104		$base =~ s,.*/,,;
105
106		# Remove URL false-positives
107		next if ($fulref =~ m/^http/);
108
109		# Remove sched-pelt false-positive
110		next if ($fulref =~ m,^Documentation/scheduler/sched-pelt$,);
111
112		# Discard some build examples from Documentation/target/tcm_mod_builder.txt
113		next if ($fulref =~ m,mnt/sdb/lio-core-2.6.git/Documentation/target,);
114
115		# Check if exists, evaluating wildcards
116		next if (grep -e, glob("$ref $fulref"));
117
118		# Accept relative Documentation patches for tools/
119		if ($f =~ m/tools/) {
120			my $path = $f;
121			$path =~ s,(.*)/.*,$1,;
122			next if (grep -e, glob("$path/$ref $path/$fulref"));
123		}
124
125		if ($fix) {
126			if (!($ref =~ m/(scripts|Kconfig|Kbuild)/)) {
127				$broken_ref{$ref}++;
128			}
129		} else {
130			print STDERR "$f: $fulref\n";
131		}
132	}
133}
134close IN;
135
136exit 0 if (!$fix);
137
138# Step 2: Seek for file name alternatives
139print "Auto-fixing broken references. Please double-check the results\n";
140
141foreach my $ref (keys %broken_ref) {
142	my $new =$ref;
143
144	# get just the basename
145	$new =~ s,.*/,,;
146
147	my $f="";
148
149	# usual reason for breakage: DT file moved around
150	if ($ref =~ /devicetree/) {
151		my $search = $new;
152		$search =~ s,^.*/,,;
153		$f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
154		if (!$f) {
155			# Manufacturer name may have changed
156			$search =~ s/^.*,//;
157			$f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
158		}
159	}
160
161	# usual reason for breakage: file renamed to .rst
162	if (!$f) {
163		$new =~ s/\.txt$/.rst/;
164		$f=qx(find . -iname $new) if ($new);
165	}
166
167	# usual reason for breakage: use dash or underline
168	if (!$f) {
169		$new =~ s/[-_]/[-_]/g;
170		$f=qx(find . -iname $new) if ($new);
171	}
172
173	# Wild guess: seek for the same name on another place
174	if (!$f) {
175		$f = qx(find . -iname $new) if ($new);
176	}
177
178	my @find = split /\s+/, $f;
179
180	if (!$f) {
181		print STDERR "ERROR: Didn't find a replacement for $ref\n";
182	} elsif (scalar(@find) > 1) {
183		print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n";
184		foreach my $j (@find) {
185			$j =~ s,^./,,;
186			print STDERR "    $j\n";
187		}
188	} else {
189		$f = $find[0];
190		$f =~ s,^./,,;
191		print "INFO: Replacing $ref to $f\n";
192		foreach my $j (qx(git grep -l $ref)) {
193			qx(sed "s\@$ref\@$f\@g" -i $j);
194		}
195	}
196}
197