#!/usr/bin/perl -w
#
# Clean one or more text files of stealth whitespace: trailing whitespace,
# spaces hidden in front of tabs, and blank lines at the end of the file.
# Each file named on the command line is rewritten in place; arguments
# that are not plain files (directories, for example) are reported and
# skipped.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
# Origin: /openbmc/linux/scripts/cleanfile
#         (revision 12b315603a1eb95b4e4ea3389ba44bd7ded0aa35)
#

use bytes;
use Fcntl qw(:seek);
use File::Basename;

#
# clean_space_tabs($line)
#
# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Tab stops are assumed to be every 8 columns.  A run of spaces that is
# immediately followed by a tab is rewritten as the smallest number of
# tabs that reaches the same column; runs of spaces followed by anything
# else (or by the end of the string) are kept exactly as they are.
# A "\n" or "\r" resets the column counter to zero.
#
# Takes the text to clean and returns the cleaned copy; the argument
# itself is not modified.
#
# The ($) prototype is kept so that existing call sites parse unchanged.
#
sub clean_space_tabs($)
{
    use strict;
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $lo   = '';              # Output built so far
    my $pos  = 0;               # Column reached by the text in $lo
    my $nsp  = 0;               # Spaces seen but not yet written to $lo

    for my $c (split //, $li) {
        if ($c eq "\t") {
            # The pending spaces plus this tab end at the next tab stop
            # past $pos+$nsp; emit one tab per tab stop crossed on the
            # way there, which makes the pending spaces redundant.
            my $npos = ($pos + $nsp + 8) & ~7;
            $lo .= "\t" x (($npos >> 3) - ($pos >> 3));
            $pos = $npos;
            $nsp = 0;
        } elsif ($c eq ' ') {
            # Hold the space back until we know what follows it.
            $nsp++;
        } else {
            # Anything else: the pending spaces are real, write them out.
            $lo .= ' ' x $nsp;
            $lo .= $c;
            $pos = ($c eq "\n" || $c eq "\r") ? 0 : $pos + $nsp + 1;
            $nsp = 0;
        }
    }

    # Spaces at the very end of the string are preserved.
    $lo .= ' ' x $nsp;
    return $lo;
}

use strict;
use warnings;

# Name of this script, used as a prefix for diagnostics.
my $name = basename($0);

# Process every command-line argument as a file to be cleaned in place.
foreach my $f (@ARGV) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    # Open read/write: the cleaned text is written back over the original.
    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my ($nread, $data);

    while ($nread = read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    # read() returns undef on error.  Do not go on to rewrite a file
    # that could not be read completely.
    if (!defined($nread)) {
        print STDERR "$name: Cannot read file: $f: $!\n";
        close($fh);
        next;
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next;
    }

    if (!seek($fh, 0, SEEK_SET)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        next;
    }

    my @lines   = ();           # Cleaned lines that will be written back
    my @blanks  = ();           # Blank lines held until a non-blank follows
    my $changed = 0;            # True once the output differs from the input

    while (defined(my $line = <$fh>)) {
        my $orig = $line;

        # Remove trailing spaces, tabs and carriage returns (so CRLF
        # line endings become LF).
        $line =~ s/[ \t\r]*$//;
        $line = clean_space_tabs($line);

        # Compare contents, not lengths: a space-tab pair can be
        # rewritten as two tabs, which changes the line but not its size.
        $changed = 1 if $line ne $orig;

        if ($line eq "\n") {
            push(@blanks, $line);
        } else {
            push(@lines, @blanks, $line);
            @blanks = ();
        }
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if @blanks;

    if ($changed) {
        # Only write to the file if changed
        seek($fh, 0, SEEK_SET)
            or die "$name: Failed to rewind modified file: $f: $!\n";
        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";

        # The cleaned text is never longer than the original, so cut
        # off whatever is left of the old contents.  tell() reports
        # failure as -1.
        my $where = tell($fh);
        if ($where < 0 || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors can surface only at close time.
    close($fh)
        or die "$name: Failed to close file: $f: $!\n";
}