#!/usr/bin/perl -w
#
# Clean one or more text files of stealth whitespace: trailing spaces,
# tabs and carriage returns at end of line, spaces hidden in front of
# tabs, and blank lines at the end of the file.  Arguments that are not
# plain files are reported and skipped.
# WARNING: this can be a highly destructive operation.  Use with caution.
#

use strict;
use bytes;
use File::Basename;

#
# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one line of text and returns the cleaned line.  Spaces are held
# back until the next character is known:
#   - before a tab they are dropped, and as many tabs are emitted as are
#     needed to land on the tab stop the original text reached;
#   - before anything else (or at end of input) they are emitted as-is.
# Tab stops are every 8 columns.
#
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my($li) = @_;
    my $lo  = '';               # Output built so far
    my $pos = 0;                # Column reached by the output so far
    my $nsp = 0;                # Spaces seen but not yet emitted

    for my $i (0 .. length($li) - 1) {
        my $c = substr($li, $i, 1);

        if ($c eq ' ') {
            $nsp++;
        } elsif ($c eq "\t") {
            # Column the input reaches after the pending spaces and
            # this tab, rounded up to the next multiple of 8.
            my $npos = ($pos + $nsp + 8) & ~7;
            # Number of tab stops between the output column and there.
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $nsp = 0;
        } else {
            # Pending spaces are real content: flush them first.
            $lo .= ' ' x $nsp;
            $lo .= $c;
            if ($c eq "\n" || $c eq "\r") {
                $pos = 0;       # Line terminators restart the column count
            } else {
                $pos += $nsp + 1;
            }
            $nsp = 0;
        }
    }

    $lo .= ' ' x $nsp;
    return $lo;
}

#
# Clean a single file in place.
#
#   $name - program name, used as the prefix of diagnostics
#   $f    - path of the file to clean
#
# Returns 1 if the file was rewritten and 0 if it was left untouched
# (not a plain file, could not be opened, contains a zero byte, or was
# already clean).  Dies if the file cannot be rewritten once the
# decision to modify it has been made.
#
sub clean_file
{
    my($name, $f) = @_;

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        return 0;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        return 0;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    # read() yields 0 at end of file and undef on error; both end the scan.
    while (read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        return 0;
    }

    seek($fh, 0, 0)
        or die "$name: Failed to rewind file: $f: $!\n";

    my $changed = 0;            # True once the output differs from the input
    my @blanks  = ();           # Blank lines not yet known to be interior
    my @lines   = ();           # Cleaned lines to be written back

    while (defined(my $line = <$fh>)) {
        my $orig = $line;

        $line =~ s/[ \t\r]*$//;         # Remove trailing spaces
        $line = clean_space_tabs($line);

        # Compare content, not length: a space-tab pair can turn into
        # two tabs, which changes the line without changing its size.
        $changed = 1 if $line ne $orig;

        if ($line eq "\n") {
            push(@blanks, $line);
        } else {
            push(@lines, @blanks, $line);
            @blanks = ();
        }
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if @blanks;

    if ($changed) {
        # Only write to the file if changed
        seek($fh, 0, 0)
            or die "$name: Failed to rewind file: $f: $!\n";
        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";

        # tell() reports failure as -1, not undef.
        my $where = tell($fh);
        if ($where < 0 || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors surface at close; only fatal if we wrote.
    if (!close($fh) && $changed) {
        die "$name: Failed to close modified file: $f: $!\n";
    }

    return $changed ? 1 : 0;
}

my $name = basename($0);

foreach my $f (@ARGV) {
    print STDERR "$name: $f\n";
    clean_file($name, $f);
}