#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
#
# Clean one or more text files of stealth whitespace: trailing blanks
# (spaces, tabs, carriage returns) at the end of a line, spaces hidden in
# front of a tab, and blank lines at the end of the file.  Arguments that
# are not plain files are reported and skipped.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
# Usage: cleanfile [-width #] files...
#   -width #, -w #   warn about lines wider than # columns (default 79);
#                    a width of 0 disables the check.
#

use strict;
use warnings;
use bytes;
use File::Basename;

# Default options
my $max_width = 79;

# Program name used as a prefix in diagnostics
my $name = basename($0);

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string (normally a single line) and returns the cleaned
# string.  Spaces are held back until the next character is known: if it
# is a tab, the spaces are dropped and as many tabs are emitted as are
# needed to reach the same tab stop; otherwise the spaces are emitted
# unchanged.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $lo  = '';               # Output built so far
    my $pos = 0;                # Column reached by the emitted output
    my $nsp = 0;                # Spaces seen but not yet emitted

    for my $c (split(//, $li)) {
        if ($c eq "\t") {
            # Tab stop the original text reaches, counting pending spaces
            my $npos = ($pos + $nsp + 8) & ~7;
            # Number of tab stops between the emitted column and there
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $nsp = 0;
        } elsif ($c eq "\n" || $c eq "\r") {
            $lo .= " " x $nsp;
            $nsp = 0;
            $lo .= $c;
            $pos = 0;           # Line break: column restarts
        } elsif ($c eq " ") {
            $nsp++;
        } else {
            $lo .= " " x $nsp;
            $pos += $nsp;
            $nsp = 0;
            $lo .= $c;
            $pos++;
        }
    }

    # Spaces at the very end of the string are kept as they are
    $lo .= " " x $nsp;
    return $lo;
}

# Compute the visual width of a string
#
# Tabs advance to the next multiple of 8, "\n" starts a new line, every
# other character counts as one column.  Returns the width of the widest
# line contained in the string.
sub strwidth($) {
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $pos  = 0;               # Column on the current line
    my $mlen = 0;               # Widest line seen so far

    for my $c (split(//, $li)) {
        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

# Print the usage message to STDERR and exit with status 1.
sub usage
{
    print STDERR "Usage: $name [-width #] files...\n";
    exit 1;
}

# Clean a single file in place.
#
# Problems that only affect this file (not a plain file, cannot be opened,
# looks binary) are reported on STDERR and the file is skipped.  A failure
# while rewriting the file is fatal, since the file may be left damaged.
# Over-long lines are reported on STDERR but not modified.
sub clean_file
{
    my ($f) = @_;

    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        return;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        return;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        return;
    }

    if (!seek($fh, 0, 0)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        return;
    }

    my @blanks  = ();           # Blank lines not yet known to be interior
    my @lines   = ();           # Cleaned output lines
    my $lineno  = 0;
    my $changed = 0;            # True once the output differs from the input

    while (defined(my $line = <$fh>)) {
        $lineno++;
        my $orig = $line;

        $line =~ s/[ \t\r]*$//; # Remove trailing spaces
        $line = clean_space_tabs($line);

        # Compare content, not length: replacing "space + tab" by two tabs
        # changes the line while keeping its byte count.
        $changed = 1 if ($line ne $orig);

        if ($line eq "\n") {
            push(@blanks, $line);
        } else {
            push(@lines, @blanks, $line);
            @blanks = ();
        }

        my $l_width = strwidth($line);
        if ($max_width && $l_width > $max_width) {
            print STDERR
                "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
        }
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if (@blanks);

    if ($changed) {
        # Only write to the file if changed
        seek($fh, 0, 0)
            or die "$name: Failed to rewind modified file: $f: $!\n";
        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";

        my $where = tell($fh);
        if (!defined($where) || $where < 0 || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }

        # Buffered write errors only surface when the handle is closed
        close($fh)
            or die "$name: Failed to close modified file: $f: $!\n";
    } else {
        close($fh);
    }

    return;
}

my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        if ($arg eq '-width' || $arg eq '-w') {
            my $width = shift(@ARGV);
            usage() unless defined($width);     # Option needs a value
            $max_width = $width + 0;
        } else {
            usage();
        }
    } else {
        push(@files, $arg);
    }
}

foreach my $f (@files) {
    clean_file($f);
}