#!/usr/bin/perl
#
# datsanitise: strip excess whitespace and punctuation from TEXT
# usage: cat *.dat | datparse | datsanitise > output
#
# Reads tab-separated records from STDIN (or from files named on the
# command line).  Every record must have exactly six fields; the sixth
# one (presumably the TEXT column mentioned above) is cleaned up and the
# record is written to STDOUT.  Blank lines are skipped silently; lines
# with the wrong number of fields are reported on STDERR and dropped.

use strict;
use warnings;

# Number of tab-separated fields a well-formed record must have.
my $FIELD_COUNT = 6;

LINE:
while (my $line = <>) {
    # Remove the line terminator up front so it can neither end up inside
    # the last field nor be swallowed by the whitespace collapsing below.
    chomp $line;
    next LINE if $line eq '';

    # A limit of -1 keeps trailing empty fields, so a record whose last
    # field is empty is still seen as having six fields after the chomp.
    my @fields = split /\t/, $line, -1;
    if (@fields != $FIELD_COUNT) {
        print STDERR "Warning, bogus line: $line\n";
        next LINE;
    }

    # Replace every character that is not a letter, digit, whitespace or
    # one of ( ) ! ' " with a space, then squeeze whitespace runs to one.
    $fields[-1] =~ s/[^()!'"A-Za-z0-9\s]/ /g;
    $fields[-1] =~ s/\s{2,}/ /g;

    # Exactly one newline per record.
    print join("\t", @fields), "\n";
}