#!/usr/bin/perl
#
# datsanitise: strip excess whitespace and punctuation from TEXT
# usage: cat *.dat | datparse | datsanitise > output
#

use strict;
use warnings;

# sanitise_line: clean up one tab-separated record.
#
# Takes one input line, with or without its line terminator (LF or CRLF).
# The line must consist of exactly six tab-separated fields; the sixth is
# the TEXT field.  In TEXT, every character other than ( ) ! ' " ASCII
# letters, digits and whitespace is replaced by a space, and every run of
# two or more whitespace characters is then collapsed to a single space.
# The first five fields are passed through untouched.
#
# Returns the cleaned record WITHOUT a trailing newline, or undef when the
# line does not have exactly six fields.
sub sanitise_line {
  my ($line) = @_;

  # Remove the terminator before splitting.  If it were left attached to
  # TEXT it would sometimes survive the whitespace collapse and sometimes
  # not, so the caller could not know whether to print a newline.
  $line =~ s/\r?\n\z//;

  # A limit of -1 keeps trailing empty fields, so a record whose TEXT is
  # empty still counts as six fields now that the newline is gone.
  my @fields = split /\t/, $line, -1;
  return if @fields != 6;

  $fields[5] =~ s/[^()!'"A-Za-z0-9\s]/ /g;
  $fields[5] =~ s/\s{2,}/ /g;

  return join "\t", @fields;
}

# Filter STDIN to STDOUT: blank lines are dropped, malformed lines are
# reported on STDERR and skipped, and every other record is written back
# with a cleaned TEXT field and exactly one trailing newline.
while (my $line = <STDIN>) {
  $line =~ s/\r?\n\z//;
  next if $line eq '';

  my $record = sanitise_line($line);
  if (!defined $record) {
    print STDERR "Warning, bogus line: $line\n";
    next;
  }

  print $record, "\n";
}
