#!/usr/bin/perl
#
# datparse: parse .dat file
# outputs:  WHO VOICE WHEN ACTION SOUND TEXT
# notes:    does NOT sanitise TEXT
# usage:    cat *.dat | datparse 2>/dev/null | datsanitise
#
# expects:
#   WHO WHEN (comment)                  state 0
#     ACTION                            state 1
#
#       n VOICE [x] "text"              state 2
#
#       SOUND.EXT ([x]) "text"?         state 2
#                 ([x]) "text"?         state 2
#                                       state 3
#
# WHO and WHEN refer to events happening to a certain character
# ACTION specifies the event
# eg: SOV_AMER10 RANDOM_1 HUNT is said at a random time when a
#     Sovereign America guy is searching for officers
# eg: BLUE_TEAM TOBIAS_S SUSPECT_NEUT is said when Blue sees Tobias Stromm
#     go down
# note that WHO is not the character actually saying the chat, it's the
# character who "sees" the event, thus causing the trigger
#
# n is the order in the dialogue
# eg: 1 ELEMENT [N] "TOC, this is entry team. We are in position."
#     2 LT [N] "Copy entry team. Go when ready."
# both sounds are played in order
#
# VOICE is the character who actually speaks
# eg: ELEMENT shows that it's the element talking
#
# x? dunno what that's for
#
# finally we have TEXT, which may span multiple lines, and SOUND,
# which is (almost) the name of the .cmp file
# s/\.// to get the custom chat filename, add .cmp to THAT to get the filename
#

use strict;
use warnings;

# Parser state, shared by every do_state_N handler below.
# $state selects the handler that receives the next input line.
my $state = 0;

# Fields of the record currently being assembled.  They start out (and are
# reset to) the empty string so they can be interpolated without warnings.
my ($who, $when, $action, $voice, $text, $sound) = ('') x 6;

# SOUND.EXT names seen anywhere in the input (%found) versus the ones a
# state-2 handler actually consumed (%parsed); compared at end of input.
my %found;
my %parsed;

# State number -> handler.  Replaces the former string-eval dispatch, which
# silently swallowed every runtime error raised by a handler.
my %handler_for = (
    0 => \&do_state_0,
    1 => \&do_state_1,
    2 => \&do_state_2,
    3 => \&do_state_3,
);

# Run the parser only when executed as a script, so the handlers can be
# loaded (require/do) without consuming STDIN.
main() unless caller;

# Entry point: feed every input line (files named on the command line, or
# STDIN) through the state machine, flush the last record, then report
# sound files that were seen but never parsed.
sub main {
    while (my $line = <>) {
        process_line($line);
    }
    flush_pending_record();
    report_unparsed_sounds();
    return;
}

# Handle one raw input line: echo it to STDERR for debugging, note any
# SOUND.EXT it appears to start with, then hand it to the current state.
sub process_line {
    my ($line) = @_;

    print STDERR "!$line";
    if ($line =~ /^\s+([A-Za-z0-9]{8})\.([A-Za-z0-9]{3})\s*/) {
        $found{"$1.$2"}++;
    }
    $handler_for{$state}->($line);
    return;
}

# Called once at end of input.  An empty line is what terminates a record in
# state 2, so feeding one more empty string emits a record that was still
# being assembled; in every other state it is a no-op.
sub flush_pending_record {
    $handler_for{$state}->('');
    return;
}

# Warn (on STDERR) about every SOUND.EXT whose parsed count differs from the
# number of times it was seen.  Keys are sorted so the output is stable.
sub report_unparsed_sounds {
    foreach my $name (sort keys %found) {
        my $parsed_count = exists $parsed{$name} ? $parsed{$name} : 0;
        print STDERR "Warning: $name not parsed\n"
            unless $parsed_count == $found{$name};
    }
    return;
}

# looking for WHO WHEN and possibly a comment
# Returns 1 (and moves to state 1) when the line starts a new entity,
# 0 otherwise.
sub do_state_0 {
    my ($line) = @_;

    if ($line =~ /^([A-Z][^\s]+)\s+([^\s]+).*$/) {
        ($who, $when) = ($1, $2);
        # a new entity starts with a clean slate
        ($action, $voice, $text, $sound) = ('') x 4;
        $state = 1;
        print STDERR "-> $state: $who, $when\n";
        return 1;
    }
    return 0;
}

# looking for ACTION
# Blank lines are skipped; any other non-matching line is ignored and the
# parser stays in state 1.
sub do_state_1 {
    my ($line) = @_;

    return if $line =~ /^\s*$/;

    if ($line =~ /^\s+([A-Z][^\s]+)\s*$/) {
        $action = $1;
        $state  = 2;
        print STDERR "-> $state: $action\n";
    }
    return;
}

# looking for VOICE and TEXT or SOUND
# Returns 1 when the line was recognised, 0 otherwise (do_state_3 relies on
# this to decide whether the line belongs to a new entity instead).
sub do_state_2 {
    my ($line) = @_;

    if ($line =~ /^\s+\d+\s+([^\s]+)[^("]+([("]+.+)$/) {
        # look for n VOICE [x] TEXT
        my $match;
        ($voice, $match) = ($1, $2);
        chomp $match;
        $match =~ s/[\s\r]+$/ /;

        # check for brackets only first, it's easier
        if ($match =~ /^\(([^)]+)\)[."\s]*$/) {
            # (Cough, cough.... gasp, cough, cough)
            my $aside = $1;
            print STDERR "bingo 1: '$match'\n";
            $text = $aside;
        }
        elsif ($match =~ /^(\([^)]+\)\s*)?"?(.+)[."\s]*$/) {
            # (Russian accent) "AHHHHhh!"
            # (Angry whisper)"Geezzs, (gumble, grumble)..."
            # "(choke), ah... (cough, cough), ah...(cough, cough, cough)..."
            my ($aside, $spoken) = ($1, $2);
            $aside = '' unless defined $aside;    # the (...) prefix is optional
            print STDERR "bingo 2: '$match' -> '$aside' '$spoken'\n";
            ($text = $spoken) =~ s/"\s*$//;
        }
        else {
            # hmm, couldn't parse this one (SHOULD NEVER GET HERE!)
            print STDERR "oops: '$match'\n";
        }

        chomp $text;
        $text =~ s/[\s\r]+$/ /;
        print STDERR "-> ${state}a: $voice, $text\n";
        return 1;    # needed if we're called from state 3
    }
    elsif ($line =~ /^\s+\d+\s+([^\s]+)\s\[.\]\s+(.+)$/) {
        # no quotes at all
        ($voice, $text) = ($1, $2);
        chomp $text;
        $text =~ s/[\s\r]+$/ /;
        print STDERR "bingo 0: '$text'\n";
        return 1;
    }
    elsif ($line =~ /^\s+([A-Za-z0-9]{8})\.([A-Za-z0-9]{3})\s*(\[.\]\s+)?"?([^"]*)["\s]*$/) {
        # got SOUND
        # NOTE(review): $sound is only ever set here and in do_state_0, so a
        # dialogue line without its own SOUND line keeps the previous sound.
        my ($base, $ext, $more_text) = ($1, $2, $4);
        $sound = "$base$ext";
        $parsed{"$base.$ext"}++;

        # maybe TEXT continues after the filename
        $text .= $more_text if $more_text;
        chomp $text;
        $text =~ s/[\s\r]+$/ /;
        print STDERR "-> ${state}b: $sound, $text\n";
        return 1;
    }
    elsif ($line =~ /^\s{15,}([^]]+\]\s*)?"?([^"]*)["\s]*$/ && $voice) {
        # maybe TEXT continues, indented, on a new line
        # NOTE(review): a whitespace-only line of 15+ characters also lands
        # here (appending nothing) instead of ending the record.
        $text .= $2;
        chomp $text;
        $text =~ s/[\s\r]+$/ /;
        print STDERR "-> ${state}b: $sound, $text\n";
        return 1;
    }
    elsif ($line =~ /^\s*$/) {
        # blank line: the record is complete, emit it tab-separated
        $state = 3;
        print "$who\t$voice\t$when\t$action\t$sound\t$text\n";
        return 1;
    }
    return 0;
}

# looking either for another text or a whole new entity
sub do_state_3 {
    my ($line) = @_;

    # Was "next", which only worked by jumping out of the sub into the
    # caller's loop and died when called outside of one.
    return if $line =~ /^\s*$/;

    # let's try to parse out VOICE and TEXT
    $state = 2;
    return 1 if do_state_2($line);

    # no, we're still here, must be WHO WHEN so skip back to state 0
    $state = 0;
    return do_state_0($line);
}