#!/usr/local/bin/perl
use Getopt::Std;
getopts('t');
# Main driver.  Prints the document header, reads chunk-tagged tokens from
# the files named on the command line (or STDIN), flushes every sentence as
# soon as its closing '.' token has been read, and finally prints whatever
# is left over followed by the document trailer.
#
# Lines matching  TOKEN-TAG<TAB>...</...>CHUNK  fill one slot of the global
# per-sentence arrays (@token, @tag, @flag, @chunk); every other line is
# kept verbatim in @markup and is printed in front of the next token.
print doc_start();
$n = 0;    # index of the slot the next token will be stored in
while (<>) {
    # Any x-SBAR chunk label is treated as "outside a chunk".
    s/.-SBAR$/O/;

    # I-PC labels are dropped the same way unless $inside_pp is true.  The
    # replacement used to be the digit 0; the letter O matches the SBAR rule
    # above (either value fails the FLAG-TYPE match below, so the output is
    # unchanged).
    # NOTE(review): $inside_pp is never assigned in this file, so this
    # substitution currently always applies -- confirm that is intended.
    s/I-PC$/O/ unless $inside_pp;

    if (/^(.*)-(.*)\t(.*)\/(.*)$/) {
        $token[$n] = $1;
        $tag[$n]   = $2;
        $chunk[$n] = $4;

        # Split a FLAG-TYPE chunk label; anything else means "no chunk".
        if ($chunk[$n] =~ /^(.*)-(.*)$/) {
            $flag[$n]  = $1;
            $chunk[$n] = $2;
        }
        else {
            undef $flag[$n];
            undef $chunk[$n];
        }

        if ($token[$n] eq '.') {
            # End of sentence: flush it.  print_sentence() empties the
            # arrays, so refill them from slot 0; without this reset every
            # later sentence was stored at ever higher indices and each
            # flush rescanned all the empty slots before it.
            print_sentence();
            $n = 0;
        }
        else {
            $n++;
        }
    }
    else {
        # Not a token line: pass it through in front of the next token.
        $markup[$n] .= $_;
    }
}

# Flush a trailing sentence that did not end in '.', plus any trailing
# pass-through lines.
print_sentence();
print doc_end();
# Print the sentence buffered in the global arrays and then clear them.
#
# Reads the globals $n, @token, @tag, @flag, @chunk and @markup (slots
# 0 .. $n), builds the chunk start tags (@cbtags) and end tags (@cetags),
# and prints, for every slot in order: pass-through markup, start tags, the
# token itself, end tags.  Takes no arguments and returns nothing useful.
#
# @flag is interpreted as follows: 'B' starts a chunk, 'I' continues the
# open chunk, 'E' ends a chunk; any other value (including undef) marks a
# token outside all chunks.  Chunks of type 'PC' are special: they are
# wrapped around the chunk that follows them.
#
# Dies when an 'E' tag neither matches the open chunk nor has type 'PC'.
sub print_sentence {
    my ($i, $chunk);    # $chunk: type of the currently open chunk, if any

    for ($i = 0; $i <= $n; $i++) {
        # An 'I' whose type differs from the open chunk really starts a
        # new chunk.
        if ($flag[$i] eq 'I' && $chunk ne $chunk[$i]) {
            $flag[$i] = 'B';
        }

        if ($flag[$i] eq 'B') {
            # A new chunk implicitly closes the one still open, right
            # after the previous token.
            if (defined $chunk) {
                $cetags[$i-1] .= end_tag($chunk);
            }
            if ($chunk[$i] eq 'PC') {
                # Look ahead for where the PC chunk has to end: find the
                # next chunk start, then skip that chunk's 'I' tokens.
                # NOTE(review): if no later 'B' exists (or the run of 'I'
                # tokens reaches the end), $k ends up beyond $n and the
                # end tag goes into a @markup slot the print loop below
                # never reaches -- confirm whether that can happen on
                # real input.
                my $k;
                for ($k = $i + 1; $k <= $n; $k++) {
                    last if ($flag[$k] eq 'B');
                }
                for ($k++; $k <= $n; $k++) {
                    last if ($flag[$k] ne 'I');
                }
                if ($k <= $n && $flag[$k] eq 'E' && $chunk[$k] eq 'PC') {
                    # An explicit E-PC token closes the chunk: put the end
                    # tag in front of the following slot and neutralise
                    # the E-PC so the scan does not handle it again.
                    $markup[$k+1] .= end_tag('PC');
                    undef $flag[$k];
                    undef $chunk[$k];
                }
                else {
                    # Otherwise close the PC chunk in front of slot $k.
                    $markup[$k] .= end_tag('PC');
                }
                # The PC end tag is already placed, so PC is not tracked
                # as the open chunk.
                undef $chunk;
            }
            else {
                $chunk = $chunk[$i];
            }
            $cbtags[$i] .= start_tag($chunk[$i]);
        }
        elsif ($flag[$i] eq 'E') {
            if ($chunk[$i] eq $chunk) {
                # Regular end of the open chunk.
                $cetags[$i] .= end_tag($chunk);
                undef $chunk;
            }
            elsif ($chunk[$i] eq 'PC') {
                # E-PC that was not consumed by the look-ahead above:
                # close the open chunk after the previous token, close PC
                # after this one, and open PC in front of the nearest
                # chunk start at or before this slot.
                $cetags[$i-1] .= end_tag($chunk) if defined $chunk;
                $cetags[$i] .= end_tag("PC");
                my $k;
                for ($k = $i; $k >= 0; $k--) {
                    if ($flag[$k] eq 'B') {
                        $cbtags[$k] = start_tag("PC") . $cbtags[$k];
                        last;
                    }
                }
                undef $chunk;
            }
            else {
                die "print_sentence: E-$chunk[$i] at slot $i does not close "
                  . "the open chunk ("
                  . (defined $chunk ? $chunk : 'none') . ")";
            }
        }
        elsif ($flag[$i] ne 'I' && defined $chunk) {
            # Token outside any chunk: the open chunk ended on the
            # previous token.
            $cetags[$i-1] .= end_tag($chunk);
            undef $chunk;
        }
    }
    # NOTE(review): a chunk still open when the loop ends (last slot tagged
    # 'B' or 'I') never gets an end tag, because $chunk is dropped here.

    # Slot $n is included so that markup stored after the last token is
    # printed too; token_and_tag() is skipped for slots without a token.
    for ($i = 0; $i <= $n; $i++) {
        print $markup[$i];
        print $cbtags[$i];
        print token_and_tag($token[$i], $tag[$i]) if defined $token[$i];
        print $cetags[$i];
    }

    # Empty the buffers for the next sentence.
    undef @token;
    undef @tag;
    undef @chunk;
    undef @cbtags;
    undef @cetags;
    undef @flag;
    undef @markup;
}
# Text printed once before the first sentence: the empty string normally,
# two newlines when the global $opt_t (the -t switch) is defined.
sub doc_start {
    return defined $opt_t ? "\n\n" : '';
}
# Text printed once after the last sentence: the empty string normally,
# a single newline when the global $opt_t (the -t switch) is defined.
sub doc_end {
    return defined $opt_t ? "\n" : '';
}
# Build the opening tag for a chunk of type $t.
# Returns "<TYPE>\n" normally; when the global $opt_t (the -t switch) is
# defined the type is ignored and " \n" is returned instead.
sub start_tag {
    my ($t) = @_;
    if (defined $opt_t) {
        return " \n";
    }
    return "<$t>\n";
}
# Build the closing tag for a chunk of type $t.
# Returns "</TYPE>\n" normally, so that it pairs with the "<TYPE>\n"
# produced by start_tag(); the previous version omitted the leading "</"
# and emitted "TYPE>\n".  When the global $opt_t (the -t switch) is
# defined the type is ignored and " \n" is returned instead.
sub end_tag {
    my $t = shift;
    return "</$t>\n" unless defined $opt_t;
    return " \n";
}
# Format one output line for a token and its tag.
# Returns "TOKEN<TAB>TAG\n" normally; when the global $opt_t (the -t
# switch) is defined both arguments are ignored and " \n" is returned.
sub token_and_tag {
    my $token = shift;
    my $tag   = shift;
    if (defined $opt_t) {
        return " \n";
    }
    return "$token\t$tag\n";
}