#!/usr/local/bin/perl
#
# Reads chunk-tagged token lines from the files named on the command line (or
# STDIN) and prints them with chunk start/end tags wrapped around each chunk.
#
# A token line has the shape
#
#     TOKEN-TAG <tab> TEXT/LABEL
#
# where LABEL is either FLAG-TYPE (FLAG is B, I or E) or anything without a
# hyphen, which means "outside any chunk".  Every line that does not match
# this shape is passed through unchanged as markup, in its original position.
#
# Option:
#   -t   use the alternative output strings returned by the helper subs.

use strict;
use warnings;
no warnings 'uninitialized';    # undefined flags/chunks are compared on purpose

use Getopt::Std;

our $opt_t;                     # set by getopts('t')
getopts('t');

# Per-sentence buffers, all indexed by token position within the sentence.
my @token;     # token text
my @tag;       # tag that followed the token
my @chunk;     # chunk type (undef when outside a chunk)
my @flag;      # B / I / E (undef when outside a chunk)
my @markup;    # pass-through text printed before the token at that index
my @cbtags;    # chunk start tags printed before the token
my @cetags;    # chunk end tags printed after the token
my $n = 0;     # index at which the next token will be stored

print doc_start();

while ( my $line = <> ) {

    # SBAR labels are not bracketed: demote them to "outside".
    $line =~ s/.-SBAR$/O/;

    # Continuation labels of PC chunks are demoted as well.  The replacement
    # is the digit 0; that is sufficient because any label without a hyphen
    # is treated as "outside" below.
    $line =~ s/I-PC$/0/;

    if ( $line =~ /^(.*)-(.*)\t(.*)\/(.*)$/ ) {
        my ( $word, $pos, $label ) = ( $1, $2, $4 );

        $token[$n] = $word;
        $tag[$n]   = $pos;

        if ( $label =~ /^(.*)-(.*)$/ ) {
            $flag[$n]  = $1;
            $chunk[$n] = $2;
        }
        else {
            $flag[$n]  = undef;
            $chunk[$n] = undef;
        }

        if ( $word eq '.' ) {
            print_sentence();

            # Start the next sentence at index 0 so that each flush only
            # scans its own tokens instead of every slot used so far.
            $n = 0;
        }
        else {
            $n++;
        }
    }
    else {
        $markup[$n] .= $line;
    }
}

# Flush whatever follows the last sentence-final '.'.
print_sentence();
print doc_end();

# print_sentence()
#
# Decides where chunk tags go for the buffered tokens 0 .. $n, prints the
# sentence (markup, start tags, token, end tags for each position) and then
# empties all buffers.  Takes no arguments and returns nothing useful; it
# works on the file-level buffers.  Dies when an E flag closes a chunk type
# that is neither the currently open chunk nor 'PC'.
sub print_sentence {
    my $open;    # type of the currently open (non-PC) chunk, undef if none

    for my $i ( 0 .. $n ) {

        # An I flag that does not continue the open chunk starts a new one.
        if ( $flag[$i] eq 'I' && $open ne $chunk[$i] ) {
            $flag[$i] = 'B';
        }

        if ( $flag[$i] eq 'B' ) {

            # $open can only be defined after an earlier iteration, so
            # $i - 1 is never negative here.
            $cetags[ $i - 1 ] .= end_tag($open) if defined $open;

            if ( $chunk[$i] eq 'PC' ) {

                # Look ahead: find the next B flag, then skip the I flags
                # that follow it.  $k ends up on the first position after
                # that following chunk (or beyond $n when there is none).
                my $k;
                for ( $k = $i + 1; $k <= $n; $k++ ) {
                    last if $flag[$k] eq 'B';
                }
                for ( $k++; $k <= $n; $k++ ) {
                    last if $flag[$k] ne 'I';
                }

                if ( $k <= $n && $flag[$k] eq 'E' && $chunk[$k] eq 'PC' ) {

                    # An explicit E-PC token: close PC after that token and
                    # neutralise it so it is not handled a second time.
                    $markup[ $k + 1 ] .= end_tag('PC');
                    undef $flag[$k];
                    undef $chunk[$k];
                }
                else {
                    # No explicit end: close PC right before position $k.
                    $markup[$k] .= end_tag('PC');
                }
                undef $open;
            }
            else {
                $open = $chunk[$i];
            }

            $cbtags[$i] .= start_tag( $chunk[$i] );
        }
        elsif ( $flag[$i] eq 'E' ) {
            if ( $chunk[$i] eq $open ) {
                $cetags[$i] .= end_tag($open);
                undef $open;
            }
            elsif ( $chunk[$i] eq 'PC' ) {

                # E-PC without a B-PC seen by the look-ahead above: close
                # any open chunk first, close PC here, and open PC in front
                # of the nearest preceding chunk start.
                $cetags[ $i - 1 ] .= end_tag($open) if defined $open;
                $cetags[$i] .= end_tag('PC');
                for ( my $k = $i; $k >= 0; $k-- ) {
                    if ( $flag[$k] eq 'B' ) {
                        $cbtags[$k] = start_tag('PC') . $cbtags[$k];
                        last;
                    }
                }
                undef $open;
            }
            else {
                die "Unexpected E-$chunk[$i] at token $i ('$token[$i]'); open chunk is "
                    . ( defined $open ? "'$open'" : 'none' );
            }
        }
        elsif ( $flag[$i] ne 'I' && defined $open ) {

            # Left the open chunk without an explicit end flag.
            $cetags[ $i - 1 ] .= end_tag($open);
            undef $open;
        }
    }

    for my $i ( 0 .. $n ) {
        print $markup[$i];
        print $cbtags[$i];
        print token_and_tag( $token[$i], $tag[$i] ) if defined $token[$i];
        print $cetags[$i];
    }

    @token  = ();
    @tag    = ();
    @chunk  = ();
    @cbtags = ();
    @cetags = ();
    @flag   = ();
    @markup = ();
    return;
}

# NOTE(review): in every helper below the -t branch returns whitespace only
# and ignores its arguments.  This looks like markup that was lost from the
# string literals -- confirm against the upstream copy of this script.  The
# -t strings are kept exactly as found.

# doc_start(): text printed once before all output ('' unless -t is given).
sub doc_start {
    return '' unless defined $opt_t;
    return "\n\n";
}

# doc_end(): text printed once after all output ('' unless -t is given).
sub doc_end {
    return '' unless defined $opt_t;
    return "\n";
}

# start_tag($type): the line that opens a chunk of the given type.
sub start_tag {
    my $t = shift;
    return "<$t>\n" unless defined $opt_t;
    return " \n";
}

# end_tag($type): the line that closes a chunk of the given type; the
# counterpart of what start_tag() emits.
sub end_tag {
    my $t = shift;
    return "</$t>\n" unless defined $opt_t;
    return " \n";
}

# token_and_tag($token, $tag): the output line for a single token.
sub token_and_tag {
    my ( $token, $tag ) = @_;
    return "$token\t$tag\n" unless defined $opt_t;
    return " \n";
}