#!/bin/sh
#
# Tokenize, tag and chunk English text with a two-pass tagger pipeline.
#
# Usage: this-script [file ...]
#   Reads the named files (or standard input when no file is given) and
#   writes the output of the final filter stage to standard output.
#
# Pipeline stages:
#   1. cat            - concatenate the inputs into a single stream
#   2. $TOKENIZER     - tokenize, using the abbreviation list
#   3. grep           - drop empty lines
#   4. $TAGGER        - first pass with $PARFILE1
#   5. perl           - join the first two columns of each line with '-'
#   6. $TAGGER        - second pass with $PARFILE2 (chunker parameters)
#   7. $FILTER        - post-process the chunker output

# Set these paths appropriately
BIN=./bin
CMD=./cmd
LIB=./lib

TOKENIZER=${BIN}/separate-punctuation
ABBR_LIST=${LIB}/english-abbreviations
TAGGER=${BIN}/tree-tagger
PARFILE1=${LIB}/english.par
PARFILE2=${LIB}/english-chunker.par
FILTER=${CMD}/filter-chunker-output.perl

# Concatenate all input files; "$@" keeps each file name intact even if it
# contains whitespace or glob characters.
cat "$@" |
# do tokenization
"$TOKENIZER" +1 +s +l "$ABBR_LIST" |
# remove empty lines
grep -v '^$' |
# First tagging pass. The tagger must read the tokenized stream from the
# pipe, so the input file names are deliberately NOT passed to it here.
"$TAGGER" "$PARFILE1" -token -sgml |
# Lines with exactly one whitespace-separated field are passed through
# unchanged; otherwise the first two fields are emitted as "field0-field1".
perl -nae 'if ($#F==0){print}else{print "$F[0]-$F[1]\n"}' |
# Second tagging pass with the chunker parameter file.
"$TAGGER" "$PARFILE2" -token -sgml -eps 0.00000001 -quiet |
"$FILTER"