#! /bin/sh
# "ixacat2ud" for Linux (64bit), which requires:
#   default-jre libncursesw5

# Scratch file shared by two pipeline stages further down: it is handed to
# the first java command through its -c option and read back with cat just
# before the awk conversion.
# mktemp creates the file atomically under an unpredictable name, so another
# user cannot pre-create or symlink the path the way a PID-based name allows.
TMP=$(mktemp /tmp/ixakat.XXXXXX) || exit 1
# On HUP, INT, QUIT or TERM remove the scratch file and exit with status 2.
# Single quotes defer the expansion of $TMP until the trap actually fires.
trap 'rm -f "$TMP"; exit 2' 1 2 3 15

# Derive the installation root D from the path this script was invoked by:
#   <root>/bin/<script>  -> D is the parent of that bin directory
#   ./<script>           -> D is the parent of the current directory
#   anything else        -> D is <dirname>/..
# All expansions are quoted so that a path containing whitespace survives.
D=$(dirname "$0")
case $D in
*/bin) D=$(dirname "$D") ;;
.) D=$(/bin/pwd)
   D=$(dirname "$D") ;;
*) D=$D/.. ;;
esac
IXA_PREFIX=$D/ixa-pipe-pos-eu
PATH=$IXA_PREFIX/bin:$PATH

# Collect every entry matching lib/swipl*/lib/* under IXA_PREFIX, each one
# followed by a colon.  Looping over the glob (instead of echo | tr) keeps an
# entry with embedded blanks in one piece.  When nothing matches, the literal
# pattern is appended, as the shell leaves an unmatched glob unexpanded.
swipl_libs=
for swipl_dir in "$IXA_PREFIX"/lib/swipl*/lib/*; do
  swipl_libs=$swipl_libs$swipl_dir:
done
LD_LIBRARY_PATH=$IXA_PREFIX/lib:$swipl_libs$LD_LIBRARY_PATH
export IXA_PREFIX PATH LD_LIBRARY_PATH

# ixakat_to_conllu: awk filter that turns the merged stream built at the end
# of the pipeline below into 10-column, tab-separated token lines.
# Input (stdin):
#   "# <wf ...>" lines    - one per word form; offset= and length= are kept
#   "# <term ...>" lines  - one per term; morphofeat= is kept as the POS tag
#   10-field lines whose first field is a positive integer - one per token,
#     with the head id in field 7 and the dependency label in field 8
#   any other line is copied through unchanged
# Output (stdout): for each token line
#   id, form, lemma, POS tag, field 5, "_", head id, relation, "_", misc
# where misc is "SpaceAfter=No" when the next word form starts exactly where
# this one ends, and "_" otherwise.
ixakat_to_conllu() {
  awk '
BEGIN {
  # dd: direct relabelling of the dependency label found in field 8.
  dd["apocmod"] = dd["apoxmod"] = "parataxis"
  dd["aponcmod"] = "appos"
  dd["auxmod"] = dd["galdemod"] = dd["prtmod"] = dd["xpred"] = "aux"
  dd["ccomp_obj"] = dd["xcomp_obj"] = "ccomp"
  dd["ccomp_subj"] = dd["xcomp_subj"] = "csubj"
  dd["ccomp_zobj"] = dd["xcomp_zobj"] = "advcl"
  dd["entios"] = "flat"
  dd["gradmod"] = "advmod"
  dd["haos"] = "compound"
  dd["itj_out"] = dd["itjout"] = "vocative"
  dd["lot"] = "conj"
  dd["lot_at"] = dd["lotat"] = "discourse"
  dd["menos"] = "mark"
  dd["ncobj"] = "obj"
  dd["ncsubj"] = "nsubj"
  dd["nczobj"] = "iobj"
  dd["postos"] = "nmod"
  # uu: relation chosen from the POS tag of the token when dd has no entry.
  uu["PUNCT"] = "punct"
  uu["NUM"] = "nummod"
  uu["DET"] = "det"
  # m, n, w: running counts of word forms, terms and token lines seen so far.
  # They are never reset, so they index the whole input, not one sentence.
  m = n = w = 0
}

# Word form: remember its character offset and length.
$1 == "#" && $2 == "<wf" {
  for (i = 3; i <= NF; i++) {
    # offset=" and length=" are both 8 characters long, so the value starts
    # at position 9; subtracting 9 also drops the closing quote.
    if ($i ~ /^offset=/)
      off[m] = substr($i, 9, length($i) - 9)
    else if ($i ~ /^length=/)
      ln[m] = substr($i, 9, length($i) - 9)
  }
  m++
  next
}

# Term: remember its morphofeat value as the POS tag, renaming CONJ to CCONJ.
$1 == "#" && $2 == "<term" {
  for (i = 3; i <= NF; i++) {
    if ($i ~ /^morphofeat=/) {
      # morphofeat=" is 12 characters long, so the value starts at 13.
      upos[n] = substr($i, 13, length($i) - 13)
      if (upos[n] == "CONJ")
        upos[n] = "CCONJ"
    }
  }
  n++
  next
}

# Token line: pick the relation and print the converted row.
NF == 10 && $1 ~ /^[1-9][0-9]*$/ {
  # No gap between the end of this word form and the start of the next one.
  if (off[w + 1] - off[w] - ln[w] == 0)
    misc = "SpaceAfter=No"
  else
    misc = "_"

  # Field 1 and field 7 are ids within the sentence while w counts over the
  # whole input, so the head sits at w plus the id difference.
  head = $7 - $1 + w

  dep = "dep:" $8                     # fallback when no rule below applies
  if ($7 == 0)
    dep = "root"
  else if (dd[$8] != "")
    dep = dd[$8]
  else if (uu[upos[w]] != "")
    dep = uu[upos[w]]
  else if ($8 ~ /^[cx]mod$/) {
    if (upos[head] ~ /^(VERB|AUX|ADJ|ADV)$/)
      dep = "advcl"
    else
      dep = "acl"                     # was assigned to a misspelt variable
  }
  else if ($8 == "ncmod") {
    dep = "nmod"
    if (upos[w] == "ADV")
      dep = "advmod"
    else if (upos[w] == "ADJ")
      dep = "amod"
    else if (upos[head] ~ /^(VERB|AUX|ADJ|ADV)$/)
      dep = "obl"
  }
  else if ($8 == "ncpred") {
    dep = "obj"
    if (upos[w] == "VERB")
      dep = "ccomp"
  }
  printf("%d\t%s\t%s\t%s\t%s\t_\t%d\t%s\t_\t%s\n", $1, $2, $3, upos[w], $5, $7, dep, misc)
  w++
  next
}

# Everything else is passed through untouched.
{
  printf("%s\n", $0)
}'
}

# Processing chain (stdin of the script feeds the first stage):
#   1. ixa-pipe-pos-eu, run from inside its own prefix directory
#   2. the ixa-pipe-dep-eu jar, given the scratch file through -c
#      (presumably where it writes its CoNLL output - confirm with the tool)
#   3. sed drops the <terms>...</terms> and <deps>...</deps> sections
#   4. the ixa-pipe-pos jar in "tag" mode with the eu UD models
#   5. sed keeps only the <wf and <term lines, prefixed with "# " and with
#      every ">" turned into a blank; the scratch file is appended after them
#   6. ixakat_to_conllu merges both parts into the final token table
( cd "$IXA_PREFIX" || exit 1
  ixa-pipe-pos-eu
) |
java -jar "$D/ixa-pipe-dep-eu/ixa-pipe-dep-eu-2.0.0-exec.jar" \
  -b "$D/dep-eu-resources-v2.0.0" -c "$TMP" |
sed -e '/<terms>/,/<\/terms>/d' -e '/<deps>/,/<\/deps>/d' |
java -jar "$D/ixa-pipes-1.1.1/ixa-pipe-pos-1.5.1-exec.jar" tag \
  -m "$D/ixa-pipes-1.1.1/ud-morph-models-1.5.0/eu/eu-pos-perceptron-ud.bin" \
  -lm "$D/ixa-pipes-1.1.1/ud-morph-models-1.5.0/eu/eu-lemma-perceptron-ud.bin" |
( sed -n -e 's/>/ /g' -e 's/^ *<wf id=/# &/p' -e 's/^ *<term id=/# &/p'
  cat "$TMP"
) | ixakat_to_conllu

# Normal completion: discard the scratch file and report success regardless
# of the exit status of the pipeline above.
rm -f -- "$TMP"
exit 0
