/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

Révision 3567

     <?xml version="1.0" encoding="UTF-8"?>
     <projectDescription>
     	<name>org.txm.conllu.core</name>
     	<comment></comment>
     	<projects>
     	</projects>
     	<buildSpec>
     		<buildCommand>
     			<name>org.eclipse.jdt.core.javabuilder</name>
     			<arguments>
     			</arguments>
     		</buildCommand>
     		<buildCommand>
     			<name>org.eclipse.pde.ManifestBuilder</name>
     			<arguments>
     			</arguments>
     		</buildCommand>
     		<buildCommand>
     			<name>org.eclipse.pde.SchemaBuilder</name>
     			<arguments>
     			</arguments>
     		</buildCommand>
     	</buildSpec>
     	<natures>
     		<nature>org.eclipse.jdt.groovy.core.groovyNature</nature>
     		<nature>org.eclipse.pde.PluginNature</nature>
     		<nature>org.eclipse.jdt.core.javanature</nature>
     	</natures>
     </projectDescription>

     <!-- The Identity Transformation -->
     <xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:xs="http://www.w3.org/2001/XMLSchema">
       <!-- This stylesheet comments out the sentences containing more than 100 terminal nodes in Tiger XML files.
         To change the threshold, use the maxLength parameter.
       Written by A. Lavrentiev, CNRS, UMR IHRIM 2021-07-16
       Licence: GNU GPL v.3
       -->
       <!-- Identity rule: every node and attribute without a more specific
            template is copied, and its attributes and children are processed
            recursively. -->
       <xsl:template match="@*|node()">
         <xsl:copy>
           <xsl:apply-templates select="node()|@*"/>
         </xsl:copy>
       </xsl:template>
       <!-- Maximum number of terminal nodes (t) a sentence may contain. -->
       <xsl:param name="maxLength" as="xs:integer" select="100"/>
       <!-- A sentence (s) with more than $maxLength terminals is replaced by a
            comment giving its token count and its words; any other sentence
            is copied unchanged. -->
       <xsl:template match="s">
         <xsl:variable name="tokenCount" select="count(graph/terminals/t)"/>
         <xsl:choose>
           <xsl:when test="$tokenCount gt $maxLength">
             <xsl:comment>
               Sentence too long (<xsl:value-of select="$tokenCount"/> tokens) :
               <xsl:value-of select="for $t in graph/terminals/t return concat($t/@word, ' ')" separator=""/>
             </xsl:comment>
           </xsl:when>
           <xsl:otherwise>
             <xsl:copy-of select="."/>
           </xsl:otherwise>
         </xsl:choose>
       </xsl:template>
     </xsl:stylesheet>

     package org.txm.scripts.importer.conllu
     import org.txm.Toolbox
     import org.txm.importer.xtz.ImportModule;
     import org.txm.metadatas.Metadatas
     import org.txm.utils.io.FileCopy
     import org.txm.utils.io.IOUtils
     import org.txm.importer.xtz.*
     import org.txm.scripts.importer.xtz.*
     import org.txm.conllu.core.function.ImportCoNLLUAnnotations
     import org.txm.conllu.core.preferences.UDPreferences
     import org.txm.importer.ApplyXsl2;
     import javax.xml.stream.*
     import org.txm.utils.AsciiUtils
     import org.txm.utils.ConsoleProgressBar
     import org.txm.utils.FileUtils
     import org.txm.conllu.core.preferences.UDPreferences
     /**
      * XTZ importer for CoNLL-U sources.
      *
      * Before delegating to the regular XTZ import, the CoNLL-U files of the
      * source directory are optionally split on their "# newdoc id" lines and
      * then converted to XML-TEI files: one &lt;p&gt; per paragraph id, one
      * &lt;s&gt; per sentence and one &lt;w&gt; per token, the UD properties being
      * written as prefixed attributes of &lt;w&gt;.
      *
      * @author mdecorde
      */
     class CoNLLUImporter extends XTZImporter {

     	public CoNLLUImporter(ImportModule module) {
     		super(module);
     	}

     	/**
     	 * Prepares the XML-TEI sources from the CoNLL-U files, switches the
     	 * input directory to the generated files and runs the XTZ import.
     	 * Nothing is imported if the split or the conversion step aborts.
     	 */
     	@Override
     	public void process() {
     		File connluSrcDirectory = inputDirectory
     		// The preference is read as a String: parse it explicitly, since a direct
     		// String -> boolean coercion follows Groovy truth and makes "false" true.
     		boolean usenewdocid = Boolean.parseBoolean(UDPreferences.getInstance().getString(UDPreferences.IMPORT_USE_NEW_DOC_ID)) // THE conllu -> Tiger XSL MUST HAVE THE SAME BEHAVIOR BEFORE //

     		if (usenewdocid) {
     			connluSrcDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu")
     			connluSrcDirectory.deleteDir()
     			connluSrcDirectory.mkdirs()
     			// splitCoNLLUFiles prints its own progress message
     			if (!splitCoNLLUFiles(inputDirectory, connluSrcDirectory, project)) {
     				return
     			}
     		}

     		File srcDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu2tei")
     		srcDirectory.deleteDir()
     		srcDirectory.mkdirs()

     		println "Convert CoNLL-U to XML-TEI..."
     		if (!convertCoNLLU2TEI(connluSrcDirectory, srcDirectory, project)) {
     			return
     		}

     		inputDirectory = srcDirectory // switch source directory
     		super.process()
     	}

     	/**
     	 * Copies the lines of each ".conllu" file of inputDirectory into
     	 * srcDirectory, starting a new "&lt;id&gt;.conllu" target each time a
     	 * "# newdoc id = &lt;id&gt;" line announces an id different from the
     	 * current one. The initial id is the source file name without extension.
     	 * Targets are opened in append mode.
     	 *
     	 * @param inputDirectory directory containing the ".conllu" files
     	 * @param srcDirectory directory receiving the split files
     	 * @param project the project being imported (not used by this method)
     	 * @return false if inputDirectory cannot be listed, true otherwise
     	 */
     	public static def splitCoNLLUFiles(File inputDirectory, File srcDirectory, def project) {
     		def files = inputDirectory.listFiles()
     		if (files == null) {
     			println "Aborting. No CONLL file found in $inputDirectory."
     			return false
     		}

     		ConsoleProgressBar cpb_texts = new ConsoleProgressBar(files.size())
     		println "Splitting CoNLL-U files..."
     		for (File master : files) {
     			cpb_texts.tick()
     			if (!master.getName().endsWith(".conllu")) {
     				continue
     			}

     			String current_text_id = FileUtils.stripExtension(master)
     			def writer = new File(srcDirectory, current_text_id+".conllu").newWriter("UTF-8", true)
     			try {
     				master.eachLine("UTF-8") { line ->
     					if (line.startsWith("# newdoc id = ")) {
     						String text_id = line.substring("# newdoc id = ".length())
     						if (!text_id.equals(current_text_id)) {
     							writer.close()
     							current_text_id = text_id
     							writer = new File(srcDirectory, current_text_id+".conllu").newWriter("UTF-8", true)
     						}
     					}
     					writer.println(line)
     				}
     			} finally {
     				// the last opened target must be closed too, otherwise its buffered lines may never reach the disk
     				writer.close()
     			}
     		}
     		cpb_texts.done()
     		return true
     	}

     	/**
     	 * Converts each ".conllu" file of inputDirectory to an XML-TEI file
     	 * named after the source file and written in srcDirectory. Files
     	 * without any sentence produce no output.
     	 *
     	 * @param inputDirectory directory containing the ".conllu" files
     	 * @param srcDirectory directory receiving the ".xml" files
     	 * @param project the project being imported, used to read and store the UD prefix preference
     	 * @return false if inputDirectory cannot be listed, true otherwise
     	 */
     	public static def convertCoNLLU2TEI(File inputDirectory, File srcDirectory, def project) {
     		def files = inputDirectory.listFiles()
     		if (files == null) {
     			println "Aborting. No CONLL file found in $inputDirectory."
     			return false
     		}

     		def properties = Arrays.asList(ImportCoNLLUAnnotations.UD_PROPERTY_NAMES)

     		String prefix = UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.UDPREFIX, UDPreferences.getInstance().getString(UDPreferences.UDPREFIX));
     		UDPreferences.getInstance().setProjectPreferenceValue(project, UDPreferences.UDPREFIX, prefix); // copy the current preference into the corpus preference

     		// explicit parsing, see process(): Groovy truth would turn the "false" String into true
     		boolean keepContractions = Boolean.parseBoolean(UDPreferences.getInstance().getString(UDPreferences.KEEP_CONTRACTIONS))

     		def headPropertiesToProject = UDPreferences.getInstance().getString(UDPreferences.IMPORT_HEAD_TO_PROJECT).split(",") as Set
     		def depsPropertiesToProject = UDPreferences.getInstance().getString(UDPreferences.IMPORT_DEPS_TO_PROJECT).split(",") as Set

     		ConsoleProgressBar cpb_texts = new ConsoleProgressBar(files.size())
     		println "Parsing CoNLL-U files..."
     		for (File master : files) {
     			cpb_texts.tick()
     			if (!master.getName().endsWith(".conllu")) {
     				continue
     			}

     			String text_id = FileUtils.stripExtension(master)
     			def content = readSentences(master, properties) // list of [par_id, sent_id, words, comments]
     			if (content.size() == 0) {
     				continue
     			}

     			File xmlFile = new File(srcDirectory, text_id+".xml")
     			BufferedOutputStream output = new BufferedOutputStream(new FileOutputStream(xmlFile))
     			XMLStreamWriter writer = null
     			try {
     				writer = XMLOutputFactory.newInstance().createXMLStreamWriter(output, "UTF-8")

     				writer.writeStartDocument("UTF-8","1.0")
     				writer.writeStartElement ("TEI")
     				writer.writeDefaultNamespace("http://www.tei-c.org/ns/1.0")
     				writer.writeNamespace("txm", "http://textometrie.org/1.0")
     				writer.writeCharacters("\n")
     				writer.writeStartElement ("teiHeader")
     				writer.writeEndElement()
     				writer.writeCharacters("\n")
     				writer.writeStartElement ("text")
     				writer.writeCharacters("\n")

     				String current_par_id = null
     				for (def sentence : content) { // for all sentences of the current text
     					String par_id = sentence[0]
     					String sent_id = sentence[1]
     					def words = sentence[2]
     					def comments = sentence[3]

     					// open a new paragraph each time the paragraph id changes
     					if (current_par_id == null || par_id != current_par_id) {
     						if (current_par_id != null) {
     							writer.writeEndElement() // p
     						}
     						writer.writeStartElement ("p");
     						writer.writeAttribute("id", par_id)
     						writer.writeCharacters("\n")
     						current_par_id = par_id
     					}

     					writer.writeStartElement ("s")
     					writer.writeAttribute("id", sent_id)
     					writer.writeCharacters("\n")

     					for (def comment : comments) {
     						writer.writeComment(comment.replace("--", "―")) // "--" is not allowed inside an XML comment
     						writer.writeCharacters("\n")
     					}

     					if (!keepContractions) { // merge properties in the "-" word and remove the parts
     						mergeContractions(words, properties, text_id, par_id, sent_id)
     					}

     					if (headPropertiesToProject.size() > 0 || depsPropertiesToProject.size() > 0) {
     						LinkedHashMap sentencehash = new LinkedHashMap()
     						for (def word : words) {
     							sentencehash[word["id"]] = word
     						}
     						ImportCoNLLUAnnotations.buildPropertiesProjections(sentencehash, headPropertiesToProject, depsPropertiesToProject)
     					}

     					for (def word : words) {
     						writer.writeStartElement ("w")
     						for (String p : word.keySet()) {
     							def value = word[p]
     							if (p == "feats") value = "|"+value+"|"
     							writer.writeAttribute(prefix+p, value)
     						}
     						writer.writeCharacters(word["form"])
     						writer.writeEndElement() // w
     						writer.writeCharacters(" ")
     					}
     					writer.writeCharacters("\n")
     					writer.writeEndElement() // s
     				}
     				if (current_par_id != null) {
     					writer.writeEndElement() // p
     					writer.writeCharacters("\n")
     				}
     				writer.writeEndElement() // text
     				writer.writeCharacters("\n")
     				writer.writeEndElement() // TEI
     			} finally {
     				// XMLStreamWriter.close() does not close the underlying stream: close both, even on error
     				try {
     					if (writer != null) writer.close()
     				} finally {
     					output.close()
     				}
     			}
     		}
     		cpb_texts.done()
     		return true
     	}

     	/**
     	 * Reads a CoNLL-U file and groups its token lines into sentences.
     	 * Lines whose number of tab-separated columns differs from the number
     	 * of property names are ignored.
     	 *
     	 * @param master the CoNLL-U file
     	 * @param properties the property names, one per CoNLL-U column
     	 * @return the list of sentences, each one being [par_id, sent_id, words, comments]
     	 *         where words is a list of maps (property name -> column value)
     	 */
     	private static List readSentences(File master, List properties) {
     		def content = []
     		String sent_id = ""
     		String par_id = "1"
     		def comments = []
     		def words = []

     		// stores the pending sentence, if any, and resets the per-sentence state
     		def storeSentence = {
     			if (words.size() > 0) {
     				content.add([par_id, sent_id, words, comments])
     				sent_id = ""
     				par_id = "1" // NOTE(review): the paragraph id is reset after every sentence - confirm this is intended
     				comments = []
     				words = []
     			}
     		}

     		master.eachLine("UTF-8") { line ->
     			if (line.startsWith("# newdoc id = ")) {
     				// already set or ignored
     			} else if (line.startsWith("# sent_id = ")) {
     				sent_id = line.substring("# sent_id = ".length())
     			} else if (line.startsWith("# newpar id = ")) {
     				par_id = line.substring("# newpar id = ".length())
     			} else if (line.startsWith("#")) {
     				comments << line
     			} else if (line.trim().isEmpty()) {
     				storeSentence()
     			} else {
     				def split = line.split("\t")
     				if (split.size() != properties.size()) {
     					return // not a token line, go to the next line
     				}
     				HashMap<String, String> wProperties = new HashMap<String, String>()
     				for (int i = 0 ; i < split.size() ; i++) {
     					wProperties[properties[i]] = split[i]
     				}
     				// Token "1" starts a new sentence when the previous one was not closed by an empty line.
     				// A pending multi-word line such as "1-2" belongs to the sentence being started,
     				// so only already read plain integer ids trigger the cut.
     				if (wProperties.get("id").equals("1") && words.any { it.get("id")?.isInteger() }) {
     					storeSentence()
     				}
     				words << wProperties
     			}
     		}
     		storeSentence() // the last sentence may not be followed by an empty line
     		return content
     	}

     	/**
     	 * Merges, in place, the tokens covered by each multi-word line (id
     	 * "first-last") into that line: every property except "id" and "form"
     	 * receives the values of the covered tokens joined with "+", then the
     	 * covered tokens are removed from the list.
     	 *
     	 * @param words the words of one sentence (maps of property name -> value)
     	 * @param properties the property names to merge
     	 * @param text_id text id, used in the error message only
     	 * @param par_id paragraph id, used in the error message only
     	 * @param sent_id sentence id, used in the error message only
     	 */
     	private static void mergeContractions(List words, List properties, String text_id, String par_id, String sent_id) {
     		for (int i = 0 ; i < words.size() ; i++) {
     			def word = words[i]
     			String id = word["id"]
     			int index = (id == null) ? -1 : id.indexOf("-")
     			if (index < 0) {
     				continue // not a multi-word line
     			}
     			String id1 = id.substring(0, index)
     			String id2 = id.substring(index+1)
     			int first = words.findIndexOf { it["id"] == id1 }
     			int last = words.findIndexOf { it["id"] == id2 }
     			def token1 = (first < 0) ? null : words[first]
     			def token2 = (last < 0) ? null : words[last]
     			// the covered tokens must directly follow the multi-word line
     			if (token1 == null || token2 == null || first != i + 1 || last < first) {
     				println "Error: text $text_id paragraph $par_id sent $sent_id word $id has wrong token ids $id1 and $id2 -> $token1 and $token2"
     				continue
     			}
     			def parts = new ArrayList(words.subList(first, last + 1))
     			for (String p : properties) {
     				if (p == "id" || p == "form") continue // keep the id and the form of the multi-word line
     				word[p] = parts.collect { it[p] }.join("+")
     			}
     			words.subList(first, last + 1).clear() // remove the merged tokens
     		}
     	}
     }

     #!/usr/bin/perl
     use File::Basename;
     # Script identification (name and version are shown in the help text).
     my $CMD      = "conll2tiger.pl";
     my $VERSION  = "1.5";
     my $MODIFIED = "8/12/2015";        # adapted for Perseus CoNLL produced with conll.pl -l.  CHECK: does SRCMF still work?
     # Column indexes, 0-based (0 = column 1, 1 = column 2, etc.).
     # NOTE(review): the original remark said these defaults are "the predicted values in CoNLL 2009 format";
     # the modification log further down states they were updated for CoNLL-U - confirm which one holds.
     my $coll   = 2;                    # lemma
     my $colm   = 3;                    # morph (pos), option -M
     my $colf   = 5;                    # features
     my $colh   = 6;                    # head, option -H
     my $cold   = 7;                    # deprel, option -D
     my $outdir = "conllexport";        # output directory, option -o
     my $split  = 1000;                 # split output after nr sentences, option -s
     # tree structure
     my %dominates          = ();
     my %deprel             = ();                                         # deprel{nr} = deprel
     my @daughters          = ();                                         # daughter nodes, stored in %dominates
     my %duplicates         = my %relators = my %notes = my %aux = ();    # store nodes of duplicates, relators
     my $type               = "--";                                       # node attribute
     my $vform              = my $vlemma = "--";                          # node attributes for verbs store form and lemma
     my $label              = "D";                                        # default edge label
     my $nt_features_header = '';                                         # option -x
     my $nt_features        = '';                                         # option -x
     my $nt_empty_features  = '';                                         # option -x
     my @scodes             = ();                                         # option -x
     my $add_to_sentcode    = '';
     my $rootname           = 'root';                                     # default root category, option -R
     my $featcol            = 13;                                         # column where attributes are stored, option -X
     ######################################################################
     #  conll2tiger.pl: converts CoNLL-U from the Universal Dependencies
     #  treebanks to TigerXML
     #          Achim Stein <achim.stein@ling.uni-stuttgart.de>
     # License : GNU GPL v. 3 (see the LICENSE file)
     ######################################################################
     # TO DO:
     # - coordination
     # - handling of overly long sentences that have been split (conll.pl -r 100)
     #   - for this, $wnr is used as the word ID instead of $w (the for-loop counter)
     #   - there are still unbound nodes when the governor has been removed (i.e. is in the other part)
     ######################################################################
     # Modifications by Alexei Lavrentiev <alexei.lavrentev@ens-lyon.fr>
     # for Profiterole project (2019-2021)
     # 2019-09-25
     # - updated default column numbers for CoNLL-U SRCMF format
     # - added processing for comment lines
     # - added @textid to terminal nodes
     # - deleted ppos, pmor et plemma (predicted tags and lemmas)
     # - replaced specific SRCMF with standard UD tags
     # Update 2020-05-13
     # - added @editionId for synchronization with BFM word ID
     # Update 2021-03-22
     # - using $infilename for @textid
     # - added support for .conllu extension
     # Update 2021-03-29
     # - added editionId to declarations in main.xml
     # Update 2021-07-16
     # - added "punct" to cat values
     # Update 2021-07-20
     # - added cat value list compiled from
     #   https://universaldependencies.org/ext-dep-index.html and the previous
     #   version. All relation types and subtypes from the UD 2.8 corpora
     #   should be there.
     # - contractions indexed
     ######################################################################
     # Usage text, printed to STDERR when option -h is given.
     # The string is interpolated here, before the command line is parsed, so the
     # defaults it shows ($outdir, $cold, $colh, ...) are the built-in ones.
     my $HELP = "
     ==================================================================
     $CMD $VERSION: Help
     ==================================================================
     FUNKTION: converts CoNLL parser output to TigerXML (for mate tools)
               creates master file, splits input files, corrects unbound nodes
     SYNTAX:      $CMD [Options] <CoNLL file>
     OPTIONEN:
      -c          ignore coordination (delete coordx- prefix in deprel)
      -C str      corpus specials: nca
      -h          show help
      -o          create all files in this output directory (default: $outdir)
     set COLUMNS for required info (0 = column 1, 1 = column 2, etc.)
      -D nr       colum for deprel default=$cold
      -H nr       colum for head default=$colh
      -M nr       colum for morphology (POS) default=$colm
      -F nr       colum for morph. features default=$colf
      -R str      Root category (default: $rootname)
      -s nr       split output files after each nr sentence (default = $split)
      -x str,...  include these attributes if present in the -X column of the first word
                  (the first code is also copied into the sentence id)
      -X nr       the column where attributes are stored (default: $featcol)
     EXAMPLE:
       - For mate parser output: no further options required
         $CMD parsed.conll
       - For Le Monde 2005: include attributes
         gunzip -c parsed.conll.gz | conll2tiger.pl -x date,artnr,rubr
       - For NCA:
         conll2tiger.pl -C nca -x id,deaf,titreDees,editionDees,manuscritDees,regionDees,coefficientRegionDees,dateMoyenneDees,codeRegional,coefficientRegional,vers,ponctuation,mots,passage,commentairePhilologique,qualite,sourceQualite,commentaireForme,auteur,dateComposition,dateManuscrit,lieuComposition,lieuManuscrit,sourceDateComposition,sourceDateManuscrit,sourceLieuComposition,sourceLieuManuscrit,genre,traditionTextuelle,analyses,lignes,editionNCA tagged-oldfrench-lrec2014-dep.conll
     ";
     ###########################################################################
     #                    DO NOT MODIFY FOLLOWING CODE !
     ###########################################################################
     ###########################################################################
     # parse the command line
     ###########################################################################
     use Getopt::Std;

     # Command-line switches: -c and -h are plain flags, every other switch
     # takes a value. (-c used to be declared as "c:", which made it swallow the
     # next argument, usually the input file; -F was documented in the help text
     # but neither declared nor handled.)
     getopts('cC:hD:F:H:M:o:R:s:x:X:');

     if ( defined($opt_h) ) {
         print STDERR "$HELP";
         exit(0);
     }

     # Each given option overrides the corresponding default.
     $outdir   = $opt_o if defined($opt_o);    # output directory
     $corpus   = $opt_C if defined($opt_C);    # corpus specials (e.g. nca)
     $cold     = $opt_D if defined($opt_D);    # deprel column
     $colf     = $opt_F if defined($opt_F);    # morph. features column
     $colh     = $opt_H if defined($opt_H);    # head column
     $colm     = $opt_M if defined($opt_M);    # morphology (POS) column
     $rootname = $opt_R if defined($opt_R);    # root category
     $split    = $opt_s if defined($opt_s);    # sentences per output file
     $featcol  = $opt_X if defined($opt_X);    # column where attributes are stored

     # -x: comma-separated attribute names, each one declared as an NT feature
     if ( defined($opt_x) ) {
         @scodes = split( ",", $opt_x );
         foreach my $scode (@scodes) {
             $nt_features_header .= sprintf( "<feature name=\"%s\" domain=\"NT\"></feature>\n", $scode );
         }
         $nt_features_header =~ s/\bid\b/ncaid/;    # avoid reserved Tiger attribute "id"
     }
     my @colnames = ( "url", "ID", "FORM", "LEMMA", "PLEMMA", "POS", "PPOS", "FEAT", "PFEAT", "HEAD", "PHEAD", "DEPREL", "PDEPREL" );
     # my %pos = %lemma = %form = %deprel = %head = %governs = %cat2abbrev = %abbrev2cat = %coordnr_cat = %coordnr_head = %w_coordnr = %w_head = %w_deprel = %first_coordnr = %coordnr_first = %verb_head = ();
     my @coordelements = ();
     my $id = my $form = my $lemma = my $plemma = my $pos = my $ppos = my $feat = my $pfeat = my $head = my $phead = my $deprel = my $pdeprel = my $edition_id = "";
     my $timestamp = `date`;
     chomp($timestamp);

     # Base name of the output files: the input file name without its
     # .conll/.conllu extension, or 'subcorpus' when no file name is given.
     my $infile = $ARGV[0];
     $infile = '' unless defined($infile);
     # anchored at the end: an unanchored pattern would cut the first ".conll"
     # found anywhere in the path (e.g. in a directory name) and keep the extension
     $infile =~ s/\.conllu?$//i;
     if ( $infile eq '' ) {
         $infile = 'subcorpus';
     }
     my $counter = 1;
     $suffix = sprintf( "%05d", $counter );
     $infilename = basename($infile);

     # Create the output directory without going through a shell, so that
     # directory names containing spaces or shell metacharacters work.
     unless ( -d $outdir ) {
         mkdir($outdir) or die "\ncannot create directory $outdir: $!\n";
     }

     # Three-argument open: the file names are never interpreted as open modes.
     open( XML,    '>', "$outdir/$infilename-$suffix.xml" ) or die "\nopen file error of $outdir/$infilename-$suffix.xml\n";
     open( LOG,    '>', "$outdir/conversion.log" )          or die "\nopen file error of conversion.log\n";
     open( MASTER, '>', "$outdir/main.xml" )                or die "\nopen file error of main.xml\n";
     write_xml_header();
     write_master_header();

     # flush output for log and master file
     # (MASTER stays the selected default output handle afterwards, as before)
     select(LOG);
     $| = 1;
     select(MASTER);
     $| = 1;

     $commandline = $0 . " " . ( join " ", @ARGV );
     print LOG "$commandline\n\n";
     print MASTER "<subcorpus name='$infilename-$suffix' external='file:$infilename-$suffix.xml'/>\n";

     $/ = "";    # treat empty line as RS
     while (<>) {
         if ( $. % $split == 0 ) {
             print XML "</subcorpus>\n";
             close(XML);
             $suffix = sprintf( "%05d", ++$counter );
             open( XML, ">$outdir/$infilename-$suffix.xml" ) or die "\nopen file error\n";
             write_xml_header();
             print MASTER "<subcorpus name='$infilename-$suffix' external='file:$infilename-$suffix.xml'/>\n";
+        }
         # ----------------------------------------
         # set root (or fake root if ROOT is missing)
         # ----------------------------------------
         $rootnode = $fakeroot = 0;    # m = Treat string as multiple lines, so that ^ matches beginning of line
         $thisrootname = $rootname;
         ($rootnode) = (/^(\d+?)\s.*?\b$rootname\b/m);    # real root marked by parser
         if ( $rootnode == 0 ) {
             #    ($rootnode) = (/^(\d+?)\t.*?\t-1\t0\t/m);  # no marked ROOT, but top node (head = 0)   TOO SPECIFIC
             #    ($rootnode) = (/^(\d+?)\t.*?\t.*?\t.*?\t.*?\t.*?\t.*?\t.*?\t0\t/m);  # no marked ROOT, but top node (head = 0) in col9 (original by Achim Stein)
             ($rootnode) = (/^(\d+?)\t.*?\t.*?\t.*?\t.*?\t.*?\t0\t/m);    # no marked ROOT, but top node (head = 0) in col7 (updated by AL)
             print LOG " Warning sentence $.: not marked root ($rootname), using top node $rootnode\n";
             $fakeroot     = 1;
             $thisrootname = 'nSnt';
+        }
         if ( $rootnode == 0 ) {
             $rootnode = 1;                                               # set fake root if nothing goes
             print LOG " Error sentence $.: setting fake root to first word:\n$_\n";
             $fakeroot     = 2;
             $thisrootname = 'Err';
+        }
         my @cols = ();
         @words     = split(/\n/);
         @terminals = ();
         %dominates = ();                                                 # empty at beginning of sentence
         %deprel    = ();                                                 # empty at beginning of sentence
         %aux       = ();                                                 # empty at beginning of sentence
         @daughters = ();
         my $commentlines = 0;                                            #added by AL
         #  my $contractions = 0; #added by AL
         #  my $text_id = "unknown_text";
         my $text_id = $infilename;
         my $sent_id = "0";
         # ----------------------------------------
         # loop through words #1: write tokens (terminal nodes) to XML file
         # store tree relevant information for loop #2
         # ----------------------------------------
         for ( my $w = 0 ; $w <= $#words ; $w++ ) {
             # Added by AL for comment lines
             if ( $words[$w] =~ /^#/ ) {
                 if ( $words[$w] =~ /^# newdoc/ ) {
                     $text_id = $words[$w];
                     $text_id =~ s/# newdoc id = //;
+                }
                 elsif ( $words[$w] =~ /^# sent_id/ ) {
                     $sent_id = $words[$w];
                     $sent_id =~ s/# sent_id = //;
+                }
                 #	print LOG "Comment line loop 1: $words[$w]\n";
                 $commentlines++;
                 next;
+            }
             # Added by AL for contractions
             elsif ( $words[$w] =~ /^\d+-\d+/ ) {
                 #	print LOG "Contraction line loop 1: $words[$w]\n";
                 $commentlines++;
                 #	$contractions++;
                 next;
+            }
             else {
                 if ( defined($opt_c) ) {
                     $words[$w] =~ s/coord(\d+)-//g;
+                }
                 @cols       = split( /\t/, $words[$w] );
                 $wnr        = $cols[0];
                 $word       = $cols[1];
                 $lemma      = $cols[2];
                 $plemma     = $cols[2];                    # predicted
                 $pos        = $cols[3];
                 $ppos       = $cols[4];                    # predicted
                 $mor        = $cols[5];
                 $pmor       = $cols[5];                    # predicted
                 $cat        = $cols[$cold];
                 $edition_id = $cols[9];
                 $edition_id =~ s/^.*XmlId=([^|]+).*$/\1/g;
                 if ( $cat =~ /[<>]/ ) {
                     print LOG "Warning in line $.: illegal node name: \"$cat\" -> \"Err2\"\n";
                     $cat = 'Err2';
+                }
                 # NCA: enclose lemmas in underscores (easier for regex construction)
                 if ( $corpus =~ /nca/i ) {
                     $lemma = "_" . "$lemma" . "_";
+                }
                 clean_data();
                 # get attribute-value pairs from col #13 of first word (option -x)
                 if ( $opt_x == "all" ) {
                     $cols[$featcol] = "all=" . $cols[$featcol];
+                }
                 if ( $w == 0 && $cols[$featcol] =~ /=/ ) {
                     #      print STDERR "========== getting att-value for word $w: $cols[$featcol] scodes=@scodes\n";
                     $nt_features = $nt_empty_features = '';
                     #      while($cols[$featcol] =~ m/ (.*?)="([^"]*)"/gs) {   # quoted values
                     while ( $cols[$featcol] =~ m/ ?([^=]*?)="?([^, ]+)\b"?\b/gs ) {    # maybe unquoted values (e.g. Le Monde 2005)
                         $att = $1;
                         $val = $2;
                         # pick the attributes that match those of the command line option -x
                         for ( my $t = 0 ; $t <= $#scodes ; $t++ ) {
                             if ( $att eq $scodes[$t] ) {
                                 $val =~ s/\&/\&amp;/g;                                 #  replace "&" in values (appears in URLs)
                                 if ( $t == 0 ) { $add_to_sentcode = "_$att$val"; }
                                 $nt_features = $nt_features . " $att=\"$val\"";
                                 #	    print STDERR "$./$w/$featcol: $cols[$featcol] --- nt_features: $nt_features\n";
+                            }
                             if ( $att eq $scodes[$t] ) { $nt_empty_features = $nt_empty_features . " $att=\"--\""; }
+                        }
+                    }
                     # replace the reserved feature 'id' (Tiger)
                     $add_to_sentcode =~ s/\bid=/ncaid=/;
                     $nt_features =~ s/\bid=/ncaid=/;
                     $nt_empty_features =~ s/\bid=/ncaid=/;
                 }    # if col 13 contains attributes
                 else {
                     if ( defined($opt_x) && ( $w == 0 ) ) {
                         print STDERR "Warning: sentence=$.  option -x is defined, but no attribute=value declarations were found!\n";
+                    }
+                }
                 # store output for terminal node in array, output later. For double categories make a duplicate node.
                 $tempid = sprintf( "%d_%d", $., $wnr );
     #    push(@terminals, sprintf("      <t id=\"s%d_%d\" word=\"%s\" pos=\"%s\" mor=\"%s\" lemma=\"%s\" ppos=\"%s\" pmor=\"%s\" plemma=\"%s\" textid=\"%s\" editionId=\"%s\"/>\n", $., $wnr, $word, $pos, $mor, $lemma, $ppos, $pmor, $plemma, $text_id, $edition_id));
                 push( @terminals,
                     sprintf( "      <t id=\"s%d_%d\" word=\"%s\" pos=\"%s\" mor=\"%s\" lemma=\"%s\" textid=\"%s\" editionId=\"%s\"/>\n", $., $wnr, $word, $pos, $mor, $lemma, $text_id, $edition_id ) );
                 if ( $cat =~ /_/ ) {
     #      push(@terminals, sprintf("      <t id=\"s%d_%d_dupl\" word=\"%s\" pos=\"%s\" mor=\"%s\" lemma=\"%s\" ppos=\"%s\" pmor=\"%s\" plemma=\"%s\" textid=\"%s\" editionId=\"%s\"/>\n", $., $wnr, "*", "_", "_", "_", "_", "_", "_", $text_id, $edition_id));
                     push( @terminals,
                         sprintf( "      <t id=\"s%d_%d_dupl\" word=\"%s\" pos=\"%s\" mor=\"%s\" lemma=\"%s\" textid=\"%s\" editionId=\"%s\"/>\n", $., $wnr, "*", "_", "_", "_", $text_id, $edition_id ) );
                     $duplicates{$tempid} = 1;    # store, check later to attach the duplicates to the mother
+                }
                 # associate Aux with main verb, to create an attribute in the verb node in loop #2 (TODO: more than one Aux)
                 if ( $cat =~ /Aux/ ) {
                     $aux{ $cols[$colh] } = "$word" . "_" . "$plemma";    # $aux{head} = word_lemma (of Aux)
+                }
                 # ----------------------------------------
                 # store information needed for tree
                 # ----------------------------------------
                 # if fake rootnode == 1: nSnt as root node
                 if ( ( $fakeroot == 1 ) && ( $w - $commentlines + 1 == $rootnode ) ) {
                     $cat = 'nSnt';
                     $notes{$tempid} = 'Warning no marked ROOT node in CoNLL';    # TODO: geht nicht
+                }
                 # if fake rootnode == 2: flatten structure: attach all words to the first word
                 if ( ( $fakeroot == 2 ) && ( $w - $commentlines + 1 != $rootnode ) ) {
                     $cols[$colh] = 1;
                     $notes{$tempid} = 'Error neither ROOT node nor top node in CoNLL';
+                }
                 # correct unbound words in parser output (phead = 0, but not marked as ROOT)
                 if ( ( $cols[$colh] eq "0" ) && ( $w - $commentlines + 1 != $rootnode ) ) {    #AL: added: -$commentlines
                     printf LOG " Warning sentence $. ($tempid): unbound node %d (attached to root %d)\n", ( $w - $commentlines + 1 ), $rootnode;
                     $cols[$colh]    = $rootnode;
                     $cat            = 'Err';                                                   # let Err instead of deprel appear in dom attribute
                     $notes{$tempid} = 'Warning unbound node in CoNLL';
+                }
                 # store for R edge labels
                 if ( $cols[$cold] =~ /RelN?C/ ) {
                     $relators{$tempid} = 1;
+                }
                 # store deprel for dom attribute
                 $deprel{$tempid} = $cat;                                                       # $cols[$cold];
                                                                                                # if real root, add this node to daughter array, store array in hash dominates{head}{@daughters}
                 if ( ( $fakeroot < 2 ) && ( $w - $commentlines + 1 != $rootnode ) ) {
                     @daughters = @{ $dominates{ $cols[$colh] } };                              # get the array from the hash of the dominating node
                     push( @daughters, $wnr );
                     $dominates{ $cols[$colh] } = [@daughters];
+                }
             }    # for each word loop #1
         }    # AL condition end
         # print graph code (needs root attribute) and terminal nodes
         if ( $rootnode == 0 ) {
             $noroot++;
             print LOG "Error sentence $. ($tempid): root node not found:\n$_\n";
             next;
+        }
         else {
             printf XML "<s id=\"s%s%s\" textid=\"$text_id\" sentid=\"$sent_id\">\n", $., $add_to_sentcode;
             print XML "  <graph root=\"n$._$rootnode\">\n";
             print XML "    <terminals>\n";
             for ( my $t = 0 ; $t <= $#terminals ; $t++ ) {
                 print XML $terminals[$t];
+            }
             print XML "    </terminals>\n";
+        }
         # ----------------------------------------
         # loop through words #2 to build Tiger tree (non terminal nodes)
         # ----------------------------------------
         print XML "    <nonterminals>\n";
         for ( my $i = 0 ; $i <= $#words ; $i++ ) {
             #Added AL for comment lines
             if ( $words[$i] =~ /^#/ ) {
                 #       print LOG "Comment line loop 2 : $words[$i]\n";
                 next;
+            }
             #Added AL for contractions
             if ( $words[$i] =~ /^\d+-\d+/ ) {
                 #       print LOG "Contraction loop 2 : $words[$i]\n";
                 next;
+            }
             else {
                 @cols = split( /\t/, $words[$i] );
                 $w = $cols[0];
                 ### TODO: redundante Variablenzuweisung (= loop #1)??
                 $word   = $cols[1];
                 $lemma  = $cols[2];
                 $plemma = $cols[3];       # predicted
                 $pos    = $cols[4];
                 $ppos   = $cols[5];       # predicted
                 $mor    = $cols[6];
                 $pmor   = $cols[7];       # predicted
                 $cat    = $cols[$cold];
                 # A category containing '<' or '>' is reported as an illegal node name
                 # and replaced by the placeholder 'Err2'.
                 if ( $cat =~ /[<>]/ ) {
                     # Filehandle names are case-sensitive: the rest of this script writes
                     # its log through LOG, so 'Log' would silently drop the warning.
                     print LOG "Warning in line $.: illegal node name: \"$cat\" -> \"Err2\"\n";
                     $cat = 'Err2';
                 }
                 #  OF parser has not learned punctuation: set cat for punctuation to PON
                 if ( ( $corpus =~ /nca/i ) && ( $pos eq 'PON' ) ) {
                     $cols[$cold] = $cat = 'Pon';
+                }
                 clean_data();
                 # retrieve daughters, make dom attribute (string of dominated nodes)
                 @daughters = @{ $dominates{"$w"} };
                 $dom       = '';
                 for ( my $d = 0 ; $d <= $#daughters ; $d++ ) {
                     $dom = $dom . "_" . $deprel{"$._$daughters[$d]"};
+                }
                 if ( $dom =~ /_/ ) {
                     $dom =~ s/^_//;
+                }
                 else {
                     $dom = '--';
+                }
                 # if verbal, set node attributes for verb form and lemma
                 $type = "nV";
                 $vform = $vlemma = "--";
                 if ( $pos =~ /VER/ ) {    # AL: $ppos -> $pos
                     if    ( $mor =~ /infi/ )       { $type = "VInf"; }    #AL: $pmor -> $mor
                     elsif ( $pmor =~ /pper|ppre/ ) { $type = "VPar"; }
                     else                           { $type = "VFin"; }
                     # if Aux is present, create attribute for main verb
                     if ( $aux{$w} =~ /(.*?)_(.*)/ ) {
                         $vform  = "$1";
                         $vlemma = "$2";
+                    }
                     # else create attr for simple verb
                     else {
                         $vform  = $word;
                         $vlemma = $lemma;    # AL: $plemma -> $lemma (always void in SRCMF)
+                    }
                     # NCA: enclose lemmas in underscores (easier for regex construction)
                     if ( $corpus =~ /nca/i ) {
                         $vlemma = "_" . "$vlemma" . "_";
+                    }
+                }
                 # call output function (twice for duplicate categories)
                 if ( $cat =~ /(.*?)_(.*)/ ) {
                     write_nonterminals( "$2", "" );         # RelNC is always node (see clean categories), function is duplicate, e.g. SjPer_RelNC
                     write_nonterminals( "$1", "_dupl" );    # other category is duplicate
+                }
                 else {
                     write_nonterminals($cat);
+                }
             }    # for words
         }    #AL end condition
         print XML "    </nonterminals>\n";
         print XML "  </graph>\n";
         print XML "</s>\n";
         if ( $. % 100 == 0 ) { print STDERR "\b\b\b\b\b\b\b\b"; printf STDERR "%08d", $.; }
     }    # main
     # Close the <subcorpus> element (its opening tag is printed by write_xml_header).
     print XML "</subcorpus>\n";

     # Conversion summary followed by usage hints, each written to STDERR
     # with its own print call.
     print STDERR "\n$CMD: $. sentences converted. Results in $outdir. Log in $outdir/conversion.log.\n";
     for my $hint (
         "   Hint 1: on OS X convert master file to MacRoman, e.g  iconv -f latin1 -t macroman\n",
         "   Hint 2: use tiger.pl -c <Tiger XML file> to detect unbound nodes.\n",
         "   Hint 3: build reliable feature declarations using tiger.sh\n",
         "           tiger.sh -a \"lemma word pos ppos\"  (for terminals)\n",
         "           tiger.sh -A \"lemma word pos ppos\"  (for non-terminals)\n",
       )
     {
         print STDERR $hint;
     }

     # $noroot counts the sentences skipped because no root node was found.
     print STDERR "$noroot sentences ignored: root not found (see log file)\n" if $noroot > 0;

     # Finish the master file, then release the three output handles and stop.
     write_master_footer();
     close(MASTER);
     close(XML);
     close(LOG);
     exit;
     # ----------------------------------------
     # sub
     # ----------------------------------------
     # write_xml_header
     # Writes the XML declaration and the opening <subcorpus> tag to the XML
     # filehandle. The subcorpus name is built from the globals $infilename and
     # $suffix, joined by a hyphen. Takes no arguments; returns the result of print.
     sub write_xml_header {
         # The double-quoted literal spans several source lines, so its line
         # breaks and leading blanks are emitted verbatim.
         print XML "<?xml version=\"1.0\" encoding=\"UTF-8\"?>
       <subcorpus name=\"$infilename-$suffix\">
     ";
     }
     sub write_master_header {
         printf MASTER '<?xml version="1.0" encoding="UTF-8"?>
     ';
         printf MASTER "<corpus id=\"$corpus\">
     <head>
       <meta><name>$corpus</name>
         <author>ILR Stuttgart</author>
         <date></date>
         <description>Parsed with mate tools using a SRCMF-based grammar model (http://srcmf.org). </description>
         <format>SRCMF</format>
         <history>TigerXML converted by conll2tiger.pl</history>
       </meta>
     ";
         #  printf MASTER '<annotation>
         #<feature name="word" domain="T" ></feature>
         #<feature name="pos" domain="T" ></feature>
         #<feature name="mor" domain="T" ></feature>
         #<feature name="lemma" domain="T" ></feature>
         #<feature name="ppos" domain="T" ></feature>
         #<feature name="pmor" domain="T" ></feature>
         #<feature name="plemma" domain="T" ></feature>
         #<feature name="cat" domain="NT" >
         #  <value name="Apst">apostrophe</value>
         #  <value name="AtObj">attribut d objet</value>
         #  <value name="AtRfc">attribut réfléchi</value>
         #  <value name="AtSj">attribut de sujet</value>
         #  <value name="AttributReflechi">attribut réfléchi</value>
         #  <value name="Aux">auxilié</value>
         #  <value name="AuxA">auxilié actif</value>
         #  <value name="AuxP">auxilié passif</value>
         #  <value name="Circ">circonstant</value>
         #  <value name="Circ_RelNC">circonstant pronom relatif</value>
         #  <value name="Cmpl">complément</value>
         #  <value name="Cmpl_RelNC">complément pronom relatif</value>
         #  <value name="Coo">coordination</value>
         #  <value name="Det">déterminant</value>
         #  <value name="Err">unbound node in CoNLL input</value>
         #  <value name="Err2">illegal node name was replaced</value>
         #  <value name="GpCoo">coordonné</value>
         #  <value name="Ignorer">Ignorer</value>
         #  <value name="Insrt">incidente</value>
         #  <value name="Intj">interjection</value>
         #  <value name="Lac">lacune</value>
         #  <value name="ModA">modifieur attaché</value>
         #  <value name="ModD">modifieur détaché</value>
         #  <value name="Ng">négation</value>
         #  <value name="NgPrt">forclusif</value>
         #  <value name="Obj">objet</value>
         #  <value name="Obj_RelNC">direct object pronom relatif</value>
         #  <value name="Pon">ponctuation</value>
         #  <value name="PON">ponctuation</value>
         #  <value name="Regim">régime</value>
         #  <value name="RelC">relateur coordonnant</value>
         #  <value name="RelNC">relateur non coordonnant</value>
         #  <value name="Rfc">réfléchi</value>
         #  <value name="Rfx">réfléxif renforcé</value>
         #  <value name="SjImp">sujet impersonnel</value>
         #  <value name="SjPer">sujet personnel</value>
         #  <value name="SjPer_RelNC">sujet personnel pronom relatif</value>
         #  <value name="Snt">phrase</value>
         #  <value name="ROOT">phrase</value>
         #  <value name="StructureMaximale">structure maximale</value>
         #  <value name="VFin">verbe fini</value>
         #  <value name="VInf">verbe infinitif</value>
         #  <value name="nMax">structure non-maximale</value>
         #  <value name="nSnt">non-phrase</value>
         #</feature>
         #<feature name="coord" domain="NT" ></feature>
         #<feature name="dom" domain="NT" ></feature>
         #<feature name="type" domain="NT" >
         #  <value name="nV">élément non-verbal</value>
         #  <value name="VFin">verbe fini</value>
         #  <value name="VInf">verbe infinitif</value>
         #  <value name="VPar">verbe participial</value>
         #  <value name="--">nil</value>
         #</feature>
         #<feature name="vform" domain="NT"></feature>
         #<feature name="vlemma" domain="NT"></feature>
         #<feature name="note" domain="NT"></feature>
         #<feature name="snr" domain="NT"></feature>
         #';
         printf MASTER '<annotation>
     <feature name="word" domain="T" ></feature>
     <feature name="pos" domain="T" ></feature>
     <feature name="mor" domain="T" ></feature>
     <feature name="lemma" domain="T" ></feature>
     <feature name="textid" domain="T" ></feature>
     <feature name="editionId" domain="T" ></feature>
     <feature name="cat" domain="NT" >
       <value name="__UNDEF__">UNDEFINED !!!</value>
       <value name="acl:adv">acl:adv</value> <!-- Ukrainian -->
       <value name="acl:attr">acl:attr</value> <!-- Chukchi -->
       <value name="acl:cleft">acl:cleft</value> <!-- Norwegian, Swedish -->
       <value name="acl:fixed">acl:fixed</value> <!-- Beja -->
       <value name="acl:inf">acl:inf</value> <!-- Portuguese -->
       <value name="acl:relat">acl:relat</value> <!-- Chukchi -->
       <value name="acl:relcl">relative clause modifier</value> <!-- Akkadian, Albanian, Apurina, Arabic, Armenian, Assyrian, Beja, Belarusian, Breton, Bulgarian, Chinese, Czech, Danish, Dutch, English, Erzya, Estonian, Faroese, Finnish, French, German, Greek, Hebrew, Hindi, Hindi English, Icelandic, Indonesian, Irish, Italian, Karelian, Kazakh, Komi Permyak, Komi Zyrian, Korean, Latin, Lithuanian, Livvi, Manx, Marathi, Moksha, Naija, North Sami, Norwegian, Old East Slavic, Old French, Persian, Polish, Portuguese, Russian, Sanskrit, Scottish Gaelic, Slovak, Spanish, Swedish, Swedish Sign Language, Swiss German, Tagalog, Tamil, Telugu, Thai, Ukrainian, Urdu, Welsh, Western Armenian, Wolof -->
       <value name="acl">clausal modifier of noun (adnominal clause)</value>
       <value name="advcl:abs">advcl:abs</value> <!-- Latin -->
       <value name="advcl:cau">advcl:cau</value> <!-- Moksha -->
       <value name="advcl:cleft">advcl:cleft</value> <!-- French, Naija -->
       <value name="advcl:cmpr">advcl:cmpr</value> <!-- Latin, Polish -->
       <value name="advcl:cond">advcl:cond</value> <!-- Tamil, Telugu, Uyghur -->
       <value name="advcl:coverb">advcl:coverb</value> <!-- Cantonese -->
       <value name="advcl:eval">advcl:eval</value> <!-- Komi Zyrian -->
       <value name="advcl:lcl">advcl:lcl</value> <!-- Komi Permyak -->
       <value name="advcl:lto">advcl:lto</value> <!-- Komi Zyrian -->
       <value name="advcl:mcl">advcl:mcl</value> <!-- Komi Permyak -->
       <value name="advcl:pred">advcl:pred</value> <!-- Latin -->
       <value name="advcl:relcl">advcl:relcl</value> <!-- Polish, Western Armenian -->
       <value name="advcl:sp">advcl:sp</value> <!-- Ukrainian -->
       <value name="advcl:svc">advcl:svc</value> <!-- Ukrainian -->
       <value name="advcl:tcl">advcl:tcl</value> <!-- Apurina, Erzya, Komi Permyak, Komi Zyrian, Moksha, Romanian, Skolt Sami -->
       <value name="advcl">adverbial clause modifier</value>
       <value name="advmod:arg">advmod:arg</value> <!-- Polish -->
       <value name="advmod:cau">advmod:cau</value> <!-- Erzya, Komi Zyrian, Moksha -->
       <value name="advmod:comp">advmod:comp</value> <!-- Erzya -->
       <value name="advmod:deg">advmod:deg</value> <!-- Erzya, Komi Permyak, Komi Zyrian, Moksha, Skolt Sami -->
       <value name="advmod:det">advmod:det</value> <!-- Ukrainian -->
       <value name="advmod:df">advmod:df</value> <!-- Cantonese, Chinese -->
       <value name="advmod:emph">emphasizing word, intensifier</value> <!-- Akkadian, Arabic, Armenian, Catalan, Chukchi, Croatian, Czech, Indonesian, Komi Zyrian, Latin, Lithuanian, Polish, Sanskrit, Slovak, South Levantine Arabic, Tamil, Turkish, Turkish German, Upper Sorbian, Uyghur, Western Armenian -->
       <value name="advmod:eval">advmod:eval</value> <!-- Erzya, Komi Zyrian, Moksha, Skolt Sami -->
       <value name="advmod:fixed">advmod:fixed</value> <!-- Beja -->
       <value name="advmod:foc">advmod:foc</value> <!-- Erzya, Komi Zyrian, Moksha, Skolt Sami -->
       <value name="advmod:freq">advmod:freq</value> <!-- Komi Zyrian, Moksha -->
       <value name="advmod:lfrom">advmod:lfrom</value> <!-- Erzya, Komi Zyrian, Moksha -->
       <value name="advmod:lmod">locative adverbial modifier</value> <!-- Apurina, Danish, Erzya, Komi Permyak, Komi Zyrian, Moksha, Skolt Sami -->
       <value name="advmod:lmp">advmod:lmp</value> <!-- Erzya, Komi Zyrian -->
       <value name="advmod:locy">advmod:locy</value> <!-- Hungarian -->
       <value name="advmod:lto">advmod:lto</value> <!-- Erzya, Komi Zyrian, Moksha -->
       <value name="advmod:mmod">advmod:mmod</value> <!-- Erzya, Komi Permyak, Komi Zyrian, Moksha, Skolt Sami -->
       <value name="advmod:mode">advmod:mode</value> <!-- Hungarian -->
       <value name="advmod:neg">advmod:neg</value> <!-- Apurina, Buryat, Kiche, Kurmanji, Latin, Maltese, Polish, Skolt Sami -->
       <value name="advmod:obl">adverbial modifier + oblique nominal</value> <!-- Old French -->
       <value name="advmod:que">advmod:que</value> <!-- Hungarian -->
       <value name="advmod:tfrom">advmod:tfrom</value> <!-- Hungarian -->
       <value name="advmod:tlocy">advmod:tlocy</value> <!-- Hungarian -->
       <value name="advmod:tmod">advmod:tmod</value> <!-- Apurina, Erzya, Komi Permyak, Komi Zyrian, Moksha, Romanian, Skolt Sami -->
       <value name="advmod:to">advmod:to</value> <!-- Hungarian -->
       <value name="advmod:tto">advmod:tto</value> <!-- Hungarian -->
       <value name="advmod">adverbial modifier</value>
       <value name="amod:att">amod:att</value> <!-- Hungarian -->
       <value name="amod:attlvc">amod:attlvc</value> <!-- Hungarian -->
       <value name="amod:flat">amod:flat</value> <!-- Polish -->
       <value name="amod">adjectival modifier</value>
       <value name="appos:trans">appos:trans</value> <!-- Turkish German -->
       <value name="appos">appositional modifier</value>
       <value name="aux:aff">aux:aff</value> <!-- Beja -->
       <value name="aux:aspect">aux:aspect</value> <!-- Komi Zyrian -->
       <value name="aux:caus">aux:caus</value> <!-- Armenian, French, Western Armenian -->
       <value name="aux:clitic">aux:clitic</value> <!-- Polish -->
       <value name="aux:cnd">aux:cnd</value> <!-- Erzya, Komi Permyak, Komi Zyrian, Polish -->
       <value name="aux:ex">aux:ex</value> <!-- Armenian, Western Armenian -->
       <value name="aux:imp">aux:imp</value> <!-- Erzya, Polish -->
       <value name="aux:nec">aux:nec</value> <!-- Komi Zyrian, Moksha, Skolt Sami -->
       <value name="aux:neg">aux:neg</value> <!-- Chukchi, Erzya, Komi Permyak, Komi Zyrian, Maltese, Moksha, North Sami, Skolt Sami, Tamil -->
       <value name="aux:opt">aux:opt</value> <!-- Erzya, Moksha -->
       <value name="aux:part">aux:part</value> <!-- Maltese -->
       <value name="aux:pass">passive auxilary</value> <!-- Afrikaans, Ancient Greek, Arabic, Assyrian, Belarusian, Bhojpuri, Breton, Bulgarian, Buryat, Chinese, Czech, Dutch, English, Faroese, Finnish, French, Frisian Dutch, Galician, German, Hindi, Italian, Kangri, Karelian, Latin, Latvian, Lithuanian, Maltese, Marathi, Norwegian, Old Church Slavonic, Old East Slavic, Old French, Persian, Polish, Portuguese, Romanian, Russian, Slovak, Spanish, Swedish, Swiss German, Tamil, Thai, Turkish German, Upper Sorbian, Vietnamese -->
       <value name="aux:pot">aux:pot</value> <!-- Komi Zyrian -->
       <value name="aux:q">aux:q</value> <!-- Erzya, Turkish, Turkish German -->
       <value name="aux:tense">aux:tense</value> <!-- French, Komi Zyrian, Skolt Sami -->
       <value name="aux">auxiliary</value>
       <value name="case:acc">case:acc</value> <!-- Hebrew -->
       <value name="case:adv">case:adv</value> <!-- Indonesian -->
       <value name="case:aff">case:aff</value> <!-- Beja -->
       <value name="case:det">preposition with determiner</value> <!-- Maltese, Old French -->
       <value name="case:gen">case:gen</value> <!-- Hebrew -->
       <value name="case:loc">case:loc</value> <!-- Armenian, Cantonese, Chinese, Western Armenian -->
       <value name="case:pred">case:pred</value> <!-- Welsh -->
       <value name="case:voc">case:voc</value> <!-- Irish, Scottish Gaelic -->
       <value name="case">case marking</value>
       <value name="cc:nc">cc:nc</value> <!-- Old French -->
       <value name="cc:nc">Coordinated conjunct : non coordonant</value>
       <value name="cc:preconj">cc:preconj</value> <!-- Arabic, English, Erzya, Estonian, Faroese, Finnish, German, Indonesian, Komi Permyak, Komi Zyrian, Moksha, North Sami, Persian, Polish, Portuguese, Romanian, Slovenian, Spanish, Thai, Turkish -->
       <value name="cc:preconj">preconjunct</value>
       <value name="cc">Coordinating conjunction</value>
       <value name="cc">coordinating conjunction</value>
       <value name="ccomp:cleft">ccomp:cleft</value> <!-- Polish -->
       <value name="ccomp:obj">ccomp:obj</value> <!-- Hungarian, Polish -->
       <value name="ccomp:obl">ccomp:obl</value> <!-- Hungarian -->
       <value name="ccomp:pmod">ccomp:pmod</value> <!-- Romanian -->
       <value name="ccomp:pred">ccomp:pred</value> <!-- Hungarian -->
       <value name="ccomp">clausal complement</value>
       <value name="clf">classifier</value>
       <value name="compound:a">compound:a</value> <!-- Indonesian -->
       <value name="compound:affix">compound:affix</value> <!-- Hebrew -->
       <value name="compound:dir">compound:dir</value> <!-- Cantonese, Chinese -->
       <value name="compound:ext">compound:ext</value> <!-- Cantonese, Chinese -->
       <value name="compound:lvc">compound:lvc</value> <!-- Armenian, Hindi, Kazakh, Khunsari, Korean, Kurmanji, Marathi, Nayini, Persian, Soi, Tamil, Telugu, Turkish, Turkish German, Uyghur, Western Armenian -->
       <value name="compound:lvc">light verb construction</value>
       <value name="compound:nn">compound:nn</value> <!-- Finnish, Livvi, North Sami -->
       <value name="compound:preverb">compound:preverb</value> <!-- Hungarian -->
       <value name="compound:prt">compound:prt</value> <!-- Afrikaans, Arabic, Danish, Dutch, English, Erzya, Estonian, Faroese, Finnish, Frisian Dutch, German, Icelandic, Irish, Karelian, Komi Permyak, Naija, Norwegian, Persian, Spanish, Swedish, Swedish Sign Language, Swiss German, Tamil, Thai, Turkish German, Wolof, Yoruba -->
       <value name="compound:prt">phrasal verb particle</value>
       <value name="compound:quant">compound:quant</value> <!-- Cantonese -->
       <value name="compound:redup">reduplicated compounds</value> <!-- Armenian, Bambara, Classical Chinese, Erzya, Hindi, Kurmanji, Marathi, Naija, Tagalog, Tamil, Telugu, Turkish, Turkish German, Uyghur, Welsh, Western Armenian -->
       <value name="compound:smixut">compound:smixut</value> <!-- Hebrew -->
       <value name="compound:svc">serial verb compounds</value> <!-- Amharic, Armenian, Marathi, Mbya Guarani, Naija, Swedish Sign Language, Telugu, Ukrainian, Western Armenian, Wolof, Yoruba -->
       <value name="compound:vo">compound:vo</value> <!-- Cantonese, Chinese -->
       <value name="compound:vv">compound:vv</value> <!-- Cantonese, Chinese -->
       <value name="compound">compound</value>
       <value name="conj:expl">conj:expl</value> <!-- Latin -->
       <value name="conj:extend">conj:extend</value> <!-- Slovenian -->
       <value name="conj:svc">conj:svc</value> <!-- Ukrainian -->
       <value name="conj">conjunct</value>
       <value name="cop:expl">cop:expl</value> <!-- Maltese -->
       <value name="cop:locat">cop:locat</value> <!-- Polish -->
       <value name="cop:own">cop:own</value> <!-- Finnish, Karelian, Livvi, Marathi -->
       <value name="cop">copula</value>
       <value name="csubj:cleft">csubj:cleft</value> <!-- Irish, Latin, Manx, Scottish Gaelic -->
       <value name="csubj:cop">csubj:cop</value> <!-- Erzya, Estonian, Finnish, Irish, Komi Zyrian, Livvi, Manx, Moksha, Scottish Gaelic, Turkish -->
       <value name="csubj:pass">clausal passive subject</value> <!-- Albanian, Amharic, Ancient Greek, Arabic, Armenian, Belarusian, Bulgarian, Catalan, Chinese, Classical Chinese, Czech, English, French, German, Gothic, Greek, Indonesian, Italian, Korean, Latin, Latvian, Lithuanian, Norwegian, Old Church Slavonic, Old East Slavic, Polish, Portuguese, Romanian, Russian, Sanskrit, Slovak, Spanish, Swedish, Western Armenian -->
       <value name="csubj">clausal subject</value>
       <value name="dep:aff">dep:aff</value> <!-- Beja -->
       <value name="dep:agr">dep:agr</value> <!-- Kiche -->
       <value name="dep:alt">dep:alt</value> <!-- Upper Sorbian -->
       <value name="dep:ana">dep:ana</value> <!-- Yupik -->
       <value name="dep:aux">dep:aux</value> <!-- Yupik -->
       <value name="dep:comp">dep:comp</value> <!-- Beja, French -->
       <value name="dep:conj">dep:conj</value> <!-- Beja -->
       <value name="dep:cop">dep:cop</value> <!-- Yupik -->
       <value name="dep:emo">dep:emo</value> <!-- Yupik -->
       <value name="dep:infl">dep:infl</value> <!-- Yupik -->
       <value name="dep:mark">dep:mark</value> <!-- Yupik -->
       <value name="dep:mod">dep:mod</value> <!-- Mbya Guarani -->
       <value name="dep:pos">dep:pos</value> <!-- Yupik -->
       <value name="dep:redup">dep:redup</value> <!-- Beja -->
       <value name="dep:ss">dep:ss</value> <!-- Kiche -->
       <value name="dep">unspecified dependency</value>
       <value name="det:adj">det:adj</value> <!-- Albanian -->
       <value name="det:noun">det:noun</value> <!-- Albanian -->
       <value name="det:numgov">pronominal quantifier governing the case of the noun</value> <!-- Czech, Polish, Serbian, Slovak, Ukrainian, Upper Sorbian -->
       <value name="det:nummod">pronominal quantifier agreeing in case with the noun</value> <!-- Czech, Polish, Ukrainian -->
       <value name="det:poss">possessive determiner</value> <!-- Akkadian, Armenian, German, Italian, Korean, Polish, Portuguese, Western Armenian -->
       <value name="det:predet">det:predet</value> <!-- English, Italian, Persian -->
       <value name="det:pron">det:pron</value> <!-- Albanian -->
       <value name="det:rel">det:rel</value> <!-- Bambara -->
       <value name="det">determiner</value>
       <value name="discourse:emo">discourse:emo</value> <!-- Irish, Italian, Polish -->
       <value name="discourse:filler">discourse:filler</value> <!-- Norwegian, Slovenian -->
       <value name="discourse:intj">discourse:intj</value> <!-- Polish -->
       <value name="discourse:sp">discourse:sp</value> <!-- Cantonese, Chinese, Classical Chinese -->
       <value name="discourse">discourse element</value>
       <value name="dislocated:cleft">dislocated:cleft</value> <!-- Mbya Guarani -->
       <value name="dislocated:csubj">dislocated:csubj</value> <!-- Latin -->
       <value name="dislocated:nsubj">dislocated:nsubj</value> <!-- Latin -->
       <value name="dislocated:obj">dislocated:obj</value> <!-- Latin -->
       <value name="dislocated:subj">dislocated:subj</value> <!-- Beja -->
       <value name="dislocated">dislocated elements</value>
       <value name="expl:comp">expl:comp</value> <!-- French -->
       <value name="expl:impers">impersonal expletive</value> <!-- Italian, Polish, Romanian, Spanish -->
       <value name="expl:pass">reflexive pronoun used in reflexive passive</value> <!-- Catalan, Czech, French, Italian, Latin, Portuguese, Romanian, Slovak, Spanish, Upper Sorbian -->
       <value name="expl:poss">expl:poss</value> <!-- Romanian -->
       <value name="expl:pv">reflexive clitic with an inherently reflexive verb</value> <!-- Czech, Dutch, German, Old East Slavic, Polish, Portuguese, Romanian, Slovak, Spanish, Turkish German, Upper Sorbian -->
       <value name="expl:subj">expl:subj</value> <!-- French, Naija -->
       <value name="expl">expletive</value>
       <value name="fixed">fixed multiword expression</value>
       <value name="flat:abs">flat:abs</value> <!-- Ukrainian -->
       <value name="flat:dist">flat:dist</value> <!-- Western Armenian -->
       <value name="flat:foreign">foreign words</value> <!-- Arabic, Belarusian, Buryat, Chinese, Chukchi, Croatian, Czech, English, Estonian, Faroese, Finnish, French, Galician, Icelandic, Indonesian, Irish, Italian, Komi Zyrian, Latin, Latvian, Lithuanian, Manx, Naija, Norwegian, Persian, Polish, Portuguese, Russian, Scottish Gaelic, Slovak, Slovenian, South Levantine Arabic, Ukrainian, Upper Sorbian -->
       <value name="flat:name">names</value> <!-- Ancient Greek, Belarusian, Breton, Chinese, Chukchi, Erzya, Faroese, Finnish, French, Frisian Dutch, Galician, German, Gothic, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Irish, Italian, Karelian, Kazakh, Komi Permyak, Komi Zyrian, Korean, Latin, Latvian, Livvi, Maltese, Moksha, Norwegian, Old Church Slavonic, Old East Slavic, Persian, Portuguese, Russian, Scottish Gaelic, Skolt Sami, Slovenian, Spanish, Swedish, Thai, Ukrainian, Welsh, Western Armenian -->
       <value name="flat:num">flat:num</value> <!-- Komi Zyrian, Persian -->
       <value name="flat:range">flat:range</value> <!-- Ukrainian, Western Armenian -->
       <value name="flat:repeat">flat:repeat</value> <!-- Ukrainian -->
       <value name="flat:sibl">flat:sibl</value> <!-- Ukrainian -->
       <value name="flat:title">flat:title</value> <!-- Ukrainian -->
       <value name="flat:vv">flat:vv</value> <!-- Classical Chinese -->
       <value name="flat">name multiword expression</value>
       <value name="goeswith">goes with</value>
       <value name="iobj:agent">iobj:agent</value> <!-- Armenian, French, Western Armenian -->
       <value name="iobj:appl">iobj:appl</value> <!-- Wolof -->
       <value name="iobj:patient">iobj:patient</value> <!-- Tagalog -->
       <value name="iobj">indirect object</value>
       <value name="list">list</value>
       <value name="mark:adv">mark:adv</value> <!-- Cantonese, Chinese -->
       <value name="mark:advmod">adverbial modifier confusable with a subordination marker</value> <!-- Old French -->
       <value name="mark:aff">mark:aff</value> <!-- Beja -->
       <value name="mark:obj">marker + object</value> <!--Old French, no doc -->
       <value name="mark:obl">marker + oblique nominal</value> <!--Old French, no doc -->
       <value name="mark:prt">mark:prt</value> <!-- Chinese, Irish, Scottish Gaelic -->
       <value name="mark:q">mark:q</value> <!-- Hebrew -->
       <value name="mark:rel">mark:rel</value> <!-- Cantonese, Chinese -->
       <value name="mark">marker</value>
       <value name="nmod:agent">nmod:agent</value> <!-- Welsh -->
       <value name="nmod:appos">nmod:appos</value> <!-- French, Komi Zyrian, Moksha -->
       <value name="nmod:arg">nmod:arg</value> <!-- Polish, Yupik -->
       <value name="nmod:att">nmod:att</value> <!-- Hungarian -->
       <value name="nmod:attlvc">nmod:attlvc</value> <!-- Hungarian -->
       <value name="nmod:attr">nmod:attr</value> <!-- Chukchi -->
       <value name="nmod:bahuv">nmod:bahuv</value> <!-- Moksha -->
       <value name="nmod:cau">nmod:cau</value> <!-- Uyghur -->
       <value name="nmod:comp">nmod:comp</value> <!-- Erzya, Komi Zyrian, Moksha, Turkish, Uyghur -->
       <value name="nmod:flat">nmod:flat</value> <!-- Polish -->
       <value name="nmod:gen">nmod:gen</value> <!-- Breton -->
       <value name="nmod:gobj">nmod:gobj</value> <!-- Erzya, Finnish -->
       <value name="nmod:gsubj">nmod:gsubj</value> <!-- Erzya, Finnish, Karelian -->
       <value name="nmod:lfrom">nmod:lfrom</value> <!-- Komi Zyrian -->
       <value name="nmod:lmod">nmod:lmod</value> <!-- Erzya, Indonesian, Komi Permyak, Komi Zyrian, Moksha -->
       <value name="nmod:npmod">nmod:npmod</value> <!-- Armenian, English, Western Armenian -->
       <value name="nmod:obj">nmod:obj</value> <!-- Komi Zyrian -->
       <value name="nmod:obl">nmod:obl</value> <!-- Hungarian -->
       <value name="nmod:part">nmod:part</value> <!-- Turkish, Uyghur -->
       <value name="nmod:poss">possessive nominal modifier</value> <!-- Akkadian, Albanian, Apurina, Arabic, Armenian, Assyrian, Bambara, Beja, Breton, Chukchi, Danish, Dutch, English, Erzya, Faroese, Finnish, Frisian Dutch, German, Hebrew, Hindi, Icelandic, Indonesian, Irish, Karelian, Kazakh, Khunsari, Komi Permyak, Komi Zyrian, Korean, Kurmanji, Latin, Livvi, Maltese, Manx, Marathi, Moksha, Naija, Nayini, North Sami, Persian, Polish, Sanskrit, Scottish Gaelic, Skolt Sami, Soi, South Levantine Arabic, Swedish, Swedish Sign Language, Swiss German, Tagalog, Tamil, Telugu, Thai, Turkish, Uyghur, Warlpiri, Welsh, Western Armenian, Wolof -->
       <value name="nmod:pred">nmod:pred</value> <!-- Polish -->
       <value name="nmod:prp">nmod:prp</value> <!-- Komi Zyrian -->
       <value name="nmod:redup">nmod:redup</value> <!-- Welsh -->
       <value name="nmod:relat">nmod:relat</value> <!-- Chukchi -->
       <value name="nmod:subj">nmod:subj</value> <!-- Komi Zyrian -->
       <value name="nmod:tmod">temporal modifier</value> <!-- Chinese, English, Indonesian, Moksha, Romanian, Telugu, Uyghur -->
       <value name="nmod">nominal modifier</value>
       <value name="nsubj:advmod">fused subject pronoun and adverb</value> <!-- Old French -->
       <value name="nsubj:aff">nsubj:aff</value> <!-- Beja -->
       <value name="nsubj:bfoc">nsubj:bfoc</value> <!-- Tagalog -->
       <value name="nsubj:caus">nsubj:caus</value> <!-- Armenian, French, Western Armenian -->
       <value name="nsubj:cleft">nsubj:cleft</value> <!-- Latin -->
       <value name="nsubj:cop">nsubj:cop</value> <!-- Apurina, Breton, Erzya, Estonian, Finnish, Hebrew, Karelian, Komi Permyak, Komi Zyrian, Livvi, Moksha, Sanskrit, Skolt Sami, Turkish -->
       <value name="nsubj:ifoc">nsubj:ifoc</value> <!-- Tagalog -->
       <value name="nsubj:lfoc">nsubj:lfoc</value> <!-- Tagalog -->
       <value name="nsubj:lvc">nsubj:lvc</value> <!-- Hungarian -->
       <value name="nsubj:nc">nsubj:nc</value> <!-- Persian, Tamil, Telugu -->
       <value name="nsubj:obj">fused subject and object pronoun</value> <!-- Old French -->
       <value name="nsubj:pass">passive nominal subject</value> <!-- Afrikaans, Amharic, Ancient Greek, Arabic, Armenian, Assyrian, Belarusian, Bulgarian, Buryat, Cantonese, Catalan, Chinese, Classical Chinese, Czech, Dutch, English, Faroese, French, Frisian Dutch, Galician, German, Gothic, Greek, Hindi, Indonesian, Italian, Korean, Latin, Latvian, Lithuanian, Maltese, Marathi, Moksha, Norwegian, Old Church Slavonic, Old East Slavic, Persian, Polish, Portuguese, Romanian, Russian, Sanskrit, Skolt Sami, Slovak, Spanish, Swedish, Swiss German, Tagalog, Tamil, Thai, Turkish German, Upper Sorbian, Western Armenian -->
       <value name="nsubj:periph">nsubj:periph</value> <!-- Cantonese -->
       <value name="nsubj">Nominal subject</value>
       <value name="nummod:det">nummod:det</value> <!-- Beja -->
       <value name="nummod:entity">numeric modifier governed by a noun</value> <!-- Russian -->
       <value name="nummod:flat">nummod:flat</value> <!-- Polish -->
       <value name="nummod:gov">numeric modifier governing the case of the noun</value> <!-- Belarusian, Czech, Lithuanian, Old East Slavic, Polish, Russian, Sanskrit, Serbian, Ukrainian, Upper Sorbian -->
       <value name="nummod">numeric modifier</value>
       <value name="obj:advmod">fused adverb and object pronoun</value> <!-- Old French -->
       <value name="obj:advneg">fused negation and object pronoun</value> <!-- no doc for advneg -->
       <value name="obj:agent">obj:agent</value> <!-- Apurina, French, Tagalog -->
       <value name="obj:appl">obj:appl</value> <!-- Wolof -->
       <value name="obj:caus">obj:caus</value> <!-- Wolof -->
       <value name="obj:lvc">obj:lvc</value> <!-- French, Hungarian, Naija -->
       <value name="obj:obl">fused oblique and object pronoun</value> <!-- Old French -->
       <value name="obj:periph">obj:periph</value> <!-- Cantonese, Chinese -->
       <value name="obj">object</value>
       <value name="obl:advmod">adverbial modifier confusable with an oblique dependent</value> <!-- Old French -->
       <value name="obl:agent">agent modifier</value> <!-- Ancient Greek, Armenian, Belarusian, Breton, Cantonese, Chinese, Czech, Dutch, Erzya, French, German, Gothic, Greek, Hindi, Indonesian, Italian, Komi Zyrian, Latin, Lithuanian, Maltese, Moksha, Naija, Old Church Slavonic, Old East Slavic, Polish, Portuguese, Romanian, Russian, Sanskrit, Skolt Sami, Swedish, Tamil, Turkish, Welsh, Western Armenian -->
       <value name="obl:appl">obl:appl</value> <!-- Wolof -->
       <value name="obl:arg">oblique argument</value> <!-- Arabic, Beja, Czech, French, German, Greek, Icelandic, Latin, Lithuanian, Maltese, Naija, Persian, Polish, Sanskrit, Slovak, South Levantine Arabic, Tamil -->
       <value name="obl:cau">obl:cau</value> <!-- Erzya, Komi Zyrian, Moksha, Telugu -->
       <value name="obl:cmp">obl:cmp</value> <!-- Telugu -->
       <value name="obl:cmpr">obl:cmpr</value> <!-- Latin, Polish, Tamil -->
       <value name="obl:comp">obl:comp</value> <!-- Moksha -->
       <value name="obl:dat">obl:dat</value> <!-- Kurmanji -->
       <value name="obl:freq">obl:freq</value> <!-- Moksha -->
       <value name="obl:inst">obl:inst</value> <!-- Erzya, Moksha, Tamil -->
       <value name="obl:lfrom">obl:lfrom</value> <!-- Erzya, Komi Zyrian, Moksha -->
       <value name="obl:lmod">locative modifier</value> <!-- Apurina, Classical Chinese, Danish, Erzya, Komi Permyak, Komi Zyrian, Moksha, Skolt Sami, Tamil -->
       <value name="obl:lmp">obl:lmp</value> <!-- Erzya, Komi Zyrian, Moksha -->
       <value name="obl:lto">obl:lto</value> <!-- Erzya, Komi Zyrian, Moksha -->
       <value name="obl:lvc">obl:lvc</value> <!-- Hungarian -->
       <value name="obl:mcl">obl:mcl</value> <!-- Komi Zyrian -->
       <value name="obl:mod"> oblique modifier</value> <!-- Beja, French, Naija, Yupik -->
       <value name="obl:npmod">obl:npmod</value> <!-- Coptic, English -->
       <value name="obl:orphan">obl:orphan</value> <!-- Polish -->
       <value name="obl:own">obl:own</value> <!-- Kazakh -->
       <value name="obl:patient">obl:patient</value> <!-- Cantonese, Chinese -->
       <value name="obl:pmod">obl:pmod</value> <!-- Romanian, Tamil -->
       <value name="obl:poss">obl:poss</value> <!-- Thai -->
       <value name="obl:prep">obl:prep</value> <!-- Irish -->
       <value name="obl:sentcon">obl:sentcon</value> <!-- Mbya Guarani -->
       <value name="obl:smod">obl:smod</value> <!-- Scottish Gaelic -->
       <value name="obl:tmod">obl:tmod</value> <!-- Apurina, Arabic, Cantonese, Chinese, Classical Chinese, Danish, English, Erzya, Frisian Dutch, German, Hindi, Indonesian, Irish, Italian, Komi Permyak, Komi Zyrian, Korean, Manx, Moksha, Portuguese, Scottish Gaelic, Skolt Sami, Spanish, Tamil, Telugu, Thai, Turkish, Uyghur, Warlpiri -->
       <value name="obl:tmod">temporal modifier</value>
       <value name="obl">oblique nominal</value>
       <value name="orphan:missing">textual gap in the source</value> <!-- Latin -->
       <value name="orphan">remnant in ellipsis</value>
       <value name="parataxis:appos">parataxis:appos</value> <!-- Italian -->
       <value name="parataxis:conj">parataxis:conj</value> <!-- Naija -->
       <value name="parataxis:coord">parataxis:coord</value> <!-- Beja -->
       <value name="parataxis:deletion">parataxis:deletion</value> <!-- Norwegian -->
       <value name="parataxis:discourse">parataxis:discourse</value> <!-- Italian, Naija, Slovenian, Turkish German, Ukrainian -->
       <value name="parataxis:dislocated">parataxis:dislocated</value> <!-- Naija -->
       <value name="parataxis:hashtag">parataxis:hashtag</value> <!-- Irish, Italian -->
       <value name="parataxis:insert">parataxis:insert</value> <!-- French, Italian, Polish -->
       <value name="parataxis:mod">parataxis:mod</value> <!-- Beja -->
       <value name="parataxis:newsent">parataxis:newsent</value> <!-- Ukrainian -->
       <value name="parataxis:nsubj">parataxis:nsubj</value> <!-- Italian -->
       <value name="parataxis:obj">parataxis:obj</value> <!-- Bambara, Italian, Polish -->
       <value name="parataxis:parenth">parataxis:parenth</value> <!-- French, Naija -->
       <value name="parataxis:rel">parataxis:rel</value> <!-- Ukrainian -->
       <value name="parataxis:rep">parataxis:rep</value> <!-- Chukchi, Latin, Mbya Guarani -->
       <value name="parataxis:restart">parataxis:restart</value> <!-- Slovenian -->
       <value name="parataxis:rt">parataxis:rt</value> <!-- Irish -->
       <value name="parataxis:sentence">parataxis:sentence</value> <!-- Irish -->
       <value name="parataxis:trans">parataxis:trans</value> <!-- Turkish German -->
       <value name="parataxis:url">parataxis:url</value> <!-- Irish -->
       <value name="parataxis">parataxis</value>
       <value name="punct">punctuation</value>
       <value name="remnant">Remnant ?</value> <!-- no doc, replace with orphan? -->
       <value name="reparandum">overridden disfluency</value>
       <value name="root">root</value>
       <value name="vocative:cl">vocative:cl</value> <!-- Ukrainian -->
       <value name="vocative:mention">vocative:mention</value> <!-- Irish, Italian -->
       <value name="vocative">vocative</value>
       <value name="xcomp:cleft">xcomp:cleft</value> <!-- Polish -->
       <value name="xcomp:ds">xcomp:ds</value> <!-- Erzya, Finnish, Karelian, Komi Permyak, Livvi -->
       <value name="xcomp:obj">xcomp:obj</value> <!-- North Sami, Polish -->
       <value name="xcomp:pred">xcomp:pred</value> <!-- Irish, Latin, Manx, North Sami, Polish, Scottish Gaelic -->
       <value name="xcomp:sp">xcomp:sp</value> <!-- Ukrainian -->
       <value name="xcomp:subj">xcomp:subj</value> <!-- Polish -->
       <value name="xcomp">open clausal complement</value>
     </feature>
     <feature name="coord" domain="NT" ></feature>
     <feature name="dom" domain="NT" ></feature>
     <feature name="type" domain="NT" >
       <value name="nV">élément non-verbal</value>
       <value name="VFin">finite verb</value>
       <value name="VInf">infinitive</value>
       <value name="VPar">participle</value>
       <value name="--">nil</value>
     </feature>
     <feature name="vform" domain="NT"></feature>
     <feature name="vlemma" domain="NT"></feature>
     <feature name="note" domain="NT"></feature>
     <feature name="snr" domain="NT"></feature>
     ';
         printf MASTER "$nt_features_header";
         printf MASTER '
     <edgelabel>
       <value name="D">dependency</value>
       <value name="L">lexical</value>
       <value name="R">relator</value>
       <value name="*">not bound</value>
     </edgelabel>
     <secedgelabel>
       <value name="cluster">between elements of GpCoo</value>
       <value name="coord">between members of Coo</value>
       <value name="dupl">between duplicated nodes</value>
     </secedgelabel>
     </annotation>
     </head>
     <body>
     ';
+    }
     #  <value name="M">main</value>
     #  <value name="P">part</value>
     # Write the closing part of the master file: prints the closing </body>
     # and </corpus> tags to the MASTER filehandle.
     # Takes no arguments. The sub neither opens nor closes MASTER, so the
     # handle has to be open for writing before this is called.
     sub write_master_footer {
         print MASTER '</body>
     </corpus>
     ';
+    }
     # Store four extra NT feature declarations (nodom, headpos,
     # annotationFile, annotationUri) as a raw XML string in the global $TEMP.
     # NOTE(review): no reader of $TEMP is visible in this part of the file;
     # presumably these features were taken out of the emitted header and are
     # only kept here for reference -- confirm before removing.
     $TEMP = '
     <feature name="nodom" domain="NT" ></feature>
     <feature name="headpos" domain="NT" ></feature>
     <feature name="annotationFile" domain="NT" ></feature>
     <feature name="annotationUri" domain="NT" ></feature>
     ';
     # Populate the global %abbrev2cat hash, which maps a dependency-relation
     # abbreviation (e.g. "nsubj") to its human-readable category label.
     #
     # Takes no arguments. Only the keys listed below are written; any other
     # entries already present in %abbrev2cat are left untouched.
     sub define_cat_hashes {

         # Earlier mapping with French category names, kept commented out.
         #  $abbrev2cat{"Apst"} = "Apostrophe";
         #  $abbrev2cat{"AtObj"} = "AttributObjet";
         #  $abbrev2cat{"AtRfc"} = "AttributReflechi";
         #  $abbrev2cat{"AtSj"} = "AttributSujet";
         #  $abbrev2cat{"AuxA"} = "Auxilie-Actif";
         #  $abbrev2cat{"AuxP"} = "Auxilie-Passif";
         #  $abbrev2cat{"Circ"} = "Circonstant";
         #  $abbrev2cat{"Cmpl"} = "Complement";
         #  $abbrev2cat{"GpCoo"} = "Coordonne";
         #  $abbrev2cat{"Coo"} = "Coordination";
         #  $abbrev2cat{"Det"} = "Determinant";
         #  $abbrev2cat{"NgPrt"} = "Forclusif";
         #  $abbrev2cat{"Insrt"} = "Incidente";
         #  $abbrev2cat{"Intj"} = "Interjection";
         #  $abbrev2cat{"ModA"} = "ModifieurAttache";
         #  $abbrev2cat{"ModD"} = "ModifieurDetache";
         #  $abbrev2cat{"Ng"} = "Negation";
         #  $abbrev2cat{"VInf"} = "NoeudVerbal-Infinitif";
         #  $abbrev2cat{"VPrt"} = "NoeudVerbal-Participe"; #?
         #  $abbrev2cat{"VFin"} = "NoeudVerbal-Personnel";
         #  $abbrev2cat{"nSnt"} = "NonPhrase";
         #  $abbrev2cat{"Obj"} = "Objet";
         #  $abbrev2cat{"Snt"} = "Phrase";
         #  $abbrev2cat{"Pon"} = "Ponctuation";
         #  $abbrev2cat{"Rfc"} = "Reflechi";
         #  $abbrev2cat{"Rfx"} = "ReflexifRenforce";
         #  $abbrev2cat{"RelC"} = "Relateur-Coordonnant";
         #  $abbrev2cat{"RelNC"} = "Relateur-NonCoordonnant";
         #  $abbrev2cat{"nMax"} = "StructureNonMaximale";
         #  $abbrev2cat{"SjImp"} = "SujetImpersonnel";
         #  $abbrev2cat{"SjPer"} = "SujetPersonnel";
         #  $abbrev2cat{"Lac"} = "Lacune";
         #  $abbrev2cat{"Aux"} = "Auxilie";
         #  $abbrev2cat{"Regim"} = "Regime";

         # Active mapping: relation abbreviations with English labels.
         $abbrev2cat{"acl"}        = "Clausal modifier of noun";
         $abbrev2cat{"advcl"}      = "Adverbial clause modifier";
         $abbrev2cat{"advmod"}     = "Adverbial modifier";
         $abbrev2cat{"amod"}       = "Adjectival modifier";
         $abbrev2cat{"appos"}      = "Appositional modifier";
         $abbrev2cat{"aux"}        = "Auxiliary";
         $abbrev2cat{"cc-nc"}      = "Coordinated conjunct : non coordonant";
         $abbrev2cat{"cc"}         = "Coordinating conjunction";
         $abbrev2cat{"ccomp"}      = "Clausal complement";
         $abbrev2cat{"conj"}       = "Conjunct";
         $abbrev2cat{"cop"}        = "Copula";
         $abbrev2cat{"csubj"}      = "Clausal subject";
         $abbrev2cat{"det"}        = "Determiner";
         $abbrev2cat{"dislocated"} = "Dislocated elements";
         $abbrev2cat{"expl"}       = "Expletive";
         $abbrev2cat{"iobj"}       = "Indirect object";
         $abbrev2cat{"mark"}       = "Marker";
         $abbrev2cat{"nmod"}       = "Nominal modifier";
         $abbrev2cat{"nsubj"}      = "Nominal subject";
         $abbrev2cat{"nummod"}     = "Numeric modifier";
         $abbrev2cat{"obj"}        = "Object";

         # This label used to start with a stray space (" Oblique nominal"),
         # unlike every other entry in the table; the space has been removed.
         $abbrev2cat{"obl"}        = "Oblique nominal";
         $abbrev2cat{"orphan"}     = "Remnant in ellipsis";
         $abbrev2cat{"remnant"}    = "Remnant ?";
         $abbrev2cat{"vocative"}   = "Vocative";
         $abbrev2cat{"xcomp"}      = "Open clausal complement";
     }
     # Print every element of the global @words array, in order, each one
     # followed by a newline, to the currently selected output handle.
     # Takes no arguments; prints nothing when @words is empty.
     sub print_sentence {
         print "$_\n" foreach @words;
     }
     sub write_nonterminals {
         my $print_nt_features;
         if ( $_[1] =~ /dupl/ ) {
             $dupl = '_dupl';
+        }
         else {
             $dupl = '';

... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff

Laboratoire ICAR » Plateforme TXM

Révision 3567