/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

     package org.txm.macro.urs.democrat
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /*
      Calcule la longueur des mentions et attribue une valeur (1, 2, 3 ou plus) dans la propriété "LONGUEUR"
      */
     // Running totals of mentions per length class. They live in the script binding
     // (no `def`) so that testRules() can update them and the final report can print them.
     nLongueur1 = 0
     nLongueur2 = 0
     nLongueur3 = 0

     /**
      * Classifies a mention by its number of tokens and increments the matching counter.
      *
      * @param positions corpus positions of the tokens of the mention (not used by this rule set)
      * @param Mention array holding one value per token of the mention
      * @return "1", "2" or "3 ou plus"
      */
     def testRules(def positions, def Mention) {
         // Declared locally: an undeclared assignment would create a script binding variable.
         def catégorie
         switch (Mention.length) {
             case 1:
                 catégorie = "1"
                 nLongueur1++
                 break
             case 2:
                 catégorie = "2"
                 nLongueur2++
                 break
             default:
                 catégorie = "3 ou plus"
                 nLongueur3++
         }
         return catégorie
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // CORPS DU SCRIPT
     // Guard: the macro only accepts a main corpus as selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
         println "Corpora selection is not a Corpus: "+corpusViewSelection
         return
     }

     // BEGINNING OF PARAMETERS
     // Type of the annotation units to process.
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     // Name of the word property read for every token of a unit.
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
     def pos_property_name
     // When true, units that already have a LONGUEUR value are recomputed.
     @Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
     def reset
     if (!ParametersDialog.open(this)) return

     // Shared state kept in the script binding (no `def`).
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     if (posProperty == null) {
         println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
         return
     }

     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     // The requested unit type must exist in the annotation structure.
     def knownUnitTypes = structure.getUnites()
     if (!knownUnitTypes.contains(unit_type)) {
         println "Error: corpus structure does not contains unit with name=$unit_type"
         return
     }

     LONGUEUR = "LONGUEUR"
     // Create the LONGUEUR property and its values when the structure does not have it yet.
     def declaredProperties = structure.getUniteProperties(unit_type)
     if (!declaredProperties.contains(LONGUEUR)) {
         analecCorpus.ajouterProp(Unite.class, unit_type, LONGUEUR)
         for (String value : ["1", "2", "3 ou plus"]) {
             structure.ajouterVal(Unite.class, unit_type, LONGUEUR, value)
         }
     }

     def nModified = 0
     def nIgnored = 0
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order the units by start position, then by end position.
     units.sort { first, second ->
         (first.getDeb() <=> second.getDeb()) ?: (first.getFin() <=> second.getFin())
     }

     for (Unite unit : units) {
         def current = unit.getProp(LONGUEUR)
         boolean alreadySet = current != null && current.length() > 0
         if (alreadySet && !reset) continue // the unit already has a LONGUEUR

         // Inclusive range; a one-token unit yields a one-element array.
         int[] positions = (unit.getDeb()..unit.getFin())
         def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
         def cat = testRules(positions, Mention)
         if (cat == null) {
             nIgnored++
             continue
         }
         vue.setValeurChamp(unit, LONGUEUR, cat)
         nModified++
     }

     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     println "- $nLongueur1 mentions de longueur 1."
     println "- $nLongueur2 mentions de longueur 2."
     println "- $nLongueur3 mentions de longueur 3 ou plus.\n"
     // Report whatever the rule function recorded in `errors`.
     if (!errors.isEmpty()) {
         println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
         errors.each { pattern, words -> println "fropos="+pattern+"\twords="+words.join(" | ") }
     }

     package org.txm.macro.urs.democrat
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /**
      * Applies the categorisation rules to one mention.
      * Rules are tested one after the other; as soon as one applies, the following ones are skipped.
      * When no rule applies, the pattern and its word forms are recorded in the `errors` map
      * (script binding) so the caller can list them.
      *
      * @param positions corpus positions of the tokens of the mention
      * @param Mention array holding one value per token of the mention
      * @return the category name, or null when no rule matched
      */
     def testRules(def positions, def Mention) {
         def catégorie = null
         // RULE DEFINITIONS

         // CONTAINS rules
         if (Mention.contains("NOMpro")) catégorie = "Nom Propre"
         else if (Mention.contains("DETpos")) catégorie = "Dét Possessif"
         //  ...
         // STARTS WITH AND DOES NOT CONTAIN rules
         // NOTE(review): the "GN Possessif" rule looks unreachable, since a mention starting
         // with "DETpos" without "NOMpro" already matched the "Dét Possessif" rule above -
         // confirm the intended rule order.
         else if (Mention.first() == "DETpos" && !Mention.contains("NOMpro")) catégorie = "GN Possessif"
         else if (Mention.first() == "DETdem" && !Mention.contains("NOMpro")) catégorie = "GN Démonstratif"
         //  ...
         // CONTAINS SEVERAL rules
         else if (Mention.contains("PROadv") || Mention.contains("ADVgen.PROadv")) catégorie = "Pronom Adverbial"
         //  ...
         // CONTAINS AND DOES NOT CONTAIN rules
         else if (
                 ( Mention.contains("DETdef") || Mention.contains("PRE.DETdef") )
                 &&
                 !Mention.contains("PROpos") && !Mention.contains("NOMpro") && !Mention.contains("PROcar")
                 ) catégorie = "GN Défini"
         //  ...
         // End of the rules, none matched: store the pattern, it is displayed at the end.
         else {
             def forms = CQI.cpos2Str(word.getQualifiedName(), positions)
             // Arrays hash by identity, so they are converted to Lists: identical patterns
             // then share one entry and identical word sequences are stored once.
             // A List prints like an array ("[a, b]"), so the report is unchanged.
             def pattern = Mention.toList()
             if (!errors.containsKey(pattern)) errors[pattern] = new HashSet()
             errors[pattern] << forms.toList()
         }
         return catégorie
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // CORPS DU SCRIPT
     // Guard: the macro only accepts a main corpus as selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
         println "Corpora selection is not a Corpus"
         return
     }

     // BEGINNING OF PARAMETERS
     // Type of the annotation units to process.
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     // Name of the word property read for every token of a unit.
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
     def pos_property_name
     // When true, units that already have a CATEGORIE value are recomputed.
     @Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
     def reset
     if (!ParametersDialog.open(this)) return

     // Shared state kept in the script binding (no `def`); testRules() reads CQI, word and errors.
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     if (posProperty == null) {
         println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
         return
     }

     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     // The requested unit type must exist in the annotation structure.
     def knownUnitTypes = structure.getUnites()
     if (!knownUnitTypes.contains(unit_type)) {
         println "Error: corpus structure does not contains unit with name=$unit_type"
         return
     }

     CATEGORIE = "CATEGORIE"
     // Create the CATEGORIE property and its values when the structure does not have it yet.
     def declaredProperties = structure.getUniteProperties(unit_type)
     if (!declaredProperties.contains(CATEGORIE)) {
         analecCorpus.ajouterProp(Unite.class, unit_type, CATEGORIE)
         def categoryValues = [
             "Nom Propre",
             "Pronom Impersonnel",
             "Pronom Interrogatif",
             "Pronom Pronom cardinal",
             "Pronom Démonstratif",
             "Pronom Indéfini",
             "Pronom ordinal",
             "Pronom Relatif"
         ]
         //...
         for (String value : categoryValues) {
             analecCorpus.ajouterVal(Unite.class, unit_type, CATEGORIE, value)
         }
     }

     def nModified = 0
     def nIgnored = 0
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order the units by start position, then by end position.
     units.sort { first, second ->
         (first.getDeb() <=> second.getDeb()) ?: (first.getFin() <=> second.getFin())
     }

     for (Unite unit : units) {
         def current = unit.getProp(CATEGORIE)
         boolean alreadySet = current != null && current.length() > 0
         if (alreadySet && !reset) continue // the unit already has a CATEGORIE

         // Inclusive range; a one-token unit yields a one-element array.
         int[] positions = (unit.getDeb()..unit.getFin())
         def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
         def cat = testRules(positions, Mention)
         if (cat == null) {
             nIgnored++
             continue
         }
         vue.setValeurChamp(unit, CATEGORIE, cat)
         nModified++
     }

     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     // List the patterns that no rule could categorise.
     if (!errors.isEmpty()) {
         println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
         errors.each { pattern, words -> println "fropos="+pattern+"\twords="+words.join(" | ") }
     }

     // @author Bruno Oberlé (2017-04-01 21:50)
     /*
     Définit la catégorie grammaticale du maillon d'après le champ `frpos'
     (tagset de TreeTagger).  Le script est adapté de
     http://svn.code.sf.net/p/txm/code/trunk/plugins/Analec/AnalecRCP/src/org/txm/macro/analec/Fropos2CategorieMacro.groovy.
     Voici la liste des catégories grammaticales retenues (manuel d'annotation de
     Democrat):
     - GN: Groupe Nominal (le petit chat, le chat, le même, ce chat etc.)
     - POSS: Possessif (mon, ton son, ma, ta, sa, mes, tes, ses, notre, votre,
       leur, nos, vos, leurs)
     - PRO: Pronom (moi, toi, lui, elle, nous, vous, eux, elles, le tien, le mien,
       moi-même etc.)
     - PR_CL_O: Pronom Clitique Objet (me, te, le, la, les, lui, leur, y, en)
     - PR_CL_R: Pronom Clitique Réfléchi
     - PR_CL_S: Pronom Clitique Sujet (je, tu, il, elle, on, nous, vous, ils,
       elles)
     - PR_REL: Pronom Relatif (qui, que, quoi, dont, où, lequel, quiconque etc.)
     - PR_WH: Pronom Interrogatif (qui, que, quoi, lequel etc.)
     Le script ne peut pas désambiguïser les pronoms clitiques de même forme
     (`nous' est-il un sujet, un objet ou un réfléchi?).  Dans ce cas, le script
     opte pour le sujet (ou pour l'objet si l'ambiguïté n'est que entre objet et
     réfléchi).
     Quand il n'y a aucune information disponible (erreurs de l'étiqueteur), la
     valeur est UNDEFINED.
     L'algorithme est décrit ici:
     https://groupes.renater.fr/wiki/democrat/prive/txm_annotation_exploitation
     */
     package org.txm.macro.urs.democrat
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /**
      * Categorises a clitic pronoun from its lower-cased word form.
      *
      * @param position corpus position of the token
      * @param frpos tag of the token (not used)
      * @return "PR_CL_S", "PR_CL_O", "PR_CL_R", or null when the form is not in any list
      */
     def testClitic(def position, def frpos) {
         // The word form is used because the lemma is hard to predict
         // ("il" for "elle"? "tu" for "te"? and what about "leur"?).
         def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()

         def subjectForms = ["je", "j'", "tu", "t'", "il", "elle", "on", "vous", "nous", "ils", "elles"]
         def objectForms = ["me", "m'", "te", "le", "l'", "la", "lui", "leur", "les"]
         def reflexiveForms = ["se", "s'"]

         // Subject forms are tested first, then object forms, then reflexive forms.
         if (subjectForms.contains(form)) return "PR_CL_S"
         if (objectForms.contains(form)) return "PR_CL_O"
         if (reflexiveForms.contains(form)) return "PR_CL_R"
         return null
     }
     /**
      * Decides between a noun phrase and a relative pronoun by looking at which tag comes first:
      * a noun, as in `le petit chat que j'ai adopté', or a relative pronoun, as in
      * `(le livre) dans lequel j'ai lu cette histoire...'.
      * NOTE: per the original author, Democrat does not annotate the relative clause inside the
      * mention, so a relative pronoun is never expected inside a noun phrase there; other
      * annotation schemes may differ.
      *
      * @param positions corpus positions of the tokens of the mention (not used)
      * @param Mention array holding one tag per token of the mention
      * @return "GN" if "NOM" or "NAM" comes first, "PR_REL" if "PRO:REL" comes first, else null
      */
     def testPhrase(def positions, def Mention) {
         def head = Mention.find { tag -> tag == "NOM" || tag == "NAM" || tag == "PRO:REL" }
         if (head == null) return null
         return (head == "PRO:REL") ? "PR_REL" : "GN"
     }
     /**
      * Detects an interrogative pronoun: a token tagged "PRO" whose form is qui, que or lequel.
      *
      * @param position corpus position of the token
      * @param mention tag of the token
      * @return "PR_WH" when the token matches, null otherwise
      */
     def testWhPronoun(position, mention) {
         // Lower-cased like in testClitic(), so a capitalised form ("Qui", "Que", "Lequel")
         // is recognised as well.
         def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()
         if (mention == "PRO" && (form == "qui" || form == "que" || form == "lequel")) {
             return "PR_WH"
         }
         return null
     }
     /**
      * Computes the category of one mention by trying the rules in order; each rule is only
      * tried while no category has been found. When nothing matches, the category is
      * "UNDEFINED" and the pattern with its word forms is recorded in the `errors` map
      * (script binding).
      *
      * @param positions corpus positions of the tokens of the mention
      * @param Mention array holding one tag per token of the mention
      * @return the category name (never null)
      */
     def testRules(def positions, def Mention) {
         def catégorie = null
         // a possessive (mon, ma...)
         if (Mention.length == 1 && Mention.contains("DET:POS"))
             catégorie = "POSS"
         // a clitic (subject: je, tu...; object: me, te; reflexive: se)
         if (!catégorie && Mention.length == 1 && Mention.contains("PRO:PER"))
             catégorie = testClitic(positions[0], Mention[0])
         // an interrogative pronoun
         if (!catégorie && Mention.length == 1)
             catégorie = testWhPronoun(positions[0], Mention[0])
         // a noun phrase or a relative pronoun
         if (!catégorie)
             catégorie = testPhrase(positions, Mention)
         // some other kind of pronouns
         // "PRO:POS" follows the spelling of "DET:POS" above; the original "PRO:POSS"
         // is still accepted.
         if (!catégorie
               && (   Mention.contains("PRO")
                   || Mention.contains("PRO:POS")
                   || Mention.contains("PRO:POSS")
                   || Mention.contains("PRO:IND")
                   || Mention.contains("PRO:DEM")
                   || Mention.contains("PRO:PER") )
               && !Mention.contains("NOM")
               && !Mention.contains("NAM") )
             catégorie = "PRO"
         // End of the rules, none matched: store the pattern, it is displayed at the end.
         if (!catégorie) {
             catégorie = "UNDEFINED" // clear the field
             def forms = CQI.cpos2Str(word.getQualifiedName(), positions)
             // Arrays hash by identity, so they are converted to Lists: identical patterns
             // then share one entry and identical word sequences are stored once.
             // A List prints like an array ("[a, b]"), so the report is unchanged.
             def pattern = Mention.toList()
             if (!errors.containsKey(pattern)) errors[pattern] = new HashSet()
             errors[pattern] << forms.toList()
         }
         return catégorie
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // CORPS DU SCRIPT
     // Guard: the macro only accepts a main corpus as selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
         println "Corpora selection is not a Corpus"
         return
     }

     // BEGINNING OF PARAMETERS
     // Type of the annotation units to process.
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     // Name of the word property read for every token of a unit.
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
     def pos_property_name
     // When true, units that already have a CATEGORIE value are recomputed.
     @Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
     def reset
     if (!ParametersDialog.open(this)) return

     // Shared state kept in the script binding (no `def`); the rule functions read CQI, word and errors.
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     if (posProperty == null) {
         println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
         return
     }

     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
         println "Error: corpus structure does not contains unit with name=$unit_type"
         return
     }

     CATEGORIE = "CATEGORIE"
     // Create the CATEGORIE property and its values when the structure does not have it yet.
     if (!structure.getUniteProperties(unit_type).contains(CATEGORIE)) {
         // FIXME (original author): the original script (see also
         // http://forge.cbp.ens-lyon.fr/redmine/issues/2065) uses
         // analecCorpus.ajouterProp/Val, but that did not work in the author's version of
         // TXM-Analec, hence structure.ajouterProp/Val is used here.
         structure.ajouterProp(Unite.class, unit_type, CATEGORIE)
         // "UNDEFINED" is declared too: testRules() writes it when no rule matches.
         def categoryValues = ["GN", "POSS", "PRO", "PR_CL_O", "PR_CL_S", "PR_CL_R", "PR_REL", "PR_WH", "UNDEFINED"]
         for (String value : categoryValues) {
             structure.ajouterVal(Unite.class, unit_type, CATEGORIE, value)
         }
     }

     def nModified = 0
     def nIgnored = 0
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order the units by start position, then by end position.
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) { // process all units
         def prop = unit.getProp(CATEGORIE)
         if (!reset && prop != null && prop.length() > 0) continue // the unit already has a CATEGORIE

         int[] positions = null
         if (unit.getDeb() == unit.getFin()) positions = [unit.getDeb()]
         else positions = (unit.getDeb()..unit.getFin())

         def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
         def cat = testRules(positions, Mention)
         if (cat != null) {
             // The original script called vue.setValeurChamp(unit, CATEGORIE, cat), which did
             // not work for the author; the value is written into the unit properties directly.
             unit.getProps().put(CATEGORIE, cat)
             nModified++
         } else {
             nIgnored++
         }
     }

     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     // List the patterns that no rule could categorise.
     if (errors.size() > 0) {
         println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
         errors.keySet().each { println "fropos="+it+"\twords="+errors[it].join(" | ") }
     }
     // update the view (see also
     // http://forge.cbp.ens-lyon.fr/redmine/issues/2065)
     URSCorpora.getVue(analecCorpus).retablirVueParDefaut()

     // Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
     // Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
     // @author mdecorde
     // @author sheiden
     // STANDARD DECLARATIONS
     package org.txm.macro.urs.democrat
     import org.kohsuke.args4j.*
     import groovy.transform.Field
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.donnees.Structure;
     import visuAnalec.elements.Unite;
     import visuAnalec.vue.Vue
     // Guard: the macro only accepts a main corpus as selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
         println "Corpora selection is not a Corpus"
         return;
     }

     // BEGINNING OF PARAMETERS
     // Type of the annotation units to process.
     @Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
     String unit_type
     // When true, units that already have a NEW value are recomputed.
     @Field @Option(name="reset",usage="", widget="Boolean", required=true, def="true")
     boolean reset
     if (!ParametersDialog.open(this)) return;

     MainCorpus corpus = corpusViewSelection
     def analecCorpus = URSCorpora.getCorpus(corpus);
     Structure structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
         println "Error: corpus structure does not contains unit with name=$unit_type"
         return
     }

     def props = structure.getUniteProperties(unit_type)
     String NEW = "NEW"
     String REF = "REF"
     String YES = "YES"
     String NO = "NO"

     // Check the REF precondition before touching the structure, so that a failed run
     // does not leave a freshly created NEW property behind.
     if (!props.contains(REF)) { // check the unit_type units have the REF property
         println "Error: $unit_type units have no proprerty named 'REF'"
         return
     }
     if (!props.contains(NEW)) { // update the structure if needed
         analecCorpus.ajouterProp(Unite.class, unit_type, NEW);
         analecCorpus.ajouterVal(Unite.class, unit_type, NEW, YES);
         analecCorpus.ajouterVal(Unite.class, unit_type, NEW, NO);
     }

     int nIgnored = 0; // number of units ignored
     int nYes = 0 // number of "YES" unit set
     int nNo = 0 // number of "NO" unit set
     // References already seen; decides whether NEW is set to YES or NO.
     def allRefs = new HashSet<String>()
     def units = analecCorpus.getUnites(unit_type)
     // Order the units by start position, then by end position, so "first seen" follows that order.
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) {
         def prop = unit.getProp(NEW);
         def ref = unit.getProp(REF);
         if (reset || prop == null || prop.length() == 0) {
             // NOTE(review): units whose REF is null all share one entry in allRefs -
             // confirm this is the intended handling of units without a REF value.
             if (allRefs.contains(ref)) {
                 unit.getProps().put(NEW, NO)
                 nNo++
             } else { // first unit seen with this REF value
                 allRefs << ref
                 unit.getProps().put(NEW, YES)
                 nYes++
             }
         } else {
             // nothing to do "NEW" already exists
             nIgnored++
         }
     }

     println "nIgnored=$nIgnored"
     println "nYes=$nYes"
     println "nNo=$nNo"

     // Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
     // Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
     // @author mdecorde
     // @author sheiden
     // STANDARD DECLARATIONS
     package org.txm.macro.urs.democrat
     import org.apache.commons.lang.StringUtils;
     import org.kohsuke.args4j.*
     import groovy.transform.Field
     import org.txm.Toolbox;
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.AbstractCqiClient;
     import org.txm.searchengine.cqp.corpus.*
     import org.txm.searchengine.cqp.CQPSearchEngine
     import visuAnalec.Message.StructureEvent;
     import visuAnalec.Message.TypeModifStructure;
     import visuAnalec.donnees.Structure;
     import visuAnalec.elements.Unite;
     import visuAnalec.vue.Vue
     // Guard: the macro only accepts a main corpus as selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
         println "Corpora selection is not a Corpus"
         return;
     }

     // BEGINNING OF PARAMETERS
     // Type of the annotation units to process.
     @Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
     String unit_type
     // When true, units that already have a DEFINITUDE value are recomputed.
     @Field @Option(name="reset",usage="", widget="Boolean", required=true, def="false")
     boolean reset
     if (!ParametersDialog.open(this)) return;

     MainCorpus corpus = corpusViewSelection
     AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
     def word = corpus.getWordProperty()
     def analecCorpus = URSCorpora.getCorpus(corpus);
     Structure structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
         println "Error: corpus structure does not contains unit with name=$unit_type"
         return
     }

     def props = structure.getUniteProperties(unit_type)
     String DEFINITUDE = "DEFINITUDE"
     if (!props.contains(DEFINITUDE)) { // update the structure if needed
         analecCorpus.ajouterProp(Unite.class, unit_type, DEFINITUDE);
         analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "DEFINI");
         analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "INDEFINI");
         analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "DEMONSTRATIF");
         analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "AMBIGU");
         analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "NONE");
     }

     // Determiner patterns, compiled once. Every alternative is anchored at the start of the
     // mention: in the previous patterns `^` applied to the first alternative only, so the
     // other determiners matched anywhere in the mention (e.g. "de " inside "grande ville").
     // Possible additions (from the original author): ses, son, sa, leur(s), tous, toutes.
     def definiPattern = ~/^(?:(?:le|Le|la|La|les|Les|au|Au|aux|Aux|du|Du)\s|l'|L').+/
     def indefiniPattern = ~/^(?:un|une|Un|Une|Chaque|chaque|Certains|Certaines|certains|certaines|aucun|aucune|Aucun|Aucunes|Autre|autres|autre|quelque|quelques|Quelque|Quelques)\s.+/
     def demonstratifPattern = ~/^(?:ce|cette|Cette|cet|ces|Ce|Cet|Ces)\s.+/
     def ambiguPattern = ~/^(?:des|de|Des|De)\s.+/

     int nIgnored = 0 // number of ignored units
     int nModified = 0 // number of modified units
     int nDefini = 0 // number of "DEFINI" units
     int nIndefini = 0 // number of "INDEFINI" units
     int nDemonstratif = 0 // number of "DEMONSTRATIF" units
     int nAmbigu = 0 // number of "AMBIGU" units
     int nNone = 0 // number of "NONE" units
     def units = analecCorpus.getUnites(unit_type)
     // Order the units by start position, then by end position.
     units = units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) { // process all units
         def prop = unit.getProp(DEFINITUDE);
         // A unit is (re)computed when reset is set, or when it has no value or the value "NONE".
         if (reset || prop == null || prop.length() == 0 || prop.equals("NONE")) {
             // The word forms are only fetched for units that are actually processed.
             int[] pos = null
             if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
             else pos = (unit.getDeb()..unit.getFin())
             def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")

             if (definiPattern.matcher(form).find()) {
                 unit.getProps().put(DEFINITUDE, "DEFINI")
                 nDefini++
             } else if (indefiniPattern.matcher(form).find()) {
                 unit.getProps().put(DEFINITUDE, "INDEFINI")
                 nIndefini++
             } else if (demonstratifPattern.matcher(form).find()) {
                 unit.getProps().put(DEFINITUDE, "DEMONSTRATIF")
                 nDemonstratif++
             } else if (ambiguPattern.matcher(form).find()) {
                 unit.getProps().put(DEFINITUDE, "AMBIGU")
                 nAmbigu++
             } else {
                 unit.getProps().put(DEFINITUDE, "NONE")
                 nNone++;
             }
             nModified++
         } else {
             // nothing to do
             nIgnored++
         }
     }

     println "nIgnored=$nIgnored"
     println "nModified=$nModified"
     println " nDefini=$nDefini"
     println " nIndefini=$nIndefini"
     println " nDemonstratif=$nDemonstratif"
     println " nAmbigu=$nAmbigu"
     println " nNone=$nNone"

     // @author Matthieu Quignard
     // Date : 04 Mai 2017
     /*
     Définit la catégorie grammaticale des mentions d'après le champ `frpos'
     (tagset de TreeTagger).
     La liste des catégories grammaticales est celle de CATTEX2009
     -- Groupes nominaux :
     	GN.NAM (noms propres) : Henri II
     	GN.DEF (définis) : Le roi, du roi
     	GN.IND (indéfinis) : Un roi
     	GN.POS (possessifs) : [Mon roi]
     	GN.DEM (demonstratif) : Ce roi
     	GN.NUM (numéraux) : Deux rois
     	GN.CHECK (GN indéterminés)
     -- Déterminants
     	DET.POS (possessifs) : [mon] roi
     -- Pronoms
     	PRO.PER (personnels) : je, moi, me, on, il, etc.
     	PRO.ADV (adverbiaux) : y, en
     	PRO.IND (indéfinis) : tout, tous, certains, plusieurs, etc.
     	PRO.DEM (demonstratifs) : ceci, cela, ce, ça...
     	PRO.POS (possessifs) : le mien, les nôtres...
     	PRO.NUM (cardinaux, ordinaux) : les deux...
     	PRO.REL (relatifs) : qui, que, quoi, duquel, etc.
     	PRO.INT (interrogatifs)
     	PRO.CHECK (pronoms indéterminés)
     -- Sujet Zéro : verbes conjugués, éventuellement pronominal
     -- ERREUR : erreur (a priori) de mention
     */
     package org.txm.macro.urs.democrat
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     // Name of the property that holds the lemma
     def lemmaProperty = "frlemma"

     /*
        TODO: TreeTagger mis-tags the first words of a sentence.
        The capital letter makes it believe the word is a proper noun.
        Check that such a word is not in fact a pronoun by looking it up in one of the
        lists of particular forms.
     */

     /**
       * Lists of forms used to categorise the mentions.
       * TreeTagger is not necessarily very accurate on older states of the language.
     ***/
     formesArticlesDéfinis = ["le", "la", "les", "l'", "au", "aux", "du", "des"]
     formesArticlesIndéfinis = ["un", "une", "des"]
     formesDéterminantsPossessifs = [
         "ma", "ta", "sa", "mon", "ton", "son", "mes", "tes", "ses",
         "notre", "votre", "leur", "nos", "vos", "leurs"
     ]
     formesAdjectifsDémonstratifs = ["ce", "cet", "cette", "ces"]
     formesPronomsPersonnels = [
         "je", "tu", "il", "elle", "on", "nous", "vous", "ils", "elles",
         "moi", "toi", "eux", "me", "te", "se", "lui", "leur"
     ]
     formesPronomsAdverbiaux = ["en", "y"]
     formesPronomsPossessifs = [
         "mien", "mienne", "miens", "miennes",
         "tien", "tienne", "tiens", "tiennes",
         "sien", "sienne", "siens", "siennes",
         "nôtre", "nôtres", "vôtre", "vôtres", "leur", "leurs"
     ]
     formesPronomsDémonstratifs = [
         "ce", "c'", "celui", "celle", "ceux", "celles", "ci",
         "ça", "ceci", "cela", "tel", "telle", "tels", "telles"
     ]

     // Concatenation of all the lists above, in declaration order (duplicates are kept).
     toutesLesFormes = [] +
         formesArticlesDéfinis +
         formesArticlesIndéfinis +
         formesDéterminantsPossessifs +
         formesAdjectifsDémonstratifs +
         formesPronomsPersonnels +
         formesPronomsAdverbiaux +
         formesPronomsPossessifs +
         formesPronomsDémonstratifs
     /** End of the declaration of the forms **/
     /**
      * Derives the CATEGORIE label of a mention from the part-of-speech tags of its words.
      *
      * One-word and two-word mentions are first matched against pronoun, determiner and
      * proper-noun patterns. Mentions of three words or more, and two-word mentions
      * provisionally labelled "GN.CHECK", are then classified as noun phrases from their
      * first tag and from the lower-cased form of their first word.
      *
      * @param positions corpus positions of the words of the mention (passed to CQI.cpos2Str)
      * @param Mention   POS tags of the mention, one per word
      * @return the category label; "PRO.CHECK" and "TEST" mark mentions no rule recognised
      */
     def testRules(def positions, def Mention) {
         def catégorie = null
         // lower-cased form of the first word of the mention
         def forme = CQI.cpos2Str(word.getQualifiedName(), positions)[0].toLowerCase()

         if (Mention.length == 1) {
             def tag = Mention.first()
             if (tag == "NAM") {
                 catégorie = "GN.NAM"
             } else if (tag == "DET:POS") {
                 catégorie = "DET.POS"
             } else if (tag == "PRO:PER") {
                 // adverbial pronouns share the PRO:PER tag: tell them apart by form
                 catégorie = formesPronomsAdverbiaux.contains(forme) ? "PRO.ADV" : "PRO.PER"
             } else if (tag == "PRO:DEM") {
                 catégorie = "PRO.DEM"
             } else if (tag == "PRO:IND") {
                 catégorie = "PRO.IND"
             } else if (tag == "PRO:REL") {
                 catégorie = "PRO.REL"
             } else if (tag.contains("VER:")) {
                 catégorie = "SUJ.ZERO"
             } else if (tag == "PRO") {
                 catégorie = "PRO.INT"
             } else if (tag == "NOM" || tag == "ADJ") {
                 // indefinite noun phrases without article
                 // NOTE(review): the original also had a later "ADJ" -> "ERREUR" rule that
                 // could never be reached because of this one; it has been dropped.
                 // Confirm which of the two outcomes is wanted for a lone adjective.
                 catégorie = "GN.IND"
             }
             // workarounds for tagger errors
             else if (tag == "KON") {
                 catégorie = "PRO.REL" // 'que' as a one-word mention is a relative pronoun
             } else if (tag == "DET:ART") {
                 catégorie = "PRO.PER" // le, les
             } else if (forme == "en") {
                 catégorie = "PRO.ADV"
             } else if (tag == "ADV" || tag == "PRE" || tag == "INT") {
                 // a lone adverb, preposition or interjection is never referential
                 catégorie = "ERREUR"
             } else {
                 catégorie = "PRO.CHECK"
             }
         } else if (Mention.length == 2) {
             if (Mention.contains("NAM")) {
                 catégorie = "GN.NAM"
             } else if (Mention[1] == "PRO:POS") {
                 catégorie = "PRO.POS" // "les miens"
             } else if (Mention[1] == "NUM") {
                 catégorie = "PRO.NUM" // "les deux"
             } else if (Mention[1] == "PRO:DEM") {
                 catégorie = "PRO.DEM" // "Tout cela"
             } else if (Mention[0] == "PRO:IND") {
                 catégorie = "GN.IND" // "Quelques trucs"
             } else if (Mention.contains("PRO:REL")) {
                 catégorie = "PRO.REL"
             } else if (Mention[0].contains("DET") && (Mention[1] == "PRO:IND" || Mention[1] == "PROind")) {
                 // "des autres". The original only tested the "PROind" spelling, which is
                 // not the "PRO:IND" tag used by every other rule, so the rule never fired
                 // on such input; both spellings are accepted now.
                 catégorie = "PRO.IND"
             } else if (Mention[1].contains("VER:")) {
                 catégorie = "SUJ.ZERO"
             } else if (!Mention.contains("NOM") && !Mention.contains("ADJ")) {
                 catégorie = (Mention[0] == "PRO:DEM") ? "PRO.DEM" : "PRO.CHECK"
             } else {
                 catégorie = "GN.CHECK"
             }
         }

         if ((catégorie == null) || (catégorie == "GN.CHECK")) {
             // noun phrases: decide from the first tag, then from the first word form
             if (Mention[0] == "DET:POS") {
                 catégorie = "GN.POS"
             } else if (Mention[0] == "NUM") {
                 catégorie = "GN.NUM"
             } else if (Mention[0] == "PRO:DEM") {
                 catégorie = "GN.DEM"
             } else if (Mention[0] == "PRP:det") {
                 catégorie = "GN.DEF"
             } else if (formesArticlesIndéfinis.contains(forme) || (forme == "une")) {
                 catégorie = "GN.IND"
             } else if (formesArticlesDéfinis.contains(forme)) {
                 catégorie = "GN.DEF"
             } else if (Mention[0] == "PRO:IND" || Mention[0] == "PRP"
                     || Mention[0] == "ADJ" || Mention[0] == "NOM") {
                 catégorie = "GN.IND"
             } else if (Mention.contains("NAM")) {
                 catégorie = "GN.NAM"
             } else {
                 catégorie = "TEST"
             }
         }
         return catégorie
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // CORPS DU SCRIPT
     // Main body of the macro: for every unit of type unit_type, compute a CATEGORIE
     // value with testRules() from the POS tags of its words and store it on the unit.
     // The macro only runs on a MainCorpus selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus"
     	return
+    }
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
     def pos_property_name
     @Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
     def reset
     if (!ParametersDialog.open(this)) return
     // Script-level variables (no 'def') so that testRules() can read CQI and word.
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     if (posProperty == null) {
     	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
     	return
+    }
     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
+    }
     CATEGORIE = "CATEGORIE"
     // If the annotation structure has no CATEGORIE property yet, create it with its values.
     // NOTE(review): testRules() can also return "TEST", which is not declared below.
     if (!structure.getUniteProperties(unit_type).contains(CATEGORIE)) {
     // FIXME: the original script (see also
     // http://forge.cbp.ens-lyon.fr/redmine/issues/2065) uses
     // analecCorpus.ajouterProp/Val, but that does not work in my version of
     // TXM-Analec --> so I go back to structure.ajouterProp/Val
     // the property
     	structure.ajouterProp(Unite.class, unit_type, CATEGORIE)
     // the values
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "GN.NAM")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "GN.DEF")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "GN.IND")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "GN.POS")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "GN.DEM")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "GN.NUM")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "GN.CHECK")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "DET.POS")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.PER")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.ADV")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.IND")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.DEM")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.POS")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.NUM")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.INT")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.REL")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.CHECK")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "SUJ.ZERO")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "ERREUR")
+    }
     def nModified = 0
     def nIgnored = 0
     // Filled by testRules() when it records unmatched patterns; printed at the end.
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order the units by start position, then by end position.
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) { // process all units
     	def prop = unit.getProp(CATEGORIE)
     	// NOTE(review): units skipped here are counted neither as modified nor as ignored.
     	if (!reset && prop != null && prop.length() > 0) continue // the unit already has a CATEGORIE
     	// Corpus positions of every word of the unit, from getDeb() to getFin() inclusive.
     	int[] positions = null
     	if (unit.getDeb() == unit.getFin()) positions = [unit.getDeb()]
     	else positions = (unit.getDeb()..unit.getFin())
     	// POS tags of the unit, one per word.
     	def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
     	def cat = testRules(positions, Mention)
     	if (cat != null) {
     		// following line in the original script but doesn't work for me:
           // vue.setValeurChamp(unit, CATEGORIE, cat)
           unit.getProps().put(CATEGORIE, cat)
     		nModified++
     	} else {
     		nIgnored++
+    	}
+    }
     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     if (errors.size() > 0) {
     	println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
     	errors.keySet().each { println "fropos="+it+"\twords="+errors[it].join(" | ") }
+    }
     // update the view (see also
     // http://forge.cbp.ens-lyon.fr/redmine/issues/2065)
     URSCorpora.getVue(analecCorpus).retablirVueParDefaut()

     package org.txm.macro.urs.democrat
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /*
        Distingue les Pronoms Personnels Anaphoriques (PRO.PERA) des Pronoms Personnels Déictiques (PRO.PERD)
        PRO.PERD = frlemma(je|me|moi|tu|te|toi|nous|vous)
        On rajoute aussi quelques graphies anciennes que TreeTagger ne connaît pas forcément.
        Inutile de différencier minuscules et majuscules. Le test gère cela très bien.
        Auteur : Matthieu Quignard (ICAR)
        Date : 19/12/2017
      */
     // Lemmas (lower case) that make a personal pronoun deictic.
     listeLemmesDeictiques = ["je", "me", "moi", "tu", "te", "toi", "nous", "vous"]
     // Older spellings, matched on the lower-cased word form instead of the lemma.
     listeFormesDeictiques = ["moy", "toy"]
     // SCRIPT BODY
     // The macro only runs on a MainCorpus selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
         println "Corpora selection is not a Corpus: "+corpusViewSelection
         return
     }
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="frlemma")
     def pos_property_name
     if (!ParametersDialog.open(this)) return
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     if (posProperty == null) {
         println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
         return
     }
     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
         println "Error: corpus structure does not contains unit with name=$unit_type"
         return
     }
     CATEGORIE = "CATEGORIE"
     // Declare the two values this macro writes.
     structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.PERA")
     structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO.PERD")
     def nModified = 0
     def nIgnored = 0
     def nProPerA = 0
     def nProPerD = 0
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order the units by start position, then by end position.
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) { // process all units
         def prop = unit.getProp(CATEGORIE)
         if ( (prop == null) || (!prop.contains("PRO.PER")) ) {
             // Only personal pronouns (PRO.PER...) are of interest; the rest is left untouched.
             nIgnored++
             continue
         }
         // Corpus positions of every word of the unit, from getDeb() to getFin() inclusive.
         int[] positions = null
         if (unit.getDeb() == unit.getFin()) positions = [unit.getDeb()]
         else positions = (unit.getDeb()..unit.getFin())
         // Lemma of the first word. It is lower-cased so that the lookup below is
         // case-insensitive, as the macro description states; the original compared
         // the raw lemma and missed capitalised values.
         def lemme = CQI.cpos2Str(posProperty.getQualifiedName(), positions)[0]?.toLowerCase()
         // Lower-cased form of the first word.
         def forme = CQI.cpos2Str(word.getQualifiedName(), positions)[0].toLowerCase()
         if (listeLemmesDeictiques.contains(lemme) || listeFormesDeictiques.contains(forme)) {
             // deictic: known lemma, or known old spelling of the form
             vue.setValeurChamp(unit, CATEGORIE, "PRO.PERD")
             nProPerD++
         } else {
             // everything else is anaphoric
             vue.setValeurChamp(unit, CATEGORIE, "PRO.PERA")
             nProPerA++
         }
         nModified++
     }
     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     println "- $nProPerA mentions de ProPer anaphoriques."
     println "- $nProPerD mentions de ProPer déictiques."

     // @author Bruno Oberlé (2017-04-01 21:50)
     /*
     Définit la catégorie grammaticale du maillon d'après le champ `frpos'
     (tagset de TreeTagger).  Le script est adapté de
     http://svn.code.sf.net/p/txm/code/trunk/plugins/Analec/AnalecRCP/src/org/txm/macro/analec/Fropos2CategorieMacro.groovy.
     Voici la liste des catégories grammaticales retenues (manuel d'annotation de
     Democrat):
     - GN: Groupe Nominal (le petit chat, le chat, le même, ce chat etc.)
     - POSS: Possessif (mon, ton son, ma, ta, sa, mes, tes, ses, notre, votre,
       leur, nos, vos, leurs)
     - PR: Pronom (moi, toi, lui, elle, nous, vous, eux, elles, le tien, le mien,
       moi-même etc.)
     - PR_CL_O: Pronom Clitique Objet (me, te, le, la, les, lui, leur, y, en)
     - PR_CL_R: Pronom Clitique Réfléchi
     - PR_CL_S: Pronom Clitique Sujet (je, tu, il, elle, on, nous, vous, ils,
       elles)
     - PR_REL: Pronom Relatif (qui, que, quoi, dont, où, lequel, quiconque etc.)
     - PR_WH: Pronom Interrogatif (qui, que, quoi, lequel etc.)
     Le script ne peut pas désambiguïser les pronoms clitiques de même forme
     (`nous' est-il un sujet, un objet ou un réfléchi?).  Dans ce cas, le script
     opte pour le sujet (ou pour l'objet si l'ambiguïté n'est que entre objet et
     réfléchi).
     Quand il n'y a aucune information disponible (erreurs de l'étiqueteur), la
     valeur est UNDEFINED.
     L'algorithme est décrit ici:
     https://groupes.renater.fr/wiki/democrat/prive/txm_annotation_exploitation
     */
     package org.txm.macro.urs.democrat
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /**
      * Classifies a one-word personal pronoun as a subject, object or reflexive clitic.
      *
      * The decision is made on the lower-cased word form, not on the lemma, because
      * the lemma of forms such as "elle", "te" or "leur" is hard to predict. Forms
      * that are ambiguous between several functions go to the first list that
      * contains them (subject before object before reflexive).
      *
      * @param position corpus position of the word
      * @param frpos    POS tag of the word (not used by the test)
      * @return "PR_CL_S", "PR_CL_O", "PR_CL_R", or null when the form is not a known clitic
      */
     def testClitic(def position, def frpos) {
         def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()
         if (form in ["je", "j'", "tu", "t'", "il", "elle", "on", "vous", "nous", "ils", "elles"]) {
             return "PR_CL_S"
         }
         // "y" and "en" are listed among the object clitics in the category
         // description of this macro but were missing from the original test.
         if (form in ["me", "m'", "te", "le", "l'", "la", "lui", "leur", "les", "y", "en"]) {
             return "PR_CL_O"
         }
         if (form in ["se", "s'"]) {
             return "PR_CL_R"
         }
         return null
     }
     /**
      * Tells a noun phrase from a relative pronoun by looking at which tag comes first.
      *
      * The tags are scanned from left to right and the first decisive one wins:
      * a noun ("NOM" or "NAM") makes the mention a "GN", as in `le petit chat que
      * j'ai adopté'; a relative pronoun ("PRO:REL") makes it a "PR_REL", as in
      * `(le livre) dans lequel j'ai lu cette histoire...'.
      * NOTE: in Democrat the relative clause is not annotated inside the mention,
      * so a GN never contains a relative there (`[le petit chat] [que] [j']ai
      * adopté'), but not everybody annotates that way.
      *
      * @param positions corpus positions of the words (not used)
      * @param Mention   POS tags of the mention, one per word
      * @return "GN", "PR_REL", or null when the mention holds neither kind of tag
      */
     def testPhrase(def positions, def Mention) {
         for (etiquette in Mention) {
             switch (etiquette) {
                 case "NOM":
                 case "NAM":
                     return "GN"
                 case "PRO:REL":
                     return "PR_REL"
             }
         }
         return null
     }
     /**
      * Recognises a one-word interrogative pronoun.
      *
      * The word form is lower-cased before the comparison, as in testClitic, so
      * that a capitalised "Qui" or "Que" is recognised too; the original
      * compared the raw form.
      *
      * @param position corpus position of the word
      * @param mention  POS tag of the word
      * @return "PR_WH" when the tag is "PRO" and the form is "qui", "que" or "lequel", else null
      */
     def testWhPronoun(position, mention) {
         def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()
         if (mention == "PRO" && (form == "qui" || form == "que" || form == "lequel")) {
             return "PR_WH"
         }
         return null
     }
     /**
      * Derives the CATEGORIE label of a mention from the POS tags of its words.
      *
      * The rules are tried in order and the first one that yields a value wins:
      * possessive determiner, clitic pronoun, interrogative pronoun, noun phrase or
      * relative pronoun, then any other pronoun. When nothing matches, the label is
      * "UNDEFINED" and the tag pattern is recorded in the script-level `errors` map
      * together with the words it was seen with.
      *
      * @param positions corpus positions of the words of the mention
      * @param Mention   POS tags of the mention, one per word
      * @return the category label, never null
      */
     def testRules(def positions, def Mention) {
         def catégorie = null
         // a possessive (mon, ma...)
         if (Mention.length == 1 && Mention.contains("DET:POS"))
             catégorie = "POSS"
         // a clitic (subject: je, tu...; object: me, te; reflexive: se)
         if (!catégorie && Mention.length == 1 && Mention.contains("PRO:PER"))
             catégorie = testClitic(positions[0], Mention[0])
         // an interrogative pronoun
         if (!catégorie && Mention.length == 1)
             catégorie = testWhPronoun(positions[0], Mention[0])
         // a noun phrase or a relative pronoun
         if (!catégorie)
             catégorie = testPhrase(positions, Mention)
         // some other kind of pronouns
         // "PRO:POS" is added next to the original "PRO:POSS" spelling
         // (assumed here to be the tagger's possessive-pronoun tag — TODO confirm against the tagset).
         if (!catégorie
                 && (   Mention.contains("PRO")
                     || Mention.contains("PRO:POS")
                     || Mention.contains("PRO:POSS")
                     || Mention.contains("PRO:IND")
                     || Mention.contains("PRO:DEM")
                     || Mention.contains("PRO:PER") )
                 && !Mention.contains("NOM")
                 && !Mention.contains("NAM") )
             catégorie = "PRO"
         // End of the rules, none matched: remember the pattern for the final report.
         if (!catégorie) {
             catégorie = "UNDEFINED" // clear the field
             // Arrays are compared by identity when used as map keys or set elements,
             // so the tags and the words are joined into strings to let identical
             // patterns share one entry and identical word sequences be stored once.
             def motif = Mention.join(" ")
             def mots = CQI.cpos2Str(word.getQualifiedName(), positions).join(" ")
             if (!errors.containsKey(motif)) errors[motif] = new HashSet()
             errors[motif] << mots
         }
         return catégorie
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // CORPS DU SCRIPT
     // Main body of the macro: for every unit of type unit_type, compute a CATEGORIE
     // value with testRules() from the POS tags of its words and store it on the unit.
     // The macro only runs on a MainCorpus selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus"
     	return
+    }
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
     def pos_property_name
     @Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
     def reset
     if (!ParametersDialog.open(this)) return
     // Script-level variables (no 'def') so that the test functions can read CQI and word.
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     if (posProperty == null) {
     	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
     	return
+    }
     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
+    }
     CATEGORIE = "CATEGORIE"
     // If the annotation structure has no CATEGORIE property yet, create it with its values.
     // NOTE(review): testRules() can also return "UNDEFINED", which is not declared below.
     if (!structure.getUniteProperties(unit_type).contains(CATEGORIE)) {
     // FIXME: the original script (see also
     // http://forge.cbp.ens-lyon.fr/redmine/issues/2065) uses
     // analecCorpus.ajouterProp/Val, but that does not work in my version of
     // TXM-Analec --> so I go back to structure.ajouterProp/Val
     // the property
     	structure.ajouterProp(Unite.class, unit_type, CATEGORIE)
     // the values
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "GN")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "POSS")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_CL_O")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_CL_S")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_CL_R")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_REL")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_WH")
     //...
+    }
     def nModified = 0
     def nIgnored = 0
     // Filled by testRules() with the tag patterns no rule matched; printed at the end.
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order the units by start position, then by end position.
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) { // process all units
     	def prop = unit.getProp(CATEGORIE)
     	// NOTE(review): units skipped here are counted neither as modified nor as ignored.
     	if (!reset && prop != null && prop.length() > 0) continue // the unit already has a CATEGORIE
     	// Corpus positions of every word of the unit, from getDeb() to getFin() inclusive.
     	int[] positions = null
     	if (unit.getDeb() == unit.getFin()) positions = [unit.getDeb()]
     	else positions = (unit.getDeb()..unit.getFin())
     	// POS tags of the unit, one per word.
     	def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
     	def cat = testRules(positions, Mention)
     	if (cat != null) {
     		// following line in the original script but doesn't work for me:
           // vue.setValeurChamp(unit, CATEGORIE, cat)
           unit.getProps().put(CATEGORIE, cat)
     		nModified++
     	} else {
     		nIgnored++
+    	}
+    }
     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     if (errors.size() > 0) {
     	println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
     	errors.keySet().each { println "fropos="+it+"\twords="+errors[it].join(" | ") }
+    }
     // update the view (see also
     // http://forge.cbp.ens-lyon.fr/redmine/issues/2065)
     URSCorpora.getVue(analecCorpus).retablirVueParDefaut()

     // Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
     // Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
     // @author mdecorde
     // STANDARD DECLARATIONS
     package org.txm.macro.urs.democrat
     import org.kohsuke.args4j.*
     import groovy.transform.Field
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.donnees.Structure;
     import visuAnalec.elements.Schema
     import visuAnalec.elements.Unite;
     import visuAnalec.vue.Vue
     // Set-up of the macro: check the selection, read the parameters, make sure the
     // annotation structure has the 'CHAINE' schema and that the units carry the
     // reference property.
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus"
     	return;
+    }
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
     String unit_type
     @Field @Option(name="ref_property",usage="", widget="String", required=true, def="REF")
     String ref_property
     if (!ParametersDialog.open(this)) return;
     int nCreated = 0 // count the number of created CHAINE
     int nUpdated = 0 // count the number of updated CHAINE
     MainCorpus corpus = corpusViewSelection
     def analecCorpus = URSCorpora.getCorpus(corpus); // analec corpus has the same name as the TXM corpus
     Structure structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
+    }
     if (!structure.getSchemas().contains("CHAINE")) { // update the structure if needed
     	println "Creating the 'CHAINE' schema in the structure"
     	analecCorpus.ajouterType(Schema.class, "CHAINE")
     	// NOTE(review): the schema property is hard-coded to "REF" while the name of
     	// the unit property is configurable through ref_property — confirm this is intended.
     	analecCorpus.ajouterProp(Schema.class, "CHAINE", "REF")
     	analecCorpus.ajouterProp(Schema.class, "CHAINE", "GENRE")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "GENRE", "INDETERMINABLE")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "GENRE", "FEMININ")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "GENRE", "MASCULIN")
     	analecCorpus.ajouterProp(Schema.class, "CHAINE", "NOMBRE")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "NOMBRE", "GROUPE_FLOU")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "NOMBRE", "GROUPE_STRICT")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "NOMBRE", "SINGULIER")
     	analecCorpus.ajouterProp(Schema.class, "CHAINE", "NB MAILLONS")
     	analecCorpus.ajouterProp(Schema.class, "CHAINE", "TYPE REFERENT")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "UNKNOWN")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "CONCRET_OBJECT")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "ABSTRACT_OBJECT")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "TIME")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "PRODUCT")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "AMOUNT")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "EVENT")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "GPE")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "ORG")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "PERSON")
     	analecCorpus.ajouterVal(Schema.class, "CHAINE", "TYPE REFERENT", "LIEU")
+    }
     def props = structure.getUniteProperties(unit_type)
     if (!props.contains(ref_property)) { // check the unit_type units have the REF property
     	println "Error: $unit_type units have no proprerty named '$ref_property'"
     	return
+    }
     // Group the units by the value of their reference property: one entry per
     // value, holding the units in corpus order (start position, then end position).
     def units = analecCorpus.getUnites(unit_type)
     units.sort() { gauche, droite ->
         gauche.getDeb() <=> droite.getDeb() ?: gauche.getFin() <=> droite.getFin()
     }
     def chaines = units.groupBy { it.getProp(ref_property) }
     // update the already existing CHAINES schemas
     for (Schema schema : analecCorpus.getSchemas("CHAINE")) {
     	String ref = schema.getProp(ref_property)
     	if (chaines.containsKey(ref)) { // the CHAINE exists
     		// maj des unités de la chaine existante
     		int size_before = schema.getContenu().size()
     		for (def unit : chaines[ref]) schema.ajouter(unit) // insert the new units in the hashset
     		// Update the CHAINE size
     		schema.props.put("NB MAILLONS", Integer.toString(schema.contenu.size()))

Laboratoire ICAR » Plateforme TXM

Révision 2065