/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

     package org.txm.macro.urs.edit
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /*
        Calcule l'accessibilité (faible ou forte) d'une mention à partir de sa CATEGORIE
        La macro fonctionne à partir de 2 listes de catégories : accessibilité forte, accessibilité nulle.
        L'accessibilité faible sera le cas par défaut.
        La macro crée une nouvelle propriété pour les MENTION : ACCESSIBILITE
        Les valeurs autorisées sont : "Faible", "Forte" et "N/A"
        Voir si la dénomination de cette dernière valeur convient.
        Auteur : Matthieu Quignard
        Date : 19/12/2017
      */
     // Categories whose mentions receive the "Forte" (strong) accessibility value
     def listeAccessibiliteForte=["PRO.PERA", "PRO.REL", "SUJ.ZERO", "DET.POS"]
     // Categories whose mentions receive the "N/A" value
     def listeAccessibiliteNulle=["PRO.PERD"]
     // SCRIPT BODY
     // The macro only runs on a main corpus selection
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus: "+corpusViewSelection
     	return
     }
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     if (!ParametersDialog.open(this)) return
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     // Stop when the annotation structure has no unit type named unit_type
     def declaredUnitTypes = structure.getUnites()
     if (!declaredUnitTypes.contains(unit_type)) {
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
     }
     ACCESSIBILITE = "ACCESSIBILITE"
     // Declare the ACCESSIBILITE property and its allowed values when the structure lacks it
     if (!structure.getUniteProperties(unit_type).contains(ACCESSIBILITE)) {
     	analecCorpus.ajouterProp(Unite.class, unit_type, ACCESSIBILITE)
     	for (String allowedValue : ["Faible", "Forte", "N/A"]) {
     		structure.ajouterVal(Unite.class, unit_type, ACCESSIBILITE, allowedValue)
     	}
     }
     def nModified = 0
     def nIgnored = 0
     def nNulle = 0
     def nForte = 0
     def nFaible = 0
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order units by start position, then by end position
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite mention : units) {
     	def category = mention.getProp("CATEGORIE")
     	if (category == null) {
     		// Units without a CATEGORIE value are left untouched
     		nIgnored++
     		continue
     	}
     	nModified++
     	def accessibility
     	if (listeAccessibiliteNulle.contains(category)) {
     		accessibility = "N/A"
     		nNulle++
     	} else if (listeAccessibiliteForte.contains(category)) {
     		accessibility = "Forte"
     		nForte++
     	} else {
     		// Any other category falls back to weak accessibility
     		accessibility = "Faible"
     		nFaible++
     	}
     	vue.setValeurChamp(mention, ACCESSIBILITE, accessibility)
     }
     println "Result:"
     println "- $nModified mentions ont été modifiées."
     println "- $nIgnored mentions ont été ignorées (leur catégorie est vide).\n"
     println "- $nForte mentions d'accessibilité forte."
     println "- $nFaible mentions d'accessibilité faible."
     println "- $nNulle mentions de type 'N/A' (déictiques).\n"

     package org.txm.macro.urs.edit
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /*
     Définit la catégorie grammaticale des mentions d'après le champ `frpos'
     (tagset de TreeTagger).
     La liste des catégories grammaticales est celle de CATTEX2009
     -- Groupes nominaux :
     	GN.NAM (noms propres) : Henri II
     	GN.DEF (définis) : Le roi, du roi
     	GN.IND (indéfinis) : Un roi
     	GN.POS (possessifs) : [Mon roi]
     	GN.DEM (demonstratif) : Ce roi
     	GN.NUM (numéraux) : Deux rois
     	GN.CHECK (GN indéterminés)
     -- Déterminants
     	DET.POS (possessifs) : [mon] roi
     -- Pronoms
     	PRO.PER (personnels) : je, moi, me, on, il, etc.
     	PRO.ADV (adverbiaux) : y, en
     	PRO.IND (indéfinis) : tout, tous, certains, plusieurs, etc.
     	PRO.DEM (demonstratifs) : ceci, cela, ce, ça...
     	PRO.POS (possessifs) : le mien, les nôtres...
     	PRO.NUM (cardinaux, ordinaux) : les deux...
     	PRO.REL (relatifs) : qui, que, quoi, duquel, etc.
     	PRO.INT (interrogatifs)
     	PRO.CHECK (pronoms indéterminés)
     -- SUJ.ZERO (Sujet Zéro) : verbes conjugués, éventuellement pronominal
     -- ERREUR : erreur (a priori) de mention
     -- ADJpos : adjectif possessif à fusionner ou pas avec DETpos
     */
     /**
      * Maps the part-of-speech tags of a mention to a CATEGORIE label.
      *
      * One-token and two-token mentions go through dedicated rules first. Any
      * mention still unclassified, or classified "GN.CHECK", is then typed as a
      * nominal group from its first tag.
      *
      * @param positions corpus positions of the mention tokens (not read by the rules)
      * @param Mention   array of POS tags, one per token of the mention
      * @return the category label
      */
     def testRules(def positions, def Mention) {
     	def categorie = null
     	if (Mention.length == 1) {
     		def tag = Mention.first()
     		// Exact-match rules for single-token mentions
     		def singleTokenCategories = [
     			"NOMpro": "GN.NAM",
     			"DETpos": "DET.POS",
     			"ADJpos": "ADJ.POS",
     			"PROdem": "PRO.DEM",
     			"PROind": "PRO.IND",
     			"PROcar": "PRO.NUM",
     			"PROord": "PRO.NUM", // merged with cardinals
     			"PROpos": "PRO.POS",
     			"PROper": "PRO.PER",
     			"PROimp": "PRO.PER", // merged with personal pronouns
     			"PROint": "PRO.INT",
     			"PROadv": "PRO.ADV",
     			"PROrel": "PRO.REL",
     			"NOMcom": "GN.CHECK",
     			// "Contracted" pronouns
     			"PROper.PROper": "PRO.PER", // double personal pronoun, e.g. 'jel' for 'je le'
     			"ADVgen.PROper": "PRO.PER", // adverb + personal pronoun, e.g. 'sil' for 'si le'
     			"ADVneg.PROper": "PRO.PER", // adverb + personal pronoun, e.g. 'nel' for 'ne le'
     			// Mention errors: these tags alone are never referential
     			"ADVgen": "ERREUR",
     			"ADVneg": "ERREUR",
     			"PRE": "ERREUR",
     			"ADJqua": "ERREUR",
     			"ADJind": "ERREUR",
     			"INJ": "ERREUR"
     		]
     		if (singleTokenCategories.containsKey(tag)) categorie = singleTokenCategories[tag]
     		// None of the exact tags above contains "VER", so testing it afterwards keeps the rule order
     		else if (tag.contains("VER")) categorie = "SUJ.ZERO"
     		else categorie = "PRO.CHECK"
     	} else if (Mention.length == 2) {
     		def first = Mention[0]
     		def second = Mention[1]
     		if (first == "NOMpro" || second == "NOMpro") categorie = "GN.NAM"
     		else if (second == "PROrel") categorie = "PRO.REL"  // "ce que" takes priority over "celui là"
     		else if (second == "PROpos") categorie = "PRO.POS"  // "les miens"
     		else if (second.contains("car")) categorie = "PRO.NUM"  // "les deux"
     		else if (second == "PROdem") categorie = "PRO.DEM"  // "Tout cela"
     		else if (first.contains("DET") && second.contains("PROind")) categorie = "PRO.IND" // "les autres"
     		else if (first.contains("VER") && second.contains("VER")) categorie = "SUJ.ZERO" // compound-tense verb
     		else if (!first.contains("NOM") && !first.contains("ADJ") && !second.contains("NOM") && !second.contains("ADJ")) {
     			if (first == "PROdem") categorie = "PRO.DEM"
     			else if (second == "VERinf") categorie = "GN.CHECK" // nominalized verb
     			else if (second == "VERppa") categorie = "GN.CHECK" // nominalized verb
     			else if (first.contains("PRE") && second == "PROper") categorie = "GN.CHECK" // noun complement
     			else categorie = "PRO.CHECK"
     		}
     		else categorie = "GN.CHECK"
     	}
     	if (categorie == null || categorie == "GN.CHECK") {
     		// Nominal groups: classify from the first tag
     		def head = Mention[0]
     		if (head == "DETcar") categorie = "GN.NUM"
     		else if (head == "DETord") categorie = "GN.NUM"
     		else if (Mention.contains("NOMpro")) categorie = "GN.NAM"
     		else if (head == "DETpos") categorie = "GN.POS"
     		else if (head == "PROdem") categorie = "GN.DEM"
     		else if (head == "DETdem") categorie = "GN.DEM"
     		else if (head == "PRE.DETdef") categorie = "GN.DEF"
     		else if (head == "DETdef") categorie = "GN.DEF"
     		else if (head == "DETndf") categorie = "GN.IND"
     		else if (head == "DETind") categorie = "GN.IND"
     		else if (head == "PROind") categorie = "GN.IND"
     		else if (head.contains("PRP")) categorie = "GN.IND"
     		else if (head.contains("ADJ")) categorie = "GN.IND"
     		else if (head.contains("NOM")) categorie = "GN.IND"
     		else categorie = "GN.CHECK"
     	}
     	return categorie
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // CORPS DU SCRIPT
     // The macro only runs on a main corpus selection
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus"
     	return
     }
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
     def pos_property_name
     @Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
     def reset
     if (!ParametersDialog.open(this)) return
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     // Stop when the corpus has no property named pos_property_name
     if (posProperty == null) {
     	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
     	return
     }
     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     // Stop when the annotation structure has no unit type named unit_type
     def declaredUnitTypes = structure.getUnites()
     if (!declaredUnitTypes.contains(unit_type)) {
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
     }
     CATEGORIE = "CATEGORIE"
     // Declare the CATEGORIE property and its values when the structure lacks it
     if (!structure.getUniteProperties(unit_type).contains(CATEGORIE)) {
     	analecCorpus.ajouterProp(Unite.class, unit_type, CATEGORIE)
     	def categoryLabels = [
     		"GN.NAM", "GN.DEF", "GN.IND", "GN.POS", "GN.DEM", "GN.NUM", "GN.CHECK",
     		"DET.POS",
     		"PRO.PER", "PRO.ADV", "PRO.IND", "PRO.DEM", "PRO.POS", "PRO.NUM", "PRO.INT", "PRO.REL", "PRO.CHECK",
     		"SUJ.ZERO", "ERREUR", "ADJ.POS"
     	]
     	for (String label : categoryLabels) {
     		structure.ajouterVal(Unite.class, unit_type, CATEGORIE, label)
     	}
     }
     def nModified = 0
     def nIgnored = 0
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order units by start position, then by end position
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) {
     	def existing = unit.getProp(CATEGORIE)
     	// Without reset, a unit that already has a CATEGORIE value is skipped
     	boolean keepExisting = !reset && existing != null && existing.length() > 0
     	if (keepExisting) continue
     	// Corpus positions covered by the unit, both bounds included
     	int[] positions = (unit.getDeb() == unit.getFin()) ? [unit.getDeb()] : (unit.getDeb()..unit.getFin())
     	def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
     	def cat = testRules(positions, Mention)
     	if (cat == null) {
     		nIgnored++
     	} else {
     		vue.setValeurChamp(unit, CATEGORIE, cat)
     		nModified++
     	}
     }
     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     // Report the tag patterns recorded in errors, if any
     if (!errors.isEmpty()) {
     	println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
     	for (def pattern : errors.keySet()) {
     		println "fropos="+pattern+"\twords="+errors[pattern].join(" | ")
     	}
     }

     // STANDARD DECLARATIONS
     package org.txm.macro.urs.edit
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /*  MACRO pour corriger une erreur d'annotation
      Retirer le "De" du complément du nom
      Algo :
      POUR CHAQUE MENTION dont le premier mot est "de" (en minuscules)
      SI     il existe une autre MENTION dans laquelle celle-ci est totalement incluse
      ALORS  incrémenter d'un mot la frontière gauche de la mention
      Ajouter la categorie CDN.CHECK pour qu'on puisse verifier facilement le job.
      */
     // BEGINNING OF PARAMETERS
     // The macro needs a CQP corpus selection
     if (!(corpusViewSelection instanceof org.txm.searchengine.cqp.corpus.CQPCorpus)) {
     	println "Selection must be a Corpus: "+corpusViewSelection
     	return;
     }
     // Declare each parameter here
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     @Field @Option(name="category_name", usage="", widget="String", required=true, def="CATEGORIE")
     def category_name
     if (!ParametersDialog.open(this)) return
     corpus = corpusViewSelection.getMainCorpus()
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
     }
     // Create the category property when the annotation structure lacks it
     if (!structure.getUniteProperties(unit_type).contains(category_name)) {
     	structure.ajouterProp(Unite.class, unit_type, category_name)
     }
     // Value written on every processed unit so the result can be reviewed afterwards
     def check_cat = "CDN.CHECK"
     structure.ajouterVal(Unite.class, unit_type, category_name, check_cat)
     def nModified = 0
     def nIgnored1 = 0
     def nIgnored2 = 0
     def compteur = 0 // index of the current unit in the sorted list
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order units by start position, then by end position
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     /* Manual test on one mention:
      def debut1 = units[1].getDeb()
      def fin1 = units[1].getFin()
      println "$debut1 - $fin1"
      units[1].setDeb( debut1 + 1)
      units[1].setFin( fin1 + 1)
      URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
      def debut2 = units[1].getDeb()
      def fin2 = units[1].getFin()
      println "$debut2 - $fin2"
      */
     for (Unite unit : units) { // process all units
     	def debut = unit.getDeb()
     	def fin = unit.getFin()
     	def premierMot = CQI.cpos2Str(word.getQualifiedName(), debut)[0]
     	if (premierMot != "de") {
     		// Only mentions whose first word is lowercase "de" are concerned
     		nIgnored1++
     		compteur++
     		continue
     	}
     	// Look backwards for a unit that ends at or after this one. Earlier units
     	// in the sorted list start at or before it, so such a unit contains it.
     	boolean enclosed = false
     	for (int i = compteur - 1; i >= 0; i--) {
     		def u = units[i]
     		if (u.getFin() >= fin) {
     			println "\nAVANT => Unit $compteur : $debut - $fin"
     			// Move the left boundary one word to the right. The previous code
     			// passed `debut++`, i.e. the unchanged value, so nothing moved.
     			if (fin > debut) unit.setDeb(debut + 1)
     			else println "not resizing"
     			def debut2 = unit.getDeb()
     			def fin2 = unit.getFin()
     			URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
     			println "APRES => Unit $compteur : $debut2 - $fin2"
     			unit.getProps().put(category_name, check_cat)
     			enclosed = true
     			break
     		}
     	}
     	if (enclosed) {
     		nModified++
     	} else {
     		nIgnored2++
     	}
     	compteur++
     }
     println "\nResult:"
     println "- $nModified units have been modified."
     println "- $nIgnored1 units have been ignored because not concerned"
     println "- $nIgnored2 units have been ignored because no overlap.\n"
     println "Total ($compteur)."
     // END OF PARAMETERS
     URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
     println "corpora selection: "+corpusViewSelection

     // Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
     // Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
     // @author mdecorde
     // STANDARD DECLARATIONS
     package org.txm.macro.urs.edit
     import org.kohsuke.args4j.*
     import groovy.transform.Field
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.donnees.Structure;
     import visuAnalec.elements.Relation;
     import visuAnalec.elements.Schema
     import visuAnalec.elements.Unite;
     import visuAnalec.vue.Vue
     // The macro only runs on a main corpus selection
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus"
     	return;
     }
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
     String unit_type
     @Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
     String schema_type
     if (!ParametersDialog.open(this)) return;
     int nCreated = 0 // number of relations created by this run
     MainCorpus corpus = corpusViewSelection
     def analecCorpus = URSCorpora.getCorpus(corpus);
     Structure structure = analecCorpus.getStructure()
     // Both the unit type and the schema type must exist in the structure
     def declaredUnitTypes = structure.getUnites()
     if (!declaredUnitTypes.contains(unit_type)) {
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
     }
     def declaredSchemaTypes = structure.getSchemas()
     if (!declaredSchemaTypes.contains(schema_type)) {
     	println "Error: corpus structure does not contains schema with name=$schema_type"
     	return
     }
     // Declare the ANAPHORE relation type and its TYPE property when missing
     if (!structure.getRelations().contains("ANAPHORE")) {
     	println "Creating the 'ANAPHORE' relation in the structure"
     	structure.ajouterType(Relation.class, "ANAPHORE")
     	analecCorpus.ajouterProp(Relation.class, "ANAPHORE", "TYPE")
     	for (String typeValue : ["COREFERENTE", "ASSOCIATIVE"]) {
     		analecCorpus.ajouterVal(Relation.class, "ANAPHORE", "TYPE", typeValue)
     	}
     }
     // Refuse to run when ANAPHORE relations already exist
     if (analecCorpus.getRelations("ANAPHORE").size() > 0) {
     	println "Error: This macro can't update existing Relations"
     	return
     }
     for (Schema schema : analecCorpus.getSchemas(schema_type)) {
     	// Collect the schema's units of the requested type
     	def units = []
     	for (Unite candidate : schema.getUnitesSousjacentes()) {
     		if (!candidate.type.equals(unit_type)) continue
     		units.add(candidate)
     	}
     	// Order them by start position, then by end position
     	units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     	// Link each unit (from the second one on) to the unit just before it
     	for (int next = 1 ; next < units.size() ; next++) {
     		def current = units[next]
     		def previous = units[next - 1]
     		println "creating "+current+", "+previous
     		Relation relation = new Relation("ANAPHORE", current, previous)
     		relation.getProps().put("TYPE", "COREFERENTE")
     		analecCorpus.addRelationLue(relation)
     		nCreated++;
     	}
     }
     println "nCreated=$nCreated"

     package org.txm.macro.urs.edit
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /*
      Calcule la longueur des mentions et attribue une valeur (1, 2, 3 ou plus) dans la propriété "LONGUEUR"
      */
     // Per-class counters. They are assigned without a declaration, so they are
     // script binding variables, which testRules below can update.
     nLongueur1 = 0
     nLongueur2 = 0
     nLongueur3 = 0
     /**
      * Returns the length class of a mention ("1", "2" or "3 ou plus") and
      * increments the matching counter.
      *
      * @param positions corpus positions of the mention tokens (not read here)
      * @param Mention   array holding one entry per token of the mention
      * @return the length class label
      */
     def testRules(def positions, def Mention) {
     	switch (Mention.length) {
     		case 1:
     			catégorie = "1"
     			nLongueur1++
     			break
     		case 2:
     			catégorie = "2"
     			nLongueur2++
     			break
     		default:
     			catégorie = "3 ou plus"
     			nLongueur3++
     	}
     	return catégorie
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // CORPS DU SCRIPT
     // The macro only runs on a main corpus selection
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus: "+corpusViewSelection
     	return
     }
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
     def pos_property_name
     @Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
     def reset
     if (!ParametersDialog.open(this)) return
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     // Stop when the corpus has no property named pos_property_name
     if (posProperty == null) {
     	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
     	return
     }
     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     // Stop when the annotation structure has no unit type named unit_type
     def declaredUnitTypes = structure.getUnites()
     if (!declaredUnitTypes.contains(unit_type)) {
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
     }
     LONGUEUR = "LONGUEUR"
     // Declare the LONGUEUR property and its values when the structure lacks it
     if (!structure.getUniteProperties(unit_type).contains(LONGUEUR)) {
     	analecCorpus.ajouterProp(Unite.class, unit_type, LONGUEUR)
     	for (String lengthClass : ["1", "2", "3 ou plus"]) {
     		structure.ajouterVal(Unite.class, unit_type, LONGUEUR, lengthClass)
     	}
     }
     def nModified = 0
     def nIgnored = 0
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order units by start position, then by end position
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) {
     	def existing = unit.getProp(LONGUEUR)
     	// Without reset, a unit that already has a LONGUEUR value is skipped
     	boolean keepExisting = !reset && existing != null && existing.length() > 0
     	if (keepExisting) continue
     	// Corpus positions covered by the unit, both bounds included
     	int[] positions = (unit.getDeb() == unit.getFin()) ? [unit.getDeb()] : (unit.getDeb()..unit.getFin())
     	def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
     	def cat = testRules(positions, Mention)
     	if (cat == null) {
     		nIgnored++
     	} else {
     		vue.setValeurChamp(unit, LONGUEUR, cat)
     		nModified++
     	}
     }
     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     println "- $nLongueur1 mentions de longueur 1."
     println "- $nLongueur2 mentions de longueur 2."
     println "- $nLongueur3 mentions de longueur 3 ou plus.\n"
     // Report the tag patterns recorded in errors, if any
     if (!errors.isEmpty()) {
     	println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
     	for (def pattern : errors.keySet()) {
     		println "fropos="+pattern+"\twords="+errors[pattern].join(" | ")
     	}
     }

     package org.txm.macro.urs.edit
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /**
      * Applies the category rules to a mention, given the POS tags of its tokens.
      *
      * Rules are tried in order and the first match wins. When no rule matches,
      * the tag pattern and the word forms of the mention are recorded in the
      * script-level `errors` map and null is returned.
      *
      * @param positions corpus positions of the mention tokens
      * @param Mention   array of POS tags, one per token of the mention
      * @return the category label, or null when no rule matched
      */
     def testRules(def positions, def Mention) {
     	def categorie = null
     	// RULE DEFINITIONS
     	// CONTAINS rules
     	if (Mention.contains("NOMpro")) categorie = "Nom Propre"
     	else if (Mention.contains("DETpos")) categorie = "Dét Possessif"
     	//  ...
     	// STARTS WITH AND DOES NOT CONTAIN rules
     	// NOTE(review): the "GN Possessif" rule cannot fire, because any mention
     	// starting with DETpos already matched the CONTAINS "DETpos" rule above.
     	else if (Mention.first() == "DETpos" && !Mention.contains("NOMpro")) categorie = "GN Possessif"
     	else if (Mention.first() == "DETdem" && !Mention.contains("NOMpro")) categorie = "GN Démonstratif"
     	//  ...
     	// CONTAINS ANY OF rules
     	else if (Mention.contains("PROadv") || Mention.contains("ADVgen.PROadv")) categorie = "Pronom Adverbial"
     	//  ...
     	// CONTAINS AND DOES NOT CONTAIN rules
     	else if (
     			 ( Mention.contains("DETdef") || Mention.contains("PRE.DETdef") )
     			  &&
     			  !Mention.contains("PROpos") && !Mention.contains("NOMpro") && !Mention.contains("PROcar")
     			) categorie = "GN Défini"
     	//  ...
     	// No rule matched: remember the pattern so it can be reported at the end
     	else {
     		def forms = CQI.cpos2Str(word.getQualifiedName(), positions)
     		// Key by the joined tags, not by the array: arrays hash by identity,
     		// so identical patterns would never be grouped under one entry.
     		def pattern = Mention.join(" ")
     		if (!errors.containsKey(pattern)) errors[pattern] = new HashSet()
     		errors[pattern] << forms.join(" ")
     	}
     	return categorie
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // CORPS DU SCRIPT
     // The macro only runs on a main corpus selection
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus"
     	return
     }
     // BEGINNING OF PARAMETERS
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
     def pos_property_name
     @Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
     def reset
     if (!ParametersDialog.open(this)) return
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     // Stop when the corpus has no property named pos_property_name
     if (posProperty == null) {
     	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
     	return
     }
     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     // Stop when the annotation structure has no unit type named unit_type
     def declaredUnitTypes = structure.getUnites()
     if (!declaredUnitTypes.contains(unit_type)) {
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
     }
     CATEGORIE = "CATEGORIE"
     // Declare the CATEGORIE property and its values when the structure lacks it
     if (!structure.getUniteProperties(unit_type).contains(CATEGORIE)) {
     	analecCorpus.ajouterProp(Unite.class, unit_type, CATEGORIE)
     	def categoryLabels = [
     		"Nom Propre",
     		"Pronom Impersonnel",
     		"Pronom Interrogatif",
     		"Pronom Pronom cardinal",
     		"Pronom Démonstratif",
     		"Pronom Indéfini",
     		"Pronom ordinal",
     		"Pronom Relatif"
     	]
     	for (String label : categoryLabels) {
     		analecCorpus.ajouterVal(Unite.class, unit_type, CATEGORIE, label)
     	}
     	//...
     }
     def nModified = 0
     def nIgnored = 0
     errors = new HashMap()
     def units = analecCorpus.getUnites(unit_type)
     // Order units by start position, then by end position
     units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) {
     	def existing = unit.getProp(CATEGORIE)
     	// Without reset, a unit that already has a CATEGORIE value is skipped
     	boolean keepExisting = !reset && existing != null && existing.length() > 0
     	if (keepExisting) continue
     	// Corpus positions covered by the unit, both bounds included
     	int[] positions = (unit.getDeb() == unit.getFin()) ? [unit.getDeb()] : (unit.getDeb()..unit.getFin())
     	def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
     	def cat = testRules(positions, Mention)
     	if (cat == null) {
     		nIgnored++
     	} else {
     		vue.setValeurChamp(unit, CATEGORIE, cat)
     		nModified++
     	}
     }
     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     // Report the tag patterns recorded in errors, if any
     if (!errors.isEmpty()) {
     	println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
     	for (def pattern : errors.keySet()) {
     		println "fropos="+pattern+"\twords="+errors[pattern].join(" | ")
     	}
     }

     // @author Bruno Oberlé (2017-04-01 21:50)
     /*
     Définit la catégorie grammaticale du maillon d'après le champ `frpos'
     (tagset de TreeTagger).  Le script est adapté de
     http://svn.code.sf.net/p/txm/code/trunk/plugins/Analec/AnalecRCP/src/org/txm/macro/analec/Fropos2CategorieMacro.groovy.
     Voici la liste des catégories grammaticales retenues (manuel d'annotation de
     Democrat):
     - GN: Groupe Nominal (le petit chat, le chat, le même, ce chat etc.)
     - POSS: Possessif (mon, ton son, ma, ta, sa, mes, tes, ses, notre, votre,
       leur, nos, vos, leurs)
     - PR: Pronom (moi, toi, lui, elle, nous, vous, eux, elles, le tien, le mien,
       moi-même etc.)
     - PR_CL_O: Pronom Clitique Objet (me, te, le, la, les, lui, leur, y, en)
     - PR_CL_R: Pronom Clitique Réfléchi
     - PR_CL_S: Pronom Clitique Sujet (je, tu, il, elle, on, nous, vous, ils,
       elles)
     - PR_REL: Pronom Relatif (qui, que, quoi, dont, où, lequel, quiconque etc.)
     - PR_WH: Pronom Interrogatif (qui, que, quoi, lequel etc.)
     Le script ne peut pas désambiguïser les pronoms clitiques de même forme
     (`nous' est-il un sujet, un objet ou un réfléchi?).  Dans ce cas, le script
     opte pour le sujet (ou pour l'objet si l'ambiguïté n'est que entre objet et
     réfléchi).
     Quand il n'y a aucune information disponible (erreurs de l'étiqueteur), la
     valeur est UNDEFINED.
     L'algorithme est décrit ici:
     https://groupes.renater.fr/wiki/democrat/prive/txm_annotation_exploitation
     */
     package org.txm.macro.urs.edit
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     /**
      * Classifies a single token as a subject, object or reflexive clitic pronoun.
      *
      * The decision uses the lower-cased word form rather than the lemma, because
      * the lemma of forms such as "elle", "te" or "leur" is hard to predict.
      * The subject list is tested first, then the object list, then the reflexive one.
      *
      * NOTE(review): the category list in this script's header comment also names
      * "y" and "en" as object clitics; they are not matched here. Confirm whether
      * that omission is intended.
      *
      * @param position corpus position of the token whose word form is looked up
      * @param frpos    part-of-speech tag of the token (not used by this function)
      * @return "PR_CL_S", "PR_CL_O", "PR_CL_R", or null when the form is in none of the lists
      */
     def testClitic(def position, def frpos) {
         def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()
         def subjectForms = ["je", "j'", "tu", "t'", "il", "elle", "on", "vous", "nous", "ils", "elles"]
         def objectForms = ["me", "m'", "te", "le", "l'", "la", "lui", "leur", "les"]
         def reflexiveForms = ["se", "s'"]
         if (form in subjectForms) return "PR_CL_S"
         if (form in objectForms) return "PR_CL_O"
         if (form in reflexiveForms) return "PR_CL_R"
         return null
     }
     /**
      * Decides between a noun phrase and a relative pronoun from whichever of the
      * relevant tags comes first in the mention.
      *
      * Examples: a noun comes first in `le petit chat que j'ai adopté'; a relative
      * pronoun comes first in `(le livre) dans lequel j'ai lu cette histoire...'.
      * In Democrat the relative clause is not annotated inside the mention, so a
      * noun phrase never contains a relative there, but other annotation schemes may differ.
      *
      * @param positions corpus positions of the mention tokens (not used by this function)
      * @param Mention   part-of-speech tags of the mention tokens, in text order
      * @return "GN" if the first matching tag is NOM or NAM, "PR_REL" if it is PRO:REL,
      *         null when none of these tags occurs
      */
     def testPhrase(def positions, def Mention) {
         def firstDecisiveTag = Mention.find { it == "NOM" || it == "NAM" || it == "PRO:REL" }
         if (firstDecisiveTag == null) return null
         return firstDecisiveTag == "PRO:REL" ? "PR_REL" : "GN"
     }
     /**
      * Recognises a one-token interrogative pronoun.
      *
      * The word form is lower-cased before the comparison (as testClitic does), so
      * that a sentence-initial "Qui", "Que" or "Lequel" is recognised as well.
      *
      * NOTE(review): this script's header comment also lists "quoi" as an
      * interrogative pronoun; it is not matched here. Confirm whether it should be.
      *
      * @param position corpus position of the token whose word form is looked up
      * @param mention  part-of-speech tag of the token
      * @return "PR_WH" when the tag is "PRO" and the form is "qui", "que" or "lequel"
      *         (any letter case), null otherwise
      */
     def testWhPronoun(position, mention) {
         def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()
         if (mention == "PRO" && (form == "qui" || form == "que" || form == "lequel")) {
             return "PR_WH"
         }
         return null
     }
     /**
      * Applies the categorisation rules to one mention and returns its category.
      *
      * Rules are tried in order and the first one that yields a value wins:
      * possessive determiner, clitic pronoun, interrogative pronoun, noun phrase or
      * relative pronoun, then any other pronoun. When nothing matches, the category
      * is "UNDEFINED" and the tag pattern is recorded in the script-level `errors`
      * map together with the word forms, so that the caller can report it.
      *
      * @param positions corpus positions of the mention tokens
      * @param Mention   part-of-speech tags of the mention tokens, in text order
      * @return one of "POSS", "PR_CL_S", "PR_CL_O", "PR_CL_R", "PR_WH", "GN",
      *         "PR_REL", "PRO" or "UNDEFINED" (never null)
      */
     def testRules(def positions, def Mention) {
         def category = null
         // a possessive (mon, ma...)
         if (Mention.length == 1 && Mention.contains("DET:POS"))
             category = "POSS"
         // a clitic (subject: je, tu...; object: me, te; reflexive: se)
         if (!category && Mention.length == 1 && Mention.contains("PRO:PER"))
             category = testClitic(positions[0], Mention[0])
         // an interrogative pronoun
         if (!category && Mention.length == 1)
             category = testWhPronoun(positions[0], Mention[0])
         // a noun phrase or a relative pronoun
         if (!category)
             category = testPhrase(positions, Mention)
         // some other kind of pronoun
         // "PRO:POS" is the possessive-pronoun tag of the TreeTagger French tagset;
         // the historical spelling "PRO:POSS" is still accepted.
         if (!category
                 && (   Mention.contains("PRO")
                     || Mention.contains("PRO:POS")
                     || Mention.contains("PRO:POSS")
                     || Mention.contains("PRO:IND")
                     || Mention.contains("PRO:DEM")
                     || Mention.contains("PRO:PER") )
                 && !Mention.contains("NOM")
                 && !Mention.contains("NAM") )
             category = "PRO"
         // End of the rules: none matched. Remember the pattern for the final report.
         if (!category) {
             category = "UNDEFINED" // clear the field
             // Arrays hash by identity, so they are converted to lists: identical tag
             // patterns then share one entry and identical word sequences are stored once.
             def tagPattern = Mention.toList()
             def forms = CQI.cpos2Str(word.getQualifiedName(), positions).toList()
             if (!errors.containsKey(tagPattern)) errors[tagPattern] = new HashSet()
             errors[tagPattern] << forms
         }
         return category
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // SCRIPT BODY
     // Setup phase: validate the selection, read the macro parameters, resolve the
     // corpus objects used by the rules, and make sure the CATEGORIE property exists.
     // The macro only runs on a MainCorpus selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus"
     	return
+    }
     // BEGINNING OF PARAMETERS
     // Type of the units to categorise.
     @Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
     def unit_type
     // Name of the word property that holds the part-of-speech tags.
     @Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
     def pos_property_name
     // When true, units that already have a CATEGORIE value are processed again.
     @Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
     def reset
     // Stop when the dialog returns false (presumably when the user cancels it).
     if (!ParametersDialog.open(this)) return
     // The following names are assigned without 'def': they live in the script
     // binding, which is how testClitic, testWhPronoun and testRules reach CQI and word.
     corpus = corpusViewSelection
     CQI = CQPSearchEngine.getCqiClient()
     word = corpus.getWordProperty()
     posProperty = corpus.getProperty(pos_property_name)
     if (posProperty == null) {
     	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
     	return
+    }
     analecCorpus = URSCorpora.getCorpus(corpus)
     vue = URSCorpora.getVue(corpus)
     structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
+    }
     CATEGORIE = "CATEGORIE"
     // If the annotation structure has no CATEGORIE property yet, create it with its values
     if (!structure.getUniteProperties(unit_type).contains(CATEGORIE)) {
     // FIXME: the original script (see also
     // http://forge.cbp.ens-lyon.fr/redmine/issues/2065) uses
     // analecCorpus.ajouterProp/Val, but that does not work in my version of
     // TXM-Analec --> so I go back to structure.ajouterProp/Val
     // the property
     	structure.ajouterProp(Unite.class, unit_type, CATEGORIE)
     // the values
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "GN")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "POSS")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PRO")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_CL_O")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_CL_S")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_CL_R")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_REL")
     	structure.ajouterVal(Unite.class, unit_type, CATEGORIE, "PR_WH")
     // NOTE(review): "UNDEFINED", which testRules can also return, is not registered here.
     //...
+    }
     // Counters printed in the final report.
     def nModified = 0
     def nIgnored = 0
     // Assigned without 'def' so that testRules can record unmatched tag patterns in it.
     errors = new HashMap()
     // Walk the units in text order: by start position, then by end position.
     def sortedUnits = analecCorpus.getUnites(unit_type)
     sortedUnits.sort { left, right ->
         int byStart = left.getDeb() <=> right.getDeb()
         byStart != 0 ? byStart : (left.getFin() <=> right.getFin())
     }
     for (Unite unit in sortedUnits) {
         def existing = unit.getProp(CATEGORIE)
         boolean alreadySet = existing != null && existing.length() > 0
         // Unless a reset was requested, a unit that already has a CATEGORIE is left alone
         // (and counted neither as modified nor as ignored).
         if (alreadySet && !reset) continue
         // Every corpus position covered by the unit, from getDeb() to getFin() inclusive.
         int[] tokenPositions = (unit.getDeb()..unit.getFin())
         def posTags = CQI.cpos2Str(posProperty.getQualifiedName(), tokenPositions)
         def category = testRules(tokenPositions, posTags)
         if (category == null) {
             nIgnored++
         } else {
             // The original script called vue.setValeurChamp(unit, CATEGORIE, cat) here,
             // which did not work for the author; the property map is written directly instead.
             unit.getProps().put(CATEGORIE, category)
             nModified++
         }
     }
     println "Result:"
     println "- $nModified units of type $unit_type have been modified."
     println "- $nIgnored units of type $unit_type have not been modified.\n"
     if (errors.size() > 0) {
         println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
         for (key in errors.keySet()) {
             println "fropos="+key+"\twords="+errors[key].join(" | ")
         }
     }
     // Update the view (see also http://forge.cbp.ens-lyon.fr/redmine/issues/2065).
     URSCorpora.getVue(analecCorpus).retablirVueParDefaut()

     // Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
     // Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
     // @author mdecorde
     // @author sheiden
     // STANDARD DECLARATIONS
     package org.txm.macro.urs.edit
     import org.kohsuke.args4j.*
     import groovy.transform.Field
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.donnees.Structure;
     import visuAnalec.elements.Unite;
     import visuAnalec.vue.Vue
     // Setup phase of the NEW macro: validate the selection and the parameters, then
     // make sure the unit type has a REF property before touching the structure.
     if (!(corpusViewSelection instanceof MainCorpus)) {
         println "Corpora selection is not a Corpus"
         return;
     }
     // BEGINNING OF PARAMETERS
     // Type of the units to process.
     @Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
     String unit_type
     // When true, units that already have a NEW value are processed again.
     @Field @Option(name="reset",usage="", widget="Boolean", required=true, def="true")
     boolean reset
     // Stop when the dialog returns false (presumably when the user cancels it).
     if (!ParametersDialog.open(this)) return;
     MainCorpus corpus = corpusViewSelection
     def analecCorpus = URSCorpora.getCorpus(corpus);
     Structure structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
         println "Error: corpus structure does not contains unit with name=$unit_type"
         return
     }
     def props = structure.getUniteProperties(unit_type)
     String NEW = "NEW"
     String REF = "REF"
     String YES = "YES"
     String NO = "NO"
     // Validate first: the REF check can abort the macro, so it runs before the
     // structure is modified and a failed run leaves the corpus structure untouched.
     if (!props.contains(REF)) { // check the unit_type units have the REF property
         println "Error: $unit_type units have no proprerty named 'REF'"
         return
     }
     if (!props.contains(NEW)) { // update the structure if needed
         analecCorpus.ajouterProp(Unite.class, unit_type, NEW);
         analecCorpus.ajouterVal(Unite.class, unit_type, NEW, YES);
         analecCorpus.ajouterVal(Unite.class, unit_type, NEW, NO);
     }
     // Counters printed at the end of the run.
     int nIgnored = 0 // units left untouched
     int nYes = 0 // units set to "YES"
     int nNo = 0 // units set to "NO"
     // References seen so far; the first unit carrying a reference gets NEW=YES, later ones NEW=NO.
     def allRefs = new HashSet<String>()
     // Walk the units in text order: by start position, then by end position.
     def orderedUnits = analecCorpus.getUnites(unit_type)
     orderedUnits.sort { left, right ->
         int byStart = left.getDeb() <=> right.getDeb()
         byStart != 0 ? byStart : (left.getFin() <=> right.getFin())
     }
     for (Unite unit in orderedUnits) {
         def current = unit.getProp(NEW)
         def ref = unit.getProp(REF)
         boolean keepExisting = !reset && current != null && current.length() != 0
         if (keepExisting) {
             // "NEW" already has a value and no reset was requested
             nIgnored++
             continue
         }
         // add() returns true only when the reference was not in the set yet
         if (allRefs.add(ref)) {
             unit.getProps().put(NEW, YES)
             nYes++
         } else {
             unit.getProps().put(NEW, NO)
             nNo++
         }
     }
     println "nIgnored=$nIgnored"
     println "nYes=$nYes"
     println "nNo=$nNo"

     // Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
     // Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
     // @author mdecorde
     // @author sheiden
     // STANDARD DECLARATIONS
     package org.txm.macro.urs.edit
     import org.apache.commons.lang.StringUtils;
     import org.kohsuke.args4j.*
     import groovy.transform.Field
     import org.txm.Toolbox;
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.AbstractCqiClient;
     import org.txm.searchengine.cqp.corpus.*
     import org.txm.searchengine.cqp.CQPSearchEngine
     import visuAnalec.Message.StructureEvent;
     import visuAnalec.Message.TypeModifStructure;
     import visuAnalec.donnees.Structure;
     import visuAnalec.elements.Unite;
     import visuAnalec.vue.Vue
     // Setup phase of the DEFINITUDE macro: validate the selection, read the
     // parameters, resolve the corpus objects and make sure the DEFINITUDE property exists.
     // The macro only runs on a MainCorpus selection.
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus"
     	return;
+    }
     // BEGINNING OF PARAMETERS
     // Type of the units to process.
     @Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
     String unit_type
     // When true, units that already have a DEFINITUDE value are processed again.
     @Field @Option(name="reset",usage="", widget="Boolean", required=true, def="false")
     boolean reset
     // Stop when the dialog returns false (presumably when the user cancels it).
     if (!ParametersDialog.open(this)) return;
     MainCorpus corpus = corpusViewSelection
     AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
     def word = corpus.getWordProperty()
     def analecCorpus = URSCorpora.getCorpus(corpus);
     Structure structure = analecCorpus.getStructure()
     if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
     	println "Error: corpus structure does not contains unit with name=$unit_type"
     	return
+    }
     def props = structure.getUniteProperties(unit_type)
     String DEFINITUDE = "DEFINITUDE"
     // Declare the property and its five allowed values when the unit type lacks it.
     if (!props.contains(DEFINITUDE)) { // update the structure if needed
     	analecCorpus.ajouterProp(Unite.class, unit_type, DEFINITUDE);
     	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "DEFINI");
     	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "INDEFINI");
     	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "DEMONSTRATIF");
     	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "AMBIGU");
     	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "NONE");
+    }
     // Counters printed at the end of the run.
     int nIgnored = 0 // number of ignored units
     int nModified = 0 // number of modified units
     int nDefini = 0 // number of "DEFINI" units
     int nIndefini = 0 // number of "INDEFINI" units
     int nDemonstratif = 0 // number of "DEMONSTRATIF" units
     int nAmbigu = 0 // number of "AMBIGU" units
     int nNone = 0 // number of "NONE" units
     // Determiner patterns, compiled once. Each one tests the FIRST word of the mention:
     // the alternatives are grouped so that '^' applies to all of them (in /^(a)|(b)/
     // only the first alternative is anchored, and find() matches the others anywhere,
     // e.g. "un ami du roi" used to be classified DEFINI because of "du roi").
     // Both lower-case and capitalised initials are accepted for every determiner.
     // Possible additions: ses, son, sa, leur(s), tous, toutes.
     def definiPattern = ~/^(?:(?:[Ll]e|[Ll]a|[Ll]es|[Aa]u|[Aa]ux|[Dd]u)\s|[Ll]').+/
     def indefiniPattern = ~/^(?:[Uu]n|[Uu]ne|[Cc]haque|[Cc]ertains|[Cc]ertaines|[Aa]ucun|[Aa]ucune|[Aa]ucuns|[Aa]ucunes|[Aa]utre|[Aa]utres|[Qq]uelque|[Qq]uelques)\s.+/
     def demonstratifPattern = ~/^(?:[Cc]e|[Cc]et|[Cc]ette|[Cc]es)\s.+/
     def ambiguPattern = ~/^(?:[Dd]e|[Dd]es)\s.+/
     // Walk the units in text order: by start position, then by end position.
     def units = analecCorpus.getUnites(unit_type)
     units = units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
     for (Unite unit : units) { // process all units
         def prop = unit.getProp(DEFINITUDE);
         if (reset || prop == null || prop.length() == 0 || prop.equals("NONE")) {
             // The word forms are only fetched for units that are actually processed.
             int[] pos = (unit.getDeb()..unit.getFin())
             def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")
             if (definiPattern.matcher(form).find()) {
                 unit.getProps().put(DEFINITUDE, "DEFINI")
                 nDefini++
             } else if (indefiniPattern.matcher(form).find()) {
                 unit.getProps().put(DEFINITUDE, "INDEFINI")
                 nIndefini++
             } else if (demonstratifPattern.matcher(form).find()) {
                 unit.getProps().put(DEFINITUDE, "DEMONSTRATIF")
                 nDemonstratif++
             } else if (ambiguPattern.matcher(form).find()) {
                 unit.getProps().put(DEFINITUDE, "AMBIGU")
                 nAmbigu++
             } else {
                 unit.getProps().put(DEFINITUDE, "NONE")
                 nNone++;
             }
             nModified++
         } else {
             // nothing to do: the unit already has a value other than "NONE"
             nIgnored++
         }
     }
     println "nIgnored=$nIgnored"
     println "nModified=$nModified"
     println " nDefini=$nDefini"
     println " nIndefini=$nIndefini"
     println " nDemonstratif=$nDemonstratif"
     println " nAmbigu=$nAmbigu"
     println " nNone=$nNone"

     // @author Matthieu Quignard
     // Date : 04 Mai 2017
     /*
     Définit la catégorie grammaticale des mentions d'après le champ `frpos'
     (tagset de TreeTagger).
     La liste des catégories grammaticales est celle de CATTEX2009
     -- Groupes nominaux :
     	GN.NAM (noms propres) : Henri II
     	GN.DEF (définis) : Le roi, du roi
     	GN.IND (indéfinis) : Un roi
     	GN.POS (possessifs) : [Mon roi]
     	GN.DEM (demonstratif) : Ce roi
     	GN.NUM (numéraux) : Deux rois
     	GN.CHECK (GN indéterminés)
     -- Déterminants
     	DET.POS (possessifs) : [mon] roi
     -- Pronoms
     	PRO.PER (personnels) : je, moi, me, on, il, etc.
     	PRO.ADV (adverbiaux) : y, en
     	PRO.IND (indéfinis) : tout, tous, certains, plusieurs, etc.
     	PRO.DEM (demonstratifs) : ceci, cela, ce, ça...
     	PRO.POS (possessifs) : le mien, les nôtres...
     	PRO.NUM (cardinaux, ordinaux) : les deux...
     	PRO.REL (relatifs) : qui, que, quoi, duquel, etc.
     	PRO.INT (interrogatifs)
     	PRO.CHECK (pronoms indéterminés)
     -- Sujet Zéro : verbes conjugués, éventuellement pronominal
     -- ERREUR : erreur (a priori) de mention
     */
     package org.txm.macro.urs.edit
     import org.apache.commons.lang.*
     import org.kohsuke.args4j.*
     import groovy.transform.*
     import org.txm.*
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.annotation.urs.*
     import org.txm.searchengine.cqp.*
     import org.txm.searchengine.cqp.corpus.*
     import visuAnalec.Message.*
     import visuAnalec.donnees.*
     import visuAnalec.elements.*
     import visuAnalec.vue.*
     // Name of the property that gives the lemma
     def lemmaProperty = "frlemma"
     /*
        TODO: TreeTagger mis-tags the first words of a sentence.
        The capital letter makes it believe the word is a proper noun.
        Check that such a word is not in fact a pronoun by looking it up in one of the form lists below.
     */
     /**
       * Word-form lists used to categorise the mentions.
       * TreeTagger does not necessarily perform well on older states of the language.
     ***/
     // Definite articles
     formesArticlesDéfinis = [
         "le", "la", "les", "l'",
         "au", "aux", "du", "des"
     ]
     // Indefinite articles
     formesArticlesIndéfinis = ["un", "une", "des"]
     // Possessive determiners
     formesDéterminantsPossessifs = [
         "ma", "ta", "sa",
         "mon", "ton", "son",
         "mes", "tes", "ses",
         "notre", "votre", "leur",
         "nos", "vos", "leurs"
     ]
     // Demonstrative adjectives
     formesAdjectifsDémonstratifs = ["ce", "cet", "cette", "ces"]
     // Personal pronouns
     formesPronomsPersonnels = [
         "je", "tu", "il", "elle", "on", "nous", "vous", "ils", "elles",
         "moi", "toi", "eux",
         "me", "te", "se", "lui", "leur"
     ]
     // Adverbial pronouns
     formesPronomsAdverbiaux = ["en", "y"]
     // Possessive pronouns
     formesPronomsPossessifs = [
         "mien", "mienne", "miens", "miennes",
         "tien", "tienne", "tiens", "tiennes",
         "sien", "sienne", "siens", "siennes",
         "nôtre", "nôtres", "vôtre", "vôtres",
         "leur", "leurs"
     ]
     // Demonstrative pronouns
     formesPronomsDémonstratifs = [
         "ce", "c'", "celui", "celle", "ceux", "celles",
         "ci", "ça", "ceci", "cela",
         "tel", "telle", "tels", "telles"
     ]
     // Concatenation of all the lists above, in declaration order (duplicates are kept).
     toutesLesFormes = [
         formesArticlesDéfinis,
         formesArticlesIndéfinis,
         formesDéterminantsPossessifs,
         formesAdjectifsDémonstratifs,
         formesPronomsPersonnels,
         formesPronomsAdverbiaux,
         formesPronomsPossessifs,
         formesPronomsDémonstratifs
     ].collectMany { it }
     /** End of the form declarations **/
     /**
      * Applies the categorisation rules to one mention and returns its category.
      *
      * One-token and two-token mentions are handled by dedicated rule chains; any
      * longer mention, and any two-token mention left as "GN.CHECK", goes through the
      * noun-phrase rules, which look at the first tag and the first word form.
      *
      * @param positions corpus positions of the mention tokens
      * @param Mention   part-of-speech tags of the mention tokens, in text order
      * @return the category label: a "GN.*", "DET.POS", "PRO.*" or "SUJ.ZERO" value,
      *         "ERREUR" for a one-token mention tagged ADV, PRE or INT,
      *         or "TEST" when no noun-phrase rule applies
      */
     def testRules(def positions, def Mention) {
         def catégorie = null
         // lower-cased form of the first token of the mention
         def forme = CQI.cpos2Str(word.getQualifiedName(), positions)[0].toLowerCase()
         if (Mention.length == 1) {
                  if (Mention.first() == "NAM"    ) catégorie = "GN.NAM"
             else if (Mention.first() == "DET:POS") catégorie = "DET.POS"
             else if (Mention.first() == "PRO:PER") {
                 if (formesPronomsAdverbiaux.contains(forme)) catégorie = "PRO.ADV"
                 else catégorie = "PRO.PER"
             }
             else if (Mention.first() == "PRO:DEM") catégorie = "PRO.DEM"
             else if (Mention.first() == "PRO:IND") catégorie = "PRO.IND"
             else if (Mention.first() == "PRO:REL") catégorie = "PRO.REL"
             else if (Mention.first().contains("VER:")) catégorie = "SUJ.ZERO"
             else if (Mention.first() == "PRO") catégorie = "PRO.INT"
             // indefinite noun phrases without an article
             else if (Mention.first() == "NOM") catégorie = "GN.IND"
             else if (Mention.first() == "ADJ") catégorie = "GN.IND"
             // handling of TreeTagger errors
             else if (Mention.first() == "KON") catégorie = "PRO.REL" // 'que' in a one-token mention is a relative
             else if (Mention.first() == "DET:ART") catégorie = "PRO.PER" // le, les
             else if (forme == "en") catégorie = "PRO.ADV"
             else if (Mention.first() == "ADV") catégorie = "ERREUR"  // an adverb alone is never referential
             else if (Mention.first() == "PRE") catégorie = "ERREUR"  // a preposition alone is never referential
             // NOTE(review): an "ADJ -> ERREUR" rule used to follow here but could never
             // fire, because ADJ is already mapped to GN.IND above; it was dropped as dead
             // code. Confirm which of the two outcomes is wanted for a lone adjective.
             else if (Mention.first() == "INT") catégorie = "ERREUR"  // an interjection alone is never referential
             else catégorie = "PRO.CHECK"
         }
         else if (Mention.length == 2) {
                  if (Mention.contains("NAM")) catégorie = "GN.NAM"
             else if (Mention[1] == "PRO:POS") catégorie = "PRO.POS"  // "les miens"
             else if (Mention[1] == "NUM"    ) catégorie = "PRO.NUM"  // "les deux"
             else if (Mention[1] == "PRO:DEM") catégorie = "PRO.DEM"  // "Tout cela"
             else if (Mention[0] == "PRO:IND") catégorie = "GN.IND"   // "Quelques trucs"
             else if (Mention.contains("PRO:REL")) catégorie = "PRO.REL"
             // "des autres": the indefinite-pronoun tag is "PRO:IND" in the tagset used by
             // every other rule of this function; "PROind" is still accepted as before.
             else if ((Mention[0].contains("DET")) && (Mention[1] == "PRO:IND" || Mention[1] == "PROind")) catégorie = "PRO.IND"
             else if (Mention[1].contains("VER:")) catégorie = "SUJ.ZERO"
             else if (!Mention.contains("NOM") && !Mention.contains("ADJ")) {
                 if (Mention[0] == "PRO:DEM") catégorie = "PRO.DEM"
                 else catégorie = "PRO.CHECK"
             }
             else catégorie = "GN.CHECK"
         }
         if ( (catégorie == null) || (catégorie == "GN.CHECK") ) {
             // noun phrases: decide from the first tag or the first word form
                  if (Mention[0] == "DET:POS" ) catégorie = "GN.POS"
             else if (Mention[0] == "NUM"     ) catégorie = "GN.NUM"
             else if (Mention[0] == "PRO:DEM" ) catégorie = "GN.DEM"
             else if (Mention[0] == "PRP:det" ) catégorie = "GN.DEF"
             // "des" is in both article lists; the indefinite list is tested first
             else if (formesArticlesIndéfinis.contains(forme)) catégorie = "GN.IND"
             else if (formesArticlesDéfinis.contains(forme))   catégorie = "GN.DEF"
             else if (Mention[0] == "PRO:IND" ) catégorie = "GN.IND"
             else if (Mention[0] == "PRP" ) catégorie = "GN.IND"
             else if (Mention[0] == "ADJ" ) catégorie = "GN.IND"
             else if (Mention[0] == "NOM" ) catégorie = "GN.IND"
             else if (Mention.contains("NAM")) catégorie = "GN.NAM"
             else catégorie = "TEST"
         }
         return catégorie
     }
     //
     // FIN DE LA DÉFINITION DES RÈGLES
     //
     // CORPS DU SCRIPT
     if (!(corpusViewSelection instanceof MainCorpus)) {
     	println "Corpora selection is not a Corpus"
     	return

Laboratoire ICAR » Plateforme TXM

Révision 2066