Révision 2093
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/txt/SearchReplaceInDirectoryMacro.groovy (revision 2093) | ||
---|---|---|
23 | 23 |
// |
24 | 24 |
|
25 | 25 |
// imports |
26 |
|
|
26 | 27 |
import org.kohsuke.args4j.* |
27 | 28 |
import groovy.transform.Field |
28 |
import org.txm.rcp.swt.widget.parameters.* |
|
29 |
import org.txm.rcpapplication.swt.widget.parameters.*
|
|
29 | 30 |
|
30 | 31 |
// parameters |
31 | 32 |
|
32 |
// **change this parameter** |
|
33 |
@Field @Option(name="inputDirectory",usage="Dossier qui contient les fichiers à modifier", widget="Folder", required=true, def='voeux') |
|
34 |
inputDirectory = new File(System.getProperty("user.home"), "Bureau/voeux") |
|
33 |
@Field @Option(name="inputDirectory",usage="Dossier qui contient les fichiers à modifier", widget="Folder", required=true, def='') |
|
34 |
def inputDirectory |
|
35 | 35 |
|
36 |
// **change this parameter** |
|
37 | 36 |
@Field @Option(name="extension",usage="Regexp de l'extension des fichiers à modifier", widget="String", required=true, def='\\.txt') |
38 |
extension = "\\.txt"
|
|
37 |
def extension
|
|
39 | 38 |
|
40 |
// **change this parameter** |
|
41 |
@Field @Option(name="find",usage="Expression régulière", widget="String", required=true, def='’') |
|
42 |
find = "’" |
|
39 |
@Field @Option(name="find",usage="Expression régulière", widget="String", required=true, def='') |
|
40 |
def find |
|
43 | 41 |
|
44 |
// **change this parameter** |
|
45 |
@Field @Option(name="replaceWith",usage="Chaîne de remplacement", widget="String", required=false, def='\'') |
|
46 |
replaceWith = "'" |
|
42 |
@Field @Option(name="replaceWith",usage="Chaîne de remplacement", widget="String", required=false, def='') |
|
43 |
def replaceWith |
|
47 | 44 |
|
48 |
// **change this parameter** |
|
49 | 45 |
@Field @Option(name="encoding",usage="Encodage des fichiers", widget="String", required=true, def='UTF-8') |
50 |
encoding = "utf-8"
|
|
46 |
def encoding
|
|
51 | 47 |
|
52 |
// **change this parameter** |
|
53 |
// 'true' = only display matching lines, 'false' = replace in matching files |
|
54 | 48 |
//@Field @Option(name="showMatchingFilesOnly",usage="Montrer seulement les matchs", widget="Boolean", required=false, def='false') |
55 |
showMatchingFilesOnly = false;
|
|
49 |
def showMatchingFilesOnly = false
|
|
56 | 50 |
|
57 |
if (!ParametersDialog.open(this)) return;
|
|
51 |
if (!ParametersDialog.open(this)) return |
|
58 | 52 |
|
59 |
replaceWith = org.apache.commons.lang.StringEscapeUtils.unescapeJava(replaceWith)
|
|
53 |
println "Working in $inputDirectory on files with extension "+/$extension/
|
|
60 | 54 |
|
61 |
println "Working with $inputDirectory files with extension="+/.*$extension/ |
|
62 | 55 |
if (showMatchingFilesOnly) { |
63 |
println "Search '$find'" |
|
56 |
println "Searching '$find'"
|
|
64 | 57 |
} else { |
65 |
println "Replace '$find' with '$replaceWith'"
|
|
58 |
println "Replacing '$find' by '$replaceWith'"
|
|
66 | 59 |
} |
67 | 60 |
|
61 |
/* parse Java escape characters in replace string |
|
62 |
|
|
63 |
\t Insert a tab in the text at this point. |
|
64 |
\b Insert a backspace in the text at this point. |
|
65 |
\n Insert a newline in the text at this point. |
|
66 |
\r Insert a carriage return in the text at this point. |
|
67 |
\f Insert a formfeed in the text at this point. |
|
68 |
\' Insert a single quote character in the text at this point. |
|
69 |
\" Insert a double quote character in the text at this point. |
|
70 |
\\ Insert a backslash character in the text at this point. |
|
71 |
*/ |
|
72 |
replaceWith = org.apache.commons.lang.StringEscapeUtils.unescapeJava(replaceWith) |
|
73 |
|
|
68 | 74 |
//find = /date="([0-9]+)-([0-9]+-[0-9]+)"/ |
69 | 75 |
// **change this parameter** |
70 | 76 |
//replaceWith = 'date="$1-$2" year="$1"' |
... | ... | |
94 | 100 |
def tmp = File.createTempFile("SearchReplaceInDirectoryTemp", ".tmp", file.getParentFile()) // create temporary file |
95 | 101 |
tmp.write('') // create empty file |
96 | 102 |
tmp.withWriter(encoding) { writer -> |
97 |
String txt = file.getText(encoding); |
|
98 |
writer.println txt.replaceAll(find, replaceWith) |
|
103 |
writer.print file.getText(encoding).replaceAll(find, replaceWith) |
|
99 | 104 |
// file.eachLine(encoding) { line -> // for each line |
100 | 105 |
// writer.println line.replaceAll(find, replaceWith) // find&replace and print |
101 | 106 |
// } |
... | ... | |
103 | 108 |
} |
104 | 109 |
file.delete() |
105 | 110 |
tmp.renameTo(file) // save results |
106 |
} |
|
111 |
} |
|
112 |
|
|
113 |
return 1 |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/txt/SearchReplaceInFileMacro.groovy (revision 2093) | ||
---|---|---|
23 | 23 |
// |
24 | 24 |
|
25 | 25 |
// imports |
26 |
|
|
26 | 27 |
import org.kohsuke.args4j.* |
27 | 28 |
import groovy.transform.Field |
28 |
import org.txm.rcp.swt.widget.parameters.* |
|
29 |
import org.txm.rcpapplication.swt.widget.parameters.*
|
|
29 | 30 |
|
30 | 31 |
// parameters |
31 | 32 |
|
32 |
// **change this parameter** |
|
33 |
@Field @Option(name="file",usage="Dossier qui contient les fichiers à modifier", widget="File", required=true, def='/home/mdecorde/xml/qgraalc/qgraal_cm-c.xml') |
|
34 |
file = new File(System.getProperty("user.home"), "Bureau/voeux") |
|
33 |
@Field @Option(name="file", usage="Dossier qui contient les fichiers à modifier", widget="File", required=true, def='') |
|
34 |
def file |
|
35 | 35 |
|
36 |
// **change this parameter** |
|
37 |
@Field @Option(name="extension",usage="Regexp de l'extension des fichiers à modifier", widget="String", required=true, def='\\.txt') |
|
38 |
extension = "\\.txt" |
|
36 |
@Field @Option(name="extension", usage="Regexp de l'extension des fichiers à modifier", widget="String", required=true, def='\\.txt') |
|
37 |
def extension |
|
39 | 38 |
|
40 |
// **change this parameter** |
|
41 |
@Field @Option(name="find",usage="Expression régulière", widget="String", required=true, def='’') |
|
42 |
find = "’" |
|
39 |
@Field @Option(name="find", usage="Expression régulière", widget="String", required=true, def='') |
|
40 |
def find |
|
43 | 41 |
|
44 |
// **change this parameter** |
|
45 |
@Field @Option(name="replaceWith",usage="Chaîne de remplacement", widget="String", required=false, def='\'') |
|
46 |
replaceWith = "'" |
|
42 |
@Field @Option(name="replaceWith", usage="Chaîne de remplacement", widget="String", required=true, def='') |
|
43 |
def replaceWith |
|
47 | 44 |
|
48 |
// **change this parameter** |
|
49 |
@Field @Option(name="encoding",usage="Encodage des fichiers", widget="String", required=true, def='UTF-8') |
|
50 |
encoding = "utf-8" |
|
45 |
@Field @Option(name="encoding", usage="Encodage des fichiers", widget="String", required=true, def='UTF-8') |
|
46 |
def encoding |
|
51 | 47 |
|
52 |
if (!ParametersDialog.open(this)) return;
|
|
48 |
if (!ParametersDialog.open(this)) return |
|
53 | 49 |
|
50 |
/* parse Java escape characters in replace string |
|
51 |
|
|
52 |
\t Insert a tab in the text at this point. |
|
53 |
\b Insert a backspace in the text at this point. |
|
54 |
\n Insert a newline in the text at this point. |
|
55 |
\r Insert a carriage return in the text at this point. |
|
56 |
\f Insert a formfeed in the text at this point. |
|
57 |
\' Insert a single quote character in the text at this point. |
|
58 |
\" Insert a double quote character in the text at this point. |
|
59 |
\\ Insert a backslash character in the text at this point. |
|
60 |
*/ |
|
54 | 61 |
replaceWith = org.apache.commons.lang.StringEscapeUtils.unescapeJava(replaceWith) |
55 | 62 |
|
56 | 63 |
println "SearchAndReplace in $file" |
57 | 64 |
|
58 |
def tmp = File.createTempFile("SearchReplaceInDirectoryTemp", ".tmp", file.getParentFile()) // create temporary file |
|
59 |
tmp.write('') // create empty file |
|
60 |
tmp.withWriter(encoding) { writer -> |
|
61 |
String txt = file.getText(encoding); |
|
62 |
writer.println txt.replaceAll(find, replaceWith) |
|
63 |
writer.close() |
|
64 |
} |
|
65 |
file.delete() |
|
66 |
tmp.renameTo(file) // save results |
|
65 |
// create temporary file |
|
66 |
def tmp = File.createTempFile("SearchReplaceInDirectoryTemp", ".tmp", file.getParentFile()) |
|
67 |
|
|
68 |
// create empty file |
|
69 |
tmp.write('') |
|
70 |
|
|
71 |
tmp.withWriter(encoding) { writer -> |
|
72 |
writer.print file.getText(encoding).replaceAll(find, replaceWith) |
|
73 |
writer.close() |
|
74 |
} |
|
75 |
|
|
76 |
// save results |
|
77 |
file.delete() |
|
78 |
tmp.renameTo(file) |
|
79 |
|
|
80 |
return 1 |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/StructuresIndexMacro.groovy (revision 2093) | ||
---|---|---|
1 |
package org.txm.macro.urs.exploit |
|
2 |
// Copyright © 2017 ENS de Lyon, CNRS, University of Franche-Comté |
|
3 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
/* |
|
7 |
Macro affichant les statistiques de différentes structures d'un corpus |
|
8 |
Paramètres de la macro : |
|
9 |
- corpus : le corpus sélectionné dans la vue Corpus |
|
10 |
- structures : liste des structures à interroger. Séparer les noms par une virgule. |
|
11 |
- structProperties : liste des propriétés de structures. Séparer les noms par une virgule. |
|
12 |
Il doit y avoir autant de propriétés de structures que de structures indiquées dans le paramètre structures. |
|
13 |
Les structures doivent posséder la propriété demandée. |
|
14 |
Ce paramètre peut être laissé vide, dans ce cas la colonne 'prop' n'est pas affichée. |
|
15 |
- query : requête CQL de sélection de mots exprimée obligatoirement en format complet : [...] |
|
16 |
Par exemple : |
|
17 |
- [frpos="N.*"] pour sélectionner les noms communs et les noms propres |
|
18 |
- [] pour sélectionner tous les mots |
|
19 |
- wordProperty : propriété de mot utilisée pour calculer le vocabulaire et les fréquences |
|
20 |
- displayIndex : calculer l'index hiérarchique des valeurs de la propriété wordProperty pour la requête query sur chaque structure |
|
21 |
- Vmax : nombre maximum des mots les plus fréquents à afficher dans l'index |
|
22 |
Résultat : |
|
23 |
Le résultat est un tableau TSV affiché dans la console. |
|
24 |
On peut l'exploiter avec un copier/coller dans Calc. |
|
25 |
Chaque ligne correspond à une structure du corpus. |
|
26 |
Les lignes sont ordonnées par ordre hiérarchique des structures du début à la fin du corpus. |
|
27 |
Les colonnes sont : |
|
28 |
- struct : nom de la structure |
|
29 |
- prop : valeur de la propriété de la structure |
|
30 |
(si le paramètre structProperties est vide, cette colonne est absente du résultat) |
|
31 |
- start : position du premier mot de la structure dans le corpus |
|
32 |
(les positions du corpus sont numérotées à partir de 0). |
|
33 |
Les colonnes start et end sont pratiques quand on n'a pas de propriété de structure à afficher pour se repérer dans le corpus. |
|
34 |
- end : position du dernier mot de la structure |
|
35 |
- T : taille de la structure (end-start) |
|
36 |
- t : nombre de mots sélectionnés dans la structure |
|
37 |
- v : nombre de valeurs différentes de la propriété des mots sélectionnés dans la structure |
|
38 |
- fmin : fréquence minimale des valeurs de la propriété de mots sélectionnés dans la structure |
|
39 |
- fmax : fréquence maximale des valeurs de la propriété de mots sélectionnés dans la structure |
|
40 |
- index : l'index hiérarchique des valeurs de la propriété de mot choisie des mots sélectionnés par la requête CQL |
|
41 |
Exemple de résultats sur le texte "Essais sur la peinture" de Diderot : |
|
42 |
struct prop start end T t v fmin fmax index |
|
43 |
text DiderotEssais 46203 56871 10668 2011 903 1 38 [nature, couleur, homme, tableau, lumière, objets, œil, toile, art, effet, corps, artiste, ombre, ombres, deux, peintre, peinture, dessin, couleurs, tête] |
|
44 |
div 0 46214 49223 3009 549 327 1 16 [nature, homme, modèle, figure, deux, école, artiste, chose, âge, figures, dessin, actions, fois, professeur, action, attitude, manière, femme, col, tête] |
|
45 |
p 0 46220 46259 39 5 5 1 1 [nature, forme, cause, êtres, un] |
|
46 |
p 1 46260 46456 196 36 25 1 3 [yeux, col, épaules, gorge, femme, jeunesse, nature, accroissement, orbe, paupières, cavité, absence, organe, sourcils, joues, lèvre, mouvement, altération, parties, visage] |
|
47 |
p 2 46457 46578 121 28 26 1 2 [pieds, nature, regards, homme, dos, poitrine, forme, cartilages, col, vertèbres, tête, mains, articulation, poignet, coudes, arrière, membres, centre, gravité, système] |
|
48 |
p 3 46579 46622 43 5 4 1 2 [causes, effets, êtres, imitation] |
|
49 |
p 4 46623 46727 104 22 20 1 2 [ignorance, règles, effets, causes, convention, suites, peine, artiste, imitation, nature, pieds, jambes, genoux, têtes, tact, observation, phénomènes, liaison, enchaînement, difformités] |
|
50 |
p 5 46728 46797 69 10 6 1 4 [nez, Antinoüs, nature, difformité, altérations, reste] |
|
51 |
p 6 46798 46859 61 9 7 1 2 [règles, nature, homme, rue, chose, statue, proportions] |
|
52 |
p 7 46860 46942 82 13 11 1 2 [extrémité, pied, voile, bossu, Venus, Medicis, nature, figure, crayons, monstre, chose] |
|
53 |
p 8 46943 46982 39 11 11 1 1 [figure, système, suites, inconséquence, principe, production, art, mille, lieues, œuvre, nature] |
|
54 |
p 9 46983 47196 213 38 30 1 5 [homme, figure, âge, fonctions, mystères, art, artiste, proportions, despotisme, nature, condition, sacrifice, cent, manières, organisation, habitude, facilité, grandeur, proportion, membre] |
|
55 |
... [13 paragraphes] ... |
|
56 |
div 1 49224 52163 2939 531 307 1 23 [couleur, nature, chair, artiste, toile, art, homme, yeux, œil, couleurs, tableau, harmonie, effet, dessin, palette, organe, ton, coloriste, vie, ami] |
|
57 |
p 24 49230 49258 28 7 7 1 1 [C', dessin, forme, êtres, couleur, vie, souffle] |
|
58 |
p 25 49259 49284 25 6 6 1 1 [maîtres, art, juges, dessin, monde, couleur] |
|
59 |
p 26 49285 49354 69 16 16 1 1 [dessinateurs, coloristes, littérature, Cent, froids, orateur, Dix, orateurs, poète, intérêt, homme, Helvétius, dix, bons, peine, mort] |
|
60 |
p 27 49355 49485 130 24 21 1 2 [artiste, besoin, échelle, ami, atelier, teintes, demi-, palette, quart, heure, travail, ordre, pendant, passage, auteur, bureau, ligne, livre, place, allure] |
|
61 |
p 28 49486 49680 194 46 42 1 2 [yeux, toile, chaos, œuvre, sentiment, couleur, bouche, palette, image, pinceau, création, oiseaux, nuances, plumage, fleurs, velouté, arbres, verdures, azur, ciel] |
|
62 |
p 29 49681 49967 286 48 43 1 3 [nature, organe, homme, arbre, artistes, chose, monde, variété, coloristes, couleur, disposition, doute, œil, couleurs, tableau, effets, rouges, blancs, tapisserie, murs] |
|
63 |
p 30 49968 50068 100 20 17 1 3 [fois, organe, peintre, ouvrage, littérateur, caractère, disposition, pente, homme, voix, explosion, état, silence, artiste, tableau, couleur, coloris] |
|
64 |
p 31 50069 50105 36 7 7 1 1 [coup, organe, affection, corps, vapeur, nature, imitation] |
|
65 |
p 32 50106 50267 161 26 19 1 4 [couleur, palette, artiste, effet, tableau, teintes, couleurs, idée, endroit, fois, appréciation, scène, composition, manie, travail, teinte, composé, substances, unes] |
|
66 |
p 33 50268 50319 51 7 7 1 1 [général, harmonie, composition, peintre, effet, pinceau, couleur] |
|
67 |
... [etc.] |
|
68 |
Avec les paramètres : |
|
69 |
- structures : text,div,p |
|
70 |
- structProperties : id,n,n |
|
71 |
- query : [frpos="N.*"] |
|
72 |
- wordProperty : word |
|
73 |
- displayIndex : true |
|
74 |
- Vmax : 20 |
|
75 |
*/ |
|
76 |
|
|
77 |
// Déclarations |
|
78 |
|
|
79 |
import org.kohsuke.args4j.* |
|
80 |
|
|
81 |
import groovy.transform.Field |
|
82 |
|
|
83 |
import org.txm.rcp.swt.widget.parameters.* |
|
84 |
import org.txm.Toolbox |
|
85 |
import org.eclipse.ui.console.* |
|
86 |
import org.txm.macro.cqp.* |
|
87 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
88 |
import org.txm.searchengine.cqp.corpus.CQPCorpus |
|
89 |
import org.txm.searchengine.cqp.corpus.Partition |
|
90 |
import org.txm.searchengine.cqp.corpus.Property |
|
91 |
import org.txm.searchengine.cqp.corpus.QueryResult |
|
92 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
|
93 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
94 |
import org.txm.rcp.commands.* |
|
95 |
import org.txm.statsengine.r.core.RWorkspace |
|
96 |
|
|
97 |
byte CQI_CONST_FIELD_MATCH = (byte) 0x10 |
|
98 |
|
|
99 |
def scriptName = this.class.getSimpleName() |
|
100 |
|
|
101 |
def selection = [] |
|
102 |
for (def s : corpusViewSelections) { |
|
103 |
if (s instanceof CQPCorpus) selection << s |
|
104 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
105 |
} |
|
106 |
|
|
107 |
if (selection.size() == 0) { |
|
108 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
109 |
return false |
|
110 |
} |
|
111 |
println "WORKING WITH $selection" |
|
112 |
// BEGINNING OF PARAMETERS |
|
113 |
|
|
114 |
@Field @Option(name="structures", usage="act,scene", widget="String", required=true, def="text,div,p") |
|
115 |
def structures |
|
116 |
@Field @Option(name="structProperties", usage="n,n", widget="String", required=false, def="id,n,n") |
|
117 |
def structProperties |
|
118 |
@Field @Option(name="query", usage="[word!='\\p{P}']", widget="String", required=true, def="[pos=\"NOM.*\"|frpos=\"N.*\"]") |
|
119 |
def query |
|
120 |
@Field @Option(name="wordProperty", usage="word", widget="String", required=true, def="word") |
|
121 |
def wordProperty |
|
122 |
@Field @Option(name="displayIndex", usage="display a hierarchical index", widget="Boolean", required=true, def="true") |
|
123 |
def displayIndex |
|
124 |
@Field @Option(name="Vmax", usage="size of index", widget="Integer", required=false, def="20") |
|
125 |
def Vmax |
|
126 |
// END OF PARAMETERS |
|
127 |
|
|
128 |
// Open the parameters input dialog box |
|
129 |
if (!ParametersDialog.open(this)) return; |
|
130 |
|
|
131 |
def CQI = CQPSearchEngine.getCqiClient() |
|
132 |
|
|
133 |
def corpusStructs = structures.split(",") // ["act", "scene"] |
|
134 |
structProperties = structProperties.trim() |
|
135 |
|
|
136 |
if (structProperties.size() > 0) { |
|
137 |
propParam = true |
|
138 |
corpusStructPropNames = structProperties.split(",") // ["n", "n"] |
|
139 |
corpusStructProps = [corpusStructs, corpusStructPropNames].transpose().collectEntries() |
|
140 |
} else { |
|
141 |
propParam = false |
|
142 |
} |
|
143 |
|
|
144 |
// First define the order theory over corpus structures intervals |
|
145 |
// by defining a binary comparator that will be used to build the |
|
146 |
// TreeSet of intervals |
|
147 |
|
|
148 |
// Print the hierarchical index of a query: the values of word property `p`
// found at the match positions of query `q` in corpus `c`, counted and listed
// by decreasing frequency. When `cut` is greater than 0 only the first `cut`
// entries are printed, otherwise the whole index is printed.
def print_index = { c, q, p, cut ->

	// run the query and materialise its result as a temporary subcorpus
	QueryResult qr = c.query(new CQLQuery(q), "RES1", false)
	Subcorpus subcorpus = c.createSubcorpus("RES1", qr)

	// from here on `p` is the property object, no longer the name passed in
	p = subcorpus.getProperty(p)

	def tC = subcorpus.getSize()
	def propertyName = p.getQualifiedName()
	def matchPositions = CQI.dumpSubCorpus(qr.getQualifiedCqpId(), CQI_CONST_FIELD_MATCH, 0, tC-1)
	def matches_target_p = CQI.cpos2Str(propertyName, matchPositions)

	boolean truncated = cut > 0
	def ranked = matches_target_p.countBy { it }.sort { -it.value }
	if (truncated) {
		println ranked.take(cut)
	} else {
		println ranked
	}

	// the subcorpus was only needed for this computation
	subcorpus.delete()
}
|
163 |
|
|
164 |
// Print, on the current output line, the statistics of the index of a query:
// the size of the query result, the number of distinct values of word
// property `p`, and the lowest and highest frequency among these values.
// When the script parameter `displayIndex` is set, the `Vmax` most frequent
// values are appended as a list; otherwise the line is simply terminated.
def print_freq = { CQPCorpus c, q, p ->

	// call the search engine and wrap the result in a temporary subcorpus
	QueryResult qr = c.query(new CQLQuery(q), "RES1", false)
	Subcorpus subcorpus = c.createSubcorpus("RES1", qr)

	// from here on `p` is the property object, no longer the name passed in
	p = subcorpus.getProperty(p)

	// NOTE(review): this is the size of the corpus `c`, not of the query result — confirm intent
	int csize = c.getSize()
	if (csize != 0) {
		def tC = CQI.subCorpusSize(subcorpus.getQualifiedCqpId())
		def propertyName = p.getQualifiedName()
		def matchPositions = CQI.dumpSubCorpus(subcorpus.getQualifiedCqpId(), CQI_CONST_FIELD_MATCH, 0, tC-1)
		def matches_target_p = CQI.cpos2Id(propertyName, matchPositions)

		// frequency of each value code; only the frequencies feed the statistics
		def index = matches_target_p.collect { it }.countBy { it }
		def freqs = index.values()

		print sprintf("%d\t%d\t%d\t%d", tC, freqs.size(), freqs.min(), freqs.max())

		if (displayIndex) {
			// value codes by decreasing frequency, converted back to strings for display
			heads = index.sort { -it.value }.take(Vmax).keySet()
			println "\t"+heads.collect { CQI.id2Str(p.getQualifiedName(), it)[0] }
		} else {
			println ""
		}
	} else if (displayIndex) {
		println "0\t0\t0\t0\t[]"
	} else {
		println "0\t0\t0\t0"
	}

	// the subcorpus was only needed for this computation
	subcorpus.delete()
}
|
218 |
|
|
219 |
def r = RWorkspace.getRWorkspaceInstance() |
|
220 |
|
|
221 |
/**
 * Keep the units that lie entirely inside one of the matches.
 *
 * The units are first ordered by start position, then by end position, and
 * both sequences are then walked in parallel with one cursor each.
 * The matches are presumably expected to be ordered the same way and not to
 * overlap — TODO confirm against the callers.
 *
 * NOTE(review): sort() is called without a 'mutate' flag, so a List passed as
 * allUnites is presumably reordered in place — confirm before reusing the argument.
 *
 * @param allUnites the units, each one indexable as [start, end]
 * @param matches the matches, each one answering getStart() and getEnd()
 * @return the units whose [start, end] interval is included in a match, in sorted order
 */
static def inter(def allUnites, def matches) {
	allUnites = allUnites.sort() { left, right -> left[0] <=> right[0] ?: left[1] <=> right[1] }

	def selectedUnits = []
	def unitCount = allUnites.size()
	def matchCount = matches.size()
	def u = 0
	def m = 0

	while (m < matchCount && u < unitCount) {
		def unit = allUnites[u]
		def match = matches[m]

		boolean unitEndsBeforeMatch = unit[1] < match.getStart()

		// the unit starts after the current match: try the next match
		if (!unitEndsBeforeMatch && unit[0] > match.getEnd()) {
			m++
			continue
		}

		// the unit touches the match: keep it only when fully included
		if (!unitEndsBeforeMatch && match.getStart() <= unit[0] && unit[1] <= match.getEnd()) {
			selectedUnits << unit
		}

		// in every remaining case the current unit is done
		u++
	}
	return selectedUnits
}
|
262 |
|
|
263 |
selection.each { corpus -> |
|
264 |
|
|
265 |
corpusName = corpus.getID() |
|
266 |
mainCorpusName = corpus.getMainCorpus().getID() |
|
267 |
println "Corpus = "+corpusName |
|
268 |
println "Corpus QualifiedCqpId = "+corpus.getCqpId() |
|
269 |
println "MainCorpus = "+mainCorpusName |
|
270 |
println "Corpus QualifiedCqpId = "+corpus.getMainCorpus().getCqpId() |
|
271 |
|
|
272 |
def struct_names = (CQI.corpusStructuralAttributes(corpus.getMainCorpus().getCqpId()) as List) |
|
273 |
struct_names.removeAll { it.contains('_') } |
|
274 |
struct_names=(struct_names-"txmcorpus").grep(corpusStructs) |
|
275 |
//println "struct_names = "+struct_names |
|
276 |
|
|
277 |
if (struct_names.size() == 0) { |
|
278 |
println "** Impossible to find the structures (${corpusStructs}), aborting." |
|
279 |
return |
|
280 |
} |
|
281 |
|
|
282 |
def level = [:] |
|
283 |
|
|
284 |
// Now build the TreeSet of corpus structures intervals |
|
285 |
|
|
286 |
def h = new TreeSet<Struct>() |
|
287 |
|
|
288 |
struct_names.each { |
|
289 |
def matches = [] |
|
290 |
for (i in 0..CQI.attributeSize("${mainCorpusName}.${it}")-1) { |
|
291 |
(start, end) = CQI.struc2Cpos("${mainCorpusName}.${it}", i) |
|
292 |
matches << [start, end] |
|
293 |
//println sprintf("Adding %s[%d, %d]", it, start, end) |
|
294 |
} |
|
295 |
def intersection = inter(matches, corpus.getMatches()) |
|
296 |
for (def item : intersection) |
|
297 |
h.add(new Struct(it, item[0], item[1])) |
|
298 |
} |
|
299 |
|
|
300 |
if (propParam) { |
|
301 |
print sprintf("struct\tprop\tstart\tend\tT\tt\tv\tfmin\tfmax") |
|
302 |
} else { |
|
303 |
print sprintf("struct\tstart\tend\tT\tt\tv\tfmin\tfmax") |
|
304 |
} |
|
305 |
|
|
306 |
if (displayIndex) { |
|
307 |
println sprintf("\tindex") |
|
308 |
} else { |
|
309 |
println "" |
|
310 |
} |
|
311 |
|
|
312 |
def env = System.getenv() |
|
313 |
def localPath = env["HOME"]+"/Documents/d3test" |
|
314 |
new File(localPath).mkdirs() |
|
315 |
|
|
316 |
// reset output file |
|
317 |
def resultFile = new File(localPath, "desc-partition.html") |
|
318 |
def result = new PrintWriter(resultFile) |
|
319 |
result.print("") |
|
320 |
result.close() |
|
321 |
|
|
322 |
resultFile << '''\ |
|
323 |
<!DOCTYPE html> |
|
324 |
<html> |
|
325 |
<head> |
|
326 |
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" charset="UTF-8"/> |
|
327 |
<link type="text/css" rel="stylesheet" href="style.css"/> |
|
328 |
<script type="text/javascript" src="d3/d3.v3.js" charset="utf-8"></script> |
|
329 |
<script type="text/javascript" src="d3/layout/partition.js" charset="utf-8"></script> |
|
330 |
<style type="text/css"> |
|
331 |
|
|
332 |
.chart { |
|
333 |
display: block; |
|
334 |
margin: auto; |
|
335 |
margin-top: 60px; |
|
336 |
font-size: 11px; |
|
337 |
} |
|
338 |
|
|
339 |
rect { |
|
340 |
stroke: #eee; |
|
341 |
fill: #aaa; |
|
342 |
fill-opacity: .8; |
|
343 |
} |
|
344 |
|
|
345 |
rect.parent { |
|
346 |
cursor: pointer; |
|
347 |
fill: steelblue; |
|
348 |
} |
|
349 |
|
|
350 |
text { |
|
351 |
pointer-events: none; |
|
352 |
} |
|
353 |
|
|
354 |
</style> |
|
355 |
</head> |
|
356 |
<body> |
|
357 |
<div id="body"> |
|
358 |
<div id="footer"> |
|
359 |
Structures hierarchy |
|
360 |
<div class="hint">click or shift-alt-click to zoom-in or out</div> |
|
361 |
</div> |
|
362 |
</div> |
|
363 |
<script type="text/javascript"> |
|
364 |
|
|
365 |
var w = 1120, |
|
366 |
h = 600, |
|
367 |
x = d3.scale.linear().range([0, w]), |
|
368 |
y = d3.scale.linear().range([0, h]); |
|
369 |
|
|
370 |
var vis = d3.select("#body").append("div") |
|
371 |
.attr("class", "chart") |
|
372 |
.style("width", w + "px") |
|
373 |
.style("height", h + "px") |
|
374 |
.append("svg:svg") |
|
375 |
.attr("width", w) |
|
376 |
.attr("height", h); |
|
377 |
|
|
378 |
var partition = d3.layout.partition() |
|
379 |
.value(function(d) { return d.size; }).sort(null); |
|
380 |
|
|
381 |
var tree = `{''' |
|
382 |
|
|
383 |
// Now iterate on the TreeSet to get a depth first search on the structure intervals |
|
384 |
|
|
385 |
def rec_struct_regex = /([^0-9]+)[0-9]+/ |
|
386 |
|
|
387 |
/* |
|
388 |
"name": "sha-hamlet", |
|
389 |
"children": [ |
|
390 |
{ |
|
391 |
"name": "sha-hamcast", |
|
392 |
"children": [ |
|
393 |
{ |
|
394 |
"name": "sha-ham1", |
|
395 |
"children": [ |
|
396 |
{"name": "sha-ham102", "size": 855}, |
|
397 |
{"name": "sha-ham103", "size": 464}, |
|
398 |
{"name": "sha-ham104", "size": 296}, |
|
399 |
{"name": "sha-ham105", "size": 635} |
|
400 |
] |
|
401 |
} |
|
402 |
] |
|
403 |
} |
|
404 |
] |
|
405 |
}`; |
|
406 |
*/ |
|
407 |
|
|
408 |
def displayTree = { head -> |
|
409 |
if (head) { |
|
410 |
subtree = h.tailSet(head) |
|
411 |
subtree.each { print sprintf("%s[%d, %d], ", it.name, it.start, it.end) } |
|
412 |
println "" |
|
413 |
if (subtree.size() == 0) { |
|
414 |
println sprintf("%s[%d, %d]", head.name, head.start, head.end) |
|
415 |
} else { |
|
416 |
displayTree(subtree) |
|
417 |
} |
|
418 |
} |
|
419 |
} |
|
420 |
|
|
421 |
//displayTree(h.first()) |
|
422 |
|
|
423 |
def divPropVals = [] |
|
424 |
def divLengths = [] |
|
425 |
def textDivPropVals = [] |
|
426 |
def textDivLengths = [] |
|
427 |
|
|
428 |
h.each { |
|
429 |
|
|
430 |
//println sprintf("Displaying %s[%d, %d]", it.name, it.start, it.end) |
|
431 |
if (propParam) { |
|
432 |
|
|
433 |
def rec_match = (it.name =~ rec_struct_regex) |
|
434 |
if (rec_match.size() == 1) { |
|
435 |
println "Rec struct match = "+rec_match[0][1] |
|
436 |
istruct_name = rec_match[0][1] |
|
437 |
} else { |
|
438 |
//println "Struct match = "+it.name |
|
439 |
istruct_name = it.name |
|
440 |
} |
|
441 |
|
|
442 |
def struct_name = "${mainCorpusName}.${istruct_name}_${corpusStructProps[it.name]}" |
|
443 |
def propVal = CQI.struc2Str(struct_name, CQI.cpos2Struc(struct_name, [it.start] as int[]))[0] |
|
444 |
if (it.name == "text") { |
|
445 |
textDivPropVals.push(divPropVals) |
|
446 |
divPropVals = [] |
|
447 |
textDivLengths.push(divLengths) |
|
448 |
divLengths = [] |
|
449 |
} else if (it.name == "div") { |
|
450 |
divPropVals.push(propVal) |
|
451 |
divLengths.push(it.end-it.start) |
|
452 |
} |
|
453 |
|
|
454 |
|
|
455 |
print sprintf("%s\t%s\t%d\t%d\t%d\t", it.name, propVal, it.start, it.end, it.end-it.start) |
|
456 |
} else { |
|
457 |
def struct_name = "${mainCorpusName}.${it.name}" |
|
458 |
print sprintf("%s\t%d\t%d\t%d\t", it.name, it.start, it.end, it.end-it.start) |
|
459 |
} |
|
460 |
print_freq(corpus, sprintf("a:%s :: a>=%d & a<=%d", query, it.start, it.end), wordProperty) |
|
461 |
} |
|
462 |
|
|
463 |
textDivPropVals.push(divPropVals) |
|
464 |
textDivPropVals.remove(0) |
|
465 |
textDivLengths.push(divLengths) |
|
466 |
textDivLengths.remove(0) |
|
467 |
|
|
468 |
println textDivPropVals |
|
469 |
println textDivLengths |
|
470 |
|
|
471 |
def textDivPropVals1 = textDivPropVals[0] as String[] |
|
472 |
r.addVectorToWorkspace("textDivPropVals1", textDivPropVals1) |
|
473 |
def textDivLengths1 = textDivLengths[0] as int[] |
|
474 |
r.addVectorToWorkspace("textDivLengths1", textDivLengths1) |
|
475 |
|
|
476 |
def PNGFile = File.createTempFile("txm", ".png", new File(Toolbox.getTxmHomePath(), "results")) |
|
477 |
def PNGFilePath = PNGFile.getAbsolutePath() |
|
478 |
println "PNG file: "+PNGFilePath |
|
479 |
|
|
480 |
def SVGFile = File.createTempFile("txm", ".svg", new File(Toolbox.getTxmHomePath(), "results")) |
|
481 |
def SVGFilePath = SVGFile.getAbsolutePath() |
|
482 |
println "SVG file: "+SVGFilePath |
|
483 |
|
|
484 |
/// BEGINNING OF R SCRIPT |
|
485 |
def script =""" |
|
486 |
df <- data.frame(structure=textDivPropVals1, |
|
487 |
longueur=textDivLengths1) |
|
488 |
p<-ggplot(data=df, aes(x=structure, y=longueur)) + |
|
489 |
geom_bar(stat="identity", fill="steelblue") + |
|
490 |
geom_text(aes(label=longueur), vjust=1.6, color="white", size=3.5) + |
|
491 |
labs(title="${corpusName}", x="Structure div", y = "Longueur") + |
|
492 |
theme_minimal() |
|
493 |
""" |
|
494 |
/// END OF R SCRIPT |
|
495 |
|
|
496 |
// execute R script |
|
497 |
try { |
|
498 |
r.eval("library(ggplot2)") |
|
499 |
try { |
|
500 |
r.eval(script+"ggsave(file=\"${PNGFilePath}\", plot=p)") |
|
501 |
r.eval(script+"ggsave(file=\"${SVGFilePath}\", plot=p)") |
|
502 |
|
|
503 |
//display the SVG results graphic |
|
504 |
monitor.syncExec(new Runnable() { |
|
505 |
@Override |
|
506 |
public void run() { try { OpenSVGGraph.OpenSVGFile(SVGFilePath, "Longueur des structures de "+corpusName) } catch(Exception e) {e.printStackTrace()} } |
|
507 |
}) |
|
508 |
} catch (Exception e) { |
|
509 |
println "** Error: "+e |
|
510 |
} |
|
511 |
} catch (Exception e) { |
|
512 |
println "** The 'ggplot2' R package is not installed. Start R ("+RWorkspace.getExecutablePath()+") and run 'install.packages(\"ggplot2\");'." |
|
513 |
} |
|
514 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/Struct.groovy (revision 2093) | ||
---|---|---|
1 |
package org.txm.macro.urs.exploit; |
|
2 |
|
|
3 |
/**
 * A named interval [start, end].
 *
 * The natural ordering sorts intervals by increasing start; for equal starts
 * the interval with the greater end comes first (so a containing interval
 * precedes the intervals it contains); intervals with identical bounds are
 * ordered by the lexicographic order of their names.
 */
class Struct implements Comparable<Struct> {

    String name
    Integer start
    Integer end

    /**
     * @param n name of the structure
     * @param s start of the interval
     * @param e end of the interval
     */
    Struct(String n, Integer s, Integer e) {
        name = n
        start = s
        end = e
    }

    /**
     * Compares this interval with {@code s}.
     *
     * @param s the interval to compare with
     * @return a negative value if this interval comes before {@code s},
     *         a positive value if it comes after, 0 only when bounds and names are equal
     */
    public int compareTo(Struct s) {
        // interval starting on the left comes first : [ { ...
        if (start < s.start) return -1
        // interval starting on the right comes after : { [ ...
        if (start > s.start) return 1

        // same start, interval ending on the right comes before : [{ } ]
        if (end > s.end) return -1
        // same start, interval ending on the left comes after : [{ ] }
        // (the original returned -1 here, which made a.compareTo(b) and
        // b.compareTo(a) both negative and broke the ordering contract)
        if (end < s.end) return 1

        // same start, same end : use the lexicographic order of the structure names
        return name.compareTo(s.name)
    }

    /**
     * @return this interval formatted as {@code name[start, end]}
     */
    @Override
    String toString() {
        sprintf("%s[%d, %d]", name, start, end)
    }

    /**
     * Formats the given interval; kept for callers of the original one-argument form.
     *
     * @param s the interval to format
     * @return {@code s} formatted as {@code name[start, end]}
     */
    public toString(Struct s) {
        sprintf("%s[%d, %d]", s.name, s.start, s.end)
    }

    /**
     * Prints the string form of the given interval.
     *
     * @param s the interval to print
     */
    public print(Struct s) {
        print(s.toString())
    }
}
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsIndexMacro.groovy (revision 2093) | ||
---|---|---|
94 | 94 |
def word = mainCorpus.getWordProperty() |
95 | 95 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus) |
96 | 96 |
|
97 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
98 |
if (errors.size() > 0) { |
|
99 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
100 |
return; |
|
101 |
} |
|
102 |
|
|
103 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
104 |
if (errors.size() > 0) { |
|
105 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
106 |
return; |
|
107 |
} |
|
108 |
|
|
97 | 109 |
if (unit_property_display.length() > 0) { |
98 |
def errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql, unit_property_display).size()
|
|
110 |
errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], unit_property_display).size()
|
|
99 | 111 |
if (errors > 0) { |
100 | 112 |
println "Error: some Unit types don't contain the $unit_property_display property: $errors" |
101 | 113 |
return |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasProgressionMacro.groovy (revision 2093) | ||
---|---|---|
97 | 97 |
def word = mainCorpus.getWordProperty() |
98 | 98 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus) |
99 | 99 |
|
100 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
101 |
if (errors.size() > 0) { |
|
102 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
103 |
return; |
|
104 |
} |
|
105 |
|
|
100 | 106 |
if (schema_property_display.length() > 0) { |
101 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql, schema_property_display).size()
|
|
107 |
errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display).size()
|
|
102 | 108 |
if (errors > 0) { |
103 | 109 |
println "Error: some Schema types don't contain the $schema_property_display property: $errors" |
104 | 110 |
return |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsListMacro.groovy (revision 2093) | ||
---|---|---|
50 | 50 |
String unit_ursql |
51 | 51 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
52 | 52 |
int limit_distance_in_schema |
53 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
53 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
54 | 54 |
limit_cql |
55 | 55 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
56 | 56 |
boolean strict_inclusion |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasListMacro.groovy (revision 2093) | ||
---|---|---|
52 | 52 |
CQPCorpus corpus = corpusViewSelection |
53 | 53 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
54 | 54 |
|
55 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
56 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
55 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
56 |
if (errors.size() > 0) { |
|
57 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
57 | 58 |
return; |
58 | 59 |
} |
59 | 60 |
|
60 |
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) { |
|
61 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus." |
|
61 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
62 |
if (errors.size() > 0) { |
|
63 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
62 | 64 |
return; |
63 | 65 |
} |
64 | 66 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsInterDistanceMacro.groovy (revision 2093) | ||
---|---|---|
42 | 42 |
CQPCorpus corpus = corpusViewSelection |
43 | 43 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
44 | 44 |
|
45 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
46 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
45 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
46 |
if (errors.size() > 0) { |
|
47 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
47 | 48 |
return; |
48 | 49 |
} |
49 | 50 |
|
50 |
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) { |
|
51 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus." |
|
51 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
52 |
if (errors.size() > 0) { |
|
53 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
52 | 54 |
return; |
53 | 55 |
} |
54 | 56 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsStabilityScoreMacro.groovy (revision 2093) | ||
---|---|---|
46 | 46 |
def corpus = corpusViewSelection |
47 | 47 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
48 | 48 |
|
49 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
50 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
49 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
50 |
if (errors.size() > 0) { |
|
51 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
51 | 52 |
return; |
52 | 53 |
} |
53 | 54 |
|
54 |
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) { |
|
55 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus." |
|
55 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
56 |
if (errors.size() > 0) { |
|
57 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
56 | 58 |
return; |
57 | 59 |
} |
58 |
|
|
59 | 60 |
def CQI = CQPSearchEngine.getCqiClient() |
60 | 61 |
|
61 | 62 |
def prop = corpus.getProperty(word_property) |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemaLengthsMacro.groovy (revision 2093) | ||
---|---|---|
43 | 43 |
def corpus = corpusViewSelection |
44 | 44 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
45 | 45 |
|
46 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
47 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
46 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
47 |
if (errors.size() > 0) { |
|
48 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
48 | 49 |
return; |
49 | 50 |
} |
50 | 51 |
|
51 |
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) { |
|
52 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus." |
|
52 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
53 |
if (errors.size() > 0) { |
|
54 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
53 | 55 |
return; |
54 | 56 |
} |
55 | 57 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/ResetAnnotationsMacro.groovy (revision 2093) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.urs.edit |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.txm.annotation.urs.* |
|
8 |
import org.txm.searchengine.cqp.corpus.* |
|
9 |
import visuAnalec.elements.* |
|
10 |
|
|
11 |
// Macro body: removes every unit, relation and schema of the URS annotation
// corpus attached to the main corpus selected in the Corpus view.
// The removal only runs when the confirmation parameter has been checked.

// The macro only works on a MainCorpus selection
if (!(corpusViewSelection instanceof MainCorpus)) {
    println "Corpora selection is not a Corpus"
    return;
}

// Confirmation flag shown in the parameters dialog (unchecked by default)
@Field @Option(name="I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS", usage="an example boolean", widget="Boolean", required=false, def="false")
def I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// The reset is destructive: stop unless the user explicitly confirmed it.
// The value is normalised through its string form because the field is untyped
// (presumably the dialog stores a Boolean -- TODO confirm).
if (!Boolean.parseBoolean(I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS?.toString())) {
    println "Reset aborted: check the I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS option to confirm."
    return;
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);
def structure = analecCorpus.getStructure()

println "Removing unites..."
for (String type : structure.getTypes(Unite.class)) {
    // iterate over an array copy of the units of this type while calling supUnite on each
    for (Unite unite : analecCorpus.getUnites(type).toArray(new Unite[0])) {
        analecCorpus.supUnite(unite)
    }
    // fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Unite.class, type));
}

println "Removing relations..."
for (String type : structure.getTypes(Relation.class)) {
    for (Relation relation : analecCorpus.getRelations(type).toArray(new Relation[0])) {
        analecCorpus.supRelation(relation)
    }
    // fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Relation.class, type));
}

println "Removing schemas..."
for (String type : structure.getTypes(Schema.class)) {
    for (Schema schema : analecCorpus.getSchemas(type).toArray(new Schema[0])) {
        analecCorpus.supSchema(schema)
    }
    // fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Schema.class, type));
}

println "Done. Save the corpus to finish the reset."
|
52 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/UnitsDeleteMacro.groovy (revision 2093) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.edit |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
|
|
11 |
import groovy.transform.Field |
|
12 |
|
|
13 |
import org.txm.Toolbox |
|
14 |
import org.txm.rcp.swt.widget.parameters.* |
|
15 |
import org.txm.annotation.urs.* |
|
16 |
import org.txm.macro.urs.AnalecUtils |
|
17 |
import org.txm.searchengine.cqp.AbstractCqiClient |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
20 |
|
|
21 |
import visuAnalec.donnees.Corpus |
|
22 |
import visuAnalec.donnees.Structure |
|
23 |
import visuAnalec.elements.* |
|
24 |
import visuAnalec.vue.Vue |
|
25 |
|
|
26 |
// Macro body: for each selected corpus (or each part of a selected partition),
// selects units through AnalecUtils.selectUnitsInSchema with the dialog
// parameters and calls supUnite on each of them.
// Returns a map holding, per corpus, the selected units, plus an "n" entry
// with the count of the last processed corpus.

def scriptName = this.class.getSimpleName()

// Collect the corpora to process: corpora as is, partitions expanded to their parts
def selection = []
for (def selected : corpusViewSelections) {
    if (selected instanceof CQPCorpus) {
        selection.add(selected)
    } else if (selected instanceof Partition) {
        selection.addAll(selected.getParts())
    }
}

if (selection.isEmpty()) {
    println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
    return false
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance_in_schema
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
limit_cql
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug
if (!ParametersDialog.open(this)) return

// Turn the debug label chosen in the dialog into a numeric level
if (debug == "OFF") {
    debug = 0
} else if (debug == "ON") {
    debug = 1
} else if (debug == "ALL") {
    debug = 2
} else if (debug == "REALLY ALL") {
    debug = 3
}

//corpus = corpusViewSelection
def allResults = [:]
def errors = new HashSet()
for (def corpus : selection) {
    println "Deleting '$unit_ursql' units of '$schema_ursql' schemas in the '$corpus' corpus..."

    def word = corpus.getWordProperty()
    def analecCorpus = URSCorpora.getCorpus(corpus)
    Vue analecView = URSCorpora.getVue(corpus)
    Structure structure = analecCorpus.getStructure()

    def selectedUnits = AnalecUtils.selectUnitsInSchema(
            debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE,
            unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);

    def n = 0
    def nerrors = 0
    // call supUnite on every selected unit, counting them
    for (Unite selectedUnit : selectedUnits) {
        analecCorpus.supUnite(selectedUnit);
        n++
    }

    // flag the main corpus as modified
    corpus.getMainCorpus().setIsModified(true)
    allResults[corpus] = selectedUnits
    allResults["n"] = n
    println " $n units deleted"
}

return allResults
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/ResetAllAnnotationsMacro.groovy (revision 2093) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.urs.edit |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.txm.annotation.urs.* |
|
8 |
import org.txm.searchengine.cqp.corpus.* |
|
9 |
import visuAnalec.elements.* |
|
10 |
|
|
11 |
// Macro body: removes every unit, relation and schema of the URS annotation
// corpus attached to the main corpus selected in the Corpus view.
// The removal only runs when the confirmation parameter has been checked.

// The macro only works on a MainCorpus selection
if (!(corpusViewSelection instanceof MainCorpus)) {
    println "Corpora selection is not a Corpus"
    return;
}

// Confirmation flag shown in the parameters dialog (unchecked by default)
@Field @Option(name="I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS", usage="an example boolean", widget="Boolean", required=false, def="false")
def I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// The reset is destructive: stop unless the user explicitly confirmed it.
// The value is normalised through its string form because the field is untyped
// (presumably the dialog stores a Boolean -- TODO confirm).
if (!Boolean.parseBoolean(I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS?.toString())) {
    println "Reset aborted: check the I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS option to confirm."
    return;
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);
def structure = analecCorpus.getStructure()

println "Removing unites..."
for (String type : structure.getTypes(Unite.class)) {
    // iterate over an array copy of the units of this type while calling supUnite on each
    for (Unite unite : analecCorpus.getUnites(type).toArray(new Unite[0])) {
        analecCorpus.supUnite(unite)
    }
    // fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Unite.class, type));
}

println "Removing relations..."
for (String type : structure.getTypes(Relation.class)) {
    for (Relation relation : analecCorpus.getRelations(type).toArray(new Relation[0])) {
        analecCorpus.supRelation(relation)
    }
    // fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Relation.class, type));
}

println "Removing schemas..."
for (String type : structure.getTypes(Schema.class)) {
    for (Schema schema : analecCorpus.getSchemas(type).toArray(new Schema[0])) {
        analecCorpus.supSchema(schema)
    }
    // fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Schema.class, type));
}

println "Done. Save the corpus to finish the reset."
|
52 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy (revision 2093) | ||
---|---|---|
11 | 11 |
|
12 | 12 |
|
13 | 13 |
static def isPropertyDefined(Class clazz, Corpus analecCorpus, String ursql) { |
14 |
if (ursql == null || ursql.length() == 0) return new HashSet() |
|
14 | 15 |
def params = getFilterParameters(ursql) |
15 | 16 |
def typeRegexp = params[0] |
16 | 17 |
def propRegexp = params[1] |
... | ... | |
19 | 20 |
|
20 | 21 |
static def isPropertyDefined(Class clazz, Corpus analecCorpus, String typeRegexp, String propRegexp) { |
21 | 22 |
def errors = new HashSet() |
23 |
if (propRegexp == null || propRegexp.length() == 0) return errors; |
|
22 | 24 |
Structure structure = analecCorpus.getStructure(); |
23 | 25 |
for (def type : structure.getTypes(clazz)) { |
24 | 26 |
if (!type.matches(typeRegexp)) continue; // test only types matching with typeRegexp |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckDuplicatesInSchemasMacro.groovy (revision 2093) | ||
---|---|---|
46 | 46 |
def word = corpus.getWordProperty() |
47 | 47 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
48 | 48 |
|
49 |
|
|
50 |
|
|
51 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
52 |
if (errors.size() > 0) { |
|
53 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
54 |
return; |
|
55 |
} |
|
56 |
|
|
49 | 57 |
if (schema_property_display.length() > 0) { |
50 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql, schema_property_display).size()
|
|
58 |
errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display).size()
|
|
51 | 59 |
if (errors > 0) { |
52 | 60 |
println "Error: some Schema types don't contain the $schema_property_display property: $errors" |
53 | 61 |
return |
tmp/org.txm.analec.rcp/build.properties (revision 2093) | ||
---|---|---|
2 | 2 |
source.. = src/ |
3 | 3 |
output.. = bin/ |
4 | 4 |
bin.includes = META-INF/,\ |
5 |
OSGI-INF/,\ |
|
6 | 5 |
icons/,\ |
7 | 6 |
plugin.xml,\ |
8 |
libs/ |
|
7 |
libs/,\ |
|
8 |
OSGI-INF/ |
|
9 | 9 |
qualifier=svn |
tmp/org.txm.analec.rcp/OSGI-INF/l10n/bundle.properties (revision 2093) | ||
---|---|---|
39 | 39 |
|
40 | 40 |
view.name = Properties |
41 | 41 |
view.name.0 = Search |
42 |
|
|
43 |
menu.label.1 = Tools |
|
44 |
menu.label.2 = Annotation |
|
45 |
menu.label.3 = Verification |
|
46 |
menu.label.4 = Exploitation |
|
47 |
menu.label.5 = Export |
|
48 |
command.label.3 = URS |
|
49 |
PostTXMHOMEInstallationStep.description = Install URS Files |
tmp/org.txm.analec.rcp/OSGI-INF/l10n/bundle_fr.properties (revision 2093) | ||
---|---|---|
59 | 59 |
view.name = Propri\u00E9t\u00E9s |
60 | 60 |
|
61 | 61 |
view.name.0 = chercher |
62 |
|
|
63 |
menu.label.1 = Outils |
|
64 |
menu.label.2 = Annotation |
|
65 |
menu.label.3 = V\u00E9rification |
|
66 |
menu.label.4 = Exploitation |
|
67 |
menu.label.5 = Export |
|
68 |
command.label.3 = URS |
|
69 |
PostTXMHOMEInstallationStep.description = Installe les fichiers URS |
tmp/org.txm.analec.rcp/plugin.xml (revision 2093) | ||
---|---|---|
21 | 21 |
</command> |
22 | 22 |
<menu |
23 | 23 |
id="menu.urs.tools" |
24 |
label="Tools">
|
|
24 |
label="%menu.label.1">
|
|
25 | 25 |
<menu |
26 |
label="Annotation">
|
|
26 |
label="%menu.label.2">
|
|
27 | 27 |
<dynamic |
28 | 28 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
29 | 29 |
id="edit"> |
30 | 30 |
</dynamic> |
31 | 31 |
</menu> |
32 | 32 |
<menu |
33 |
label="Verification">
|
|
33 |
label="%menu.label.3">
|
|
34 | 34 |
<dynamic |
35 | 35 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
36 | 36 |
id="check"> |
37 | 37 |
</dynamic> |
38 | 38 |
</menu> |
39 | 39 |
<menu |
40 |
label="Exploitation">
|
|
40 |
label="%menu.label.4">
|
|
41 | 41 |
<dynamic |
42 | 42 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
43 | 43 |
id="exploit"> |
44 | 44 |
</dynamic> |
45 | 45 |
</menu> |
46 | 46 |
<menu |
47 |
label="Export">
|
|
47 |
label="%menu.label.5">
|
|
48 | 48 |
<dynamic |
49 | 49 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
50 | 50 |
id="export"> |
... | ... | |
194 | 194 |
locationURI="menu:menu.help.plugins"> |
195 | 195 |
<command |
196 | 196 |
commandId="org.txm.rcp.commands.OpenBrowser" |
197 |
label="URS"
|
|
197 |
label="%command.label.3"
|
|
198 | 198 |
style="push"> |
199 | 199 |
<parameter |
200 | 200 |
name="org.txm.rcp.commands.commandParameter2" |
... | ... | |
377 | 377 |
point="org.txm.PostTXMHOMEInstallationStep"> |
378 | 378 |
<PostTXMHOMEInstallationStep |
379 | 379 |
class="org.txm.annotation.urs.InstallURSFiles" |
380 |
description="Install URS Files"
|
|
380 |
description="%PostTXMHOMEInstallationStep.description"
|
|
381 | 381 |
name="URS macro"> |
382 | 382 |
</PostTXMHOMEInstallationStep> |
383 | 383 |
</extension> |
Formats disponibles : Unified diff