Révision 2174
tmp/org.txm.analec.rcp/plugin.xml (revision 2174) | ||
---|---|---|
50 | 50 |
id="export"> |
51 | 51 |
</dynamic> |
52 | 52 |
</menu> |
53 |
<command |
|
54 |
commandId="org.txm.annotation.urs.commands.OpenDemocratTools" |
|
55 |
style="push"> |
|
56 |
</command> |
|
53 | 57 |
</menu> |
54 | 58 |
<command |
55 | 59 |
commandId="org.txm.annotation.urs.commands.EditAnnotationStructure" |
... | ... | |
104 | 108 |
label="%menu.label.0" |
105 | 109 |
mnemonic="%menu.mnemonic.0"> |
106 | 110 |
<command |
107 |
commandId="org.txm.annotation.urs.commands.LoadStructureFromGlozz"
|
|
111 |
commandId="org.txm.annotation.urs.commands.SaveCorpus"
|
|
108 | 112 |
style="push"> |
109 | 113 |
<visibleWhen |
110 | 114 |
checkEnabled="false"> |
... | ... | |
113 | 117 |
</reference> |
114 | 118 |
</visibleWhen> |
115 | 119 |
</command> |
120 |
<menu |
|
121 |
id="menu.urs.tools" |
|
122 |
label="%menu.label.1"> |
|
123 |
<menu |
|
124 |
label="%menu.label.2"> |
|
125 |
<dynamic |
|
126 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
|
127 |
id="edit"> |
|
128 |
</dynamic> |
|
129 |
</menu> |
|
130 |
<menu |
|
131 |
label="%menu.label.3"> |
|
132 |
<dynamic |
|
133 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
|
134 |
id="check"> |
|
135 |
</dynamic> |
|
136 |
</menu> |
|
137 |
<menu |
|
138 |
label="%menu.label.4"> |
|
139 |
<dynamic |
|
140 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
|
141 |
id="exploit"> |
|
142 |
</dynamic> |
|
143 |
</menu> |
|
144 |
<menu |
|
145 |
label="%menu.label.5"> |
|
146 |
<dynamic |
|
147 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
|
148 |
id="export"> |
|
149 |
</dynamic> |
|
150 |
</menu> |
|
151 |
<command |
|
152 |
commandId="org.txm.annotation.urs.commands.OpenDemocratTools" |
|
153 |
style="push"> |
|
154 |
</command> |
|
155 |
</menu> |
|
116 | 156 |
<command |
117 | 157 |
commandId="org.txm.annotation.urs.commands.EditAnnotationStructure" |
118 | 158 |
style="push"> |
... | ... | |
124 | 164 |
</visibleWhen> |
125 | 165 |
</command> |
126 | 166 |
<command |
127 |
commandId="org.txm.annotation.urs.commands.SaveCorpus"
|
|
167 |
commandId="org.txm.annotation.urs.commands.EditVue"
|
|
128 | 168 |
style="push"> |
129 | 169 |
<visibleWhen |
130 | 170 |
checkEnabled="false"> |
... | ... | |
133 | 173 |
</reference> |
134 | 174 |
</visibleWhen> |
135 | 175 |
</command> |
176 |
<separator |
|
177 |
name="URSRCP.separator1" |
|
178 |
visible="true"> |
|
179 |
</separator> |
|
136 | 180 |
<command |
137 |
commandId="org.txm.annotation.urs.commands.ExportGlozzCorpus"
|
|
181 |
commandId="org.txm.annotation.urs.commands.LoadStructureFromGlozz"
|
|
138 | 182 |
style="push"> |
139 | 183 |
<visibleWhen |
140 | 184 |
checkEnabled="false"> |
... | ... | |
144 | 188 |
</visibleWhen> |
145 | 189 |
</command> |
146 | 190 |
<command |
147 |
commandId="org.txm.annotation.urs.commands.ExportTEICorpus" |
|
148 |
style="push"> |
|
149 |
<visibleWhen |
|
150 |
checkEnabled="false"> |
|
151 |
<reference |
|
152 |
definitionId="OneCorpusSelected"> |
|
153 |
</reference> |
|
154 |
</visibleWhen> |
|
155 |
</command> |
|
156 |
<command |
|
157 | 191 |
commandId="org.txm.annotation.urs.commands.ImportTEIAnnotations" |
158 | 192 |
style="push"> |
159 | 193 |
<visibleWhen |
... | ... | |
163 | 197 |
</reference> |
164 | 198 |
</visibleWhen> |
165 | 199 |
</command> |
200 |
<menu |
|
201 |
label="Export"> |
|
202 |
<command |
|
203 |
commandId="org.txm.annotation.urs.commands.ExportTEICorpus" |
|
204 |
label="%command.label.0" |
|
205 |
style="push"> |
|
206 |
</command> |
|
207 |
<command |
|
208 |
commandId="org.txm.annotation.urs.commands.ExportGlozzCorpus" |
|
209 |
label="%command.label.1" |
|
210 |
style="push"> |
|
211 |
</command> |
|
212 |
</menu> |
|
166 | 213 |
</menu> |
167 | 214 |
</menuContribution> |
168 | 215 |
<menuContribution |
... | ... | |
306 | 353 |
id="org.txm.annotation.urs.commands.EditVue" |
307 | 354 |
name="%command.name.16"> |
308 | 355 |
</command> |
356 |
<command |
|
357 |
categoryId="org.txm.rcp.category.txm" |
|
358 |
defaultHandler="org.txm.annotation.urs.commands.OpenDemocratTools" |
|
359 |
id="org.txm.annotation.urs.commands.OpenDemocratTools" |
|
360 |
name="Democrat tools"> |
|
361 |
</command> |
|
309 | 362 |
</extension> |
310 | 363 |
<extension |
311 | 364 |
point="org.eclipse.ui.views"> |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/DemoMacro.groovy (revision 2174) | ||
---|---|---|
1 |
package org.txm.macro.urs |
|
2 |
|
|
3 |
import org.txm.annotation.urs.URSCorpora |
|
4 |
import org.txm.searchengine.cqp.corpus.CQPCorpus |
|
5 |
import visuAnalec.elements.* |
|
6 |
|
|
7 |
// get the CQP corpus |
|
8 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
9 |
println "Corpus view selection is no a corpus: $corpusViewSelection" |
|
10 |
return false; |
|
11 |
} |
|
12 |
def corpus = corpusViewSelection |
|
13 |
|
|
14 |
// get the Analec corpus : stores the annotations and annotation structure |
|
15 |
|
|
16 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
17 |
|
|
18 |
// get the annotations structure : stores the available annotation types, properties and values |
|
19 |
|
|
20 |
def structure = analecCorpus.getStructure() |
|
21 |
|
|
22 |
// get the annotations view : stores annotations and annotation structure display rules |
|
23 |
|
|
24 |
def view = URSCorpora.getVue(corpus) |
|
25 |
|
|
26 |
// set a default vue -> all is visible |
|
27 |
view.retablirVueParDefaut() |
|
28 |
|
|
29 |
// list the viewed types and properties |
|
30 |
for (def type : view.getTypesAVoir(Unite.class)) { |
|
31 |
println "$type: "+view.getNomsChamps(Unite.class, type) |
|
32 |
} |
|
33 |
|
|
34 |
// manage the viewed&enabled type, properties ; the types and properties must be enabled to use the "view.setValeurChamp(TYPE, PROPERTY, VALUE)" method |
|
35 |
view.ajouterType(Unite.class, "EXEMPLE") |
|
36 |
view.ajouterProp(Unite.class, "EXEMPLE", "PEXEMPLE") |
|
37 |
|
|
38 |
// display annotations per Element type (Unite, Relation, Schema) and per type |
|
39 |
|
|
40 |
// Create annotation type |
|
41 |
|
|
42 |
structure.ajouterType(Unite.class, "EXEMPLE"); |
|
43 |
//structure.ajouterType(Relation.class, "EXEMPLE"); |
|
44 |
//structure.ajouterType(Schema.class, "EXEMPLE"); |
|
45 |
println structure.getTypes(Unite.class) |
|
46 |
|
|
47 |
// Create annotation property |
|
48 |
|
|
49 |
structure.ajouterProp(Unite.class, "EXEMPLE", "PEXEMPLE") |
|
50 |
println structure.getNomsProps(Unite.class, "EXEMPLE") |
|
51 |
|
|
52 |
// Create annotation value |
|
53 |
|
|
54 |
structure.ajouterVal(Unite.class, "EXEMPLE", "PEXEMPLE", "oui") |
|
55 |
structure.ajouterVal(Unite.class, "EXEMPLE", "PEXEMPLE", "non") |
|
56 |
|
|
57 |
// Create annotations |
|
58 |
|
|
59 |
// Unite |
|
60 |
def u = analecCorpus.addUniteSaisie("EXEMPLE", 0, 10, ["PEXEMPLE":"oui"]) |
|
61 |
def u2 = analecCorpus.addUniteSaisie("EXEMPLE", 12, 12, ["PEXEMPLE":"oui"]) |
|
62 |
println u |
|
63 |
|
|
64 |
// Edit annotations |
|
65 |
|
|
66 |
u.getProps()["PEXEMPLE"] = "non" |
|
67 |
|
|
68 |
// Unit getters |
|
69 |
|
|
70 |
println u.getDeb() // start of unit |
|
71 |
println u.getFin() // end of unit |
|
72 |
|
|
73 |
println u.getProp("EXEMPLE") |
|
74 |
println u.getType() |
|
75 |
|
|
76 |
// Relation |
|
77 |
Relation relation = new Relation("REXEMPLE", u, u2) |
|
78 |
relation.getProps().put("PEXEMPLE", "oui") |
|
79 |
analecCorpus.addRelationLue(relation) // add the new relation |
|
80 |
|
|
81 |
// Schema |
|
82 |
Schema schema = new Schema() |
|
83 |
schema.type = "SEXEMPLE" |
|
84 |
schema.props.put("PEXEMPLE", "oui") |
|
85 |
schema.ajouter(u) // insert one unit |
|
86 |
|
|
87 |
analecCorpus.addSchemaLu(schema) // add the new schema |
|
88 |
|
|
89 |
// Browse Units |
|
90 |
|
|
91 |
println "Units:" |
|
92 |
for (String type : structure.getUnites()) { |
|
93 |
def units = analecCorpus.getUnites(type) |
|
94 |
if (units.size() > 0) { |
|
95 |
println " ${units.size()} $type" |
|
96 |
} |
|
97 |
} |
|
98 |
|
|
99 |
// Browse Relations |
|
100 |
|
|
101 |
println "Relations:" |
|
102 |
for (String type : structure.getRelations()) { |
|
103 |
def relations = analecCorpus.getRelations(type) |
|
104 |
if (relations.size() > 0) { |
|
105 |
println " ${relations.size()} $type" |
|
106 |
} |
|
107 |
} |
|
108 |
|
|
109 |
// Browse Schemas |
|
110 |
|
|
111 |
println "Schemas:" |
|
112 |
for (String type : structure.getSchemas()) { |
|
113 |
def schemas = analecCorpus.getSchemas(type) |
|
114 |
if (schemas.size() > 0) { |
|
115 |
println " ${schemas.size()} $type" |
|
116 |
} |
|
117 |
} |
|
118 |
|
|
119 |
// URS selections |
|
120 |
|
|
121 |
// select Schemas |
|
122 |
def debug = 0 // 1 2 for more logs |
|
123 |
def strict_inclusion = true |
|
124 |
def position = 0 |
|
125 |
def minimum_schema_size = 1; |
|
126 |
def maximum_schema_size = 10; |
|
127 |
def schema_ursql = "SEXEMPLE" |
|
128 |
def unit_ursql = "EXEMPLE@PEXEMPLE=oui" |
|
129 |
def unit_type = "EXEMPLE" |
|
130 |
println AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion) |
|
131 |
|
|
132 |
|
|
133 |
// with URSQL |
|
134 |
println AnalecUtils.filterElements(debug, analecCorpus.getUnites(unit_type), unit_ursql) |
|
135 |
|
|
136 |
// with intersection with CQP corpus matches |
|
137 |
println AnalecUtils.filterUniteByInclusion(debug, analecCorpus.getUnites(unit_type), corpus.getMatches(), strict_inclusion, position) |
|
138 |
|
|
139 |
// by size |
|
140 |
println AnalecUtils.filterBySize(analecCorpus.getSchemas(schema_ursql), minimum_schema_size, maximum_schema_size) |
|
141 |
|
|
142 |
// Delete annotations |
|
143 |
|
|
144 |
analecCorpus.supUnite(u) |
|
145 |
analecCorpus.supUnite(u2) |
|
146 |
analecCorpus.supRelation(relation) |
|
147 |
analecCorpus.supSchema(schema) |
|
148 |
|
|
149 |
// Delete annotation value |
|
150 |
|
|
151 |
structure.supprimerVal(Unite.class, "EXEMPLE", "PEXEMPLE", "oui") |
|
152 |
structure.supprimerVal(Unite.class, "EXEMPLE", "PEXEMPLE", "non") |
|
153 |
|
|
154 |
// Delete annotation property |
|
155 |
|
|
156 |
structure.supprimerProp(Unite.class, "EXEMPLE", "PEXEMPLE") |
|
157 |
structure.supprimerProp(Relation.class, "REXEMPLE", "PEXEMPLE") |
|
158 |
structure.supprimerProp(Schema.class, "SEXEMPLE", "PEXEMPLE") |
|
159 |
|
|
160 |
// Delete annotation type |
|
161 |
|
|
162 |
structure.supprimerType(Unite.class, "EXEMPLE"); |
|
163 |
structure.supprimerType(Relation.class, "REXEMPLE"); |
|
164 |
structure.supprimerType(Schema.class, "SEXEMPLE"); |
|
165 |
|
|
166 |
// Revert changes |
|
167 |
|
|
168 |
//URSCorpora.revert(corpus); |
|
169 |
|
|
170 |
// Save changes |
|
171 |
|
|
172 |
//URSCorpora.saveCorpus(corpus) |
|
173 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/SUJ-PARTINFMacro.groovy (revision 2174) | ||
---|---|---|
43 | 43 |
if (!ParametersDialog.open(this)) return |
44 | 44 |
|
45 | 45 |
corpus = corpusViewSelection |
46 |
CQI = Toolbox.getCqiClient()
|
|
46 |
CQI = CQPSearchEngine.getCqiClient()
|
|
47 | 47 |
word = corpus.getWordProperty() |
48 | 48 |
posProperty = corpus.getProperty(pos_property_name) |
49 | 49 |
if (posProperty == null) { |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/VerificationsMacro.groovy (revision 2174) | ||
---|---|---|
18 | 18 |
import org.kohsuke.args4j.* |
19 | 19 |
import groovy.transform.* |
20 | 20 |
import org.txm.* |
21 |
|
|
21 | 22 |
import org.txm.rcpapplication.swt.widget.parameters.* |
22 |
import org.txm.analec.* |
|
23 |
import org.txm.urs.* |
|
24 |
import org.txm.annotation.urs.* |
|
23 | 25 |
import org.txm.searchengine.cqp.* |
24 | 26 |
import org.txm.searchengine.cqp.corpus.* |
25 | 27 |
import visuAnalec.Message.* |
... | ... | |
57 | 59 |
if (!ParametersDialog.open(this)) return |
58 | 60 |
|
59 | 61 |
corpus = corpusViewSelection |
60 |
CQI = Toolbox.getCqiClient()
|
|
62 |
CQI = CQPSearchEngine.getCqiClient()
|
|
61 | 63 |
word = corpus.getWordProperty() |
62 | 64 |
posProperty = corpus.getProperty(pos_property_name) |
63 | 65 |
if (posProperty == null) { |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/CreationAnaphoresMacro.groovy (revision 2174) | ||
---|---|---|
70 | 70 |
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } // sort them |
71 | 71 |
|
72 | 72 |
for (int i = 0 ; i < units.size() - 1 ; i++) { // build RELATIONS and don't process the last unit |
73 |
println "creating "+units[i+1]+", "+units[i]
|
|
73 |
println "creating relation with "+units[i+1].getProps()+", "+units[i].getProps()
|
|
74 | 74 |
Relation relation = new Relation("ANAPHORE", units[i+1], units[i]) |
75 | 75 |
relation.getProps().put("TYPE", "COREFERENTE") |
76 | 76 |
analecCorpus.addRelationLue(relation) // add the new relation |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/LongueurDesMentionsMacro.groovy (revision 2174) | ||
---|---|---|
107 | 107 |
def prop = unit.getProp(LONGUEUR) |
108 | 108 |
if (!reset && prop != null && prop.length() > 0) continue // l'unité a déjà une LONGUEUR |
109 | 109 |
|
110 |
int[] positions = null
|
|
110 |
int[] positions = null |
|
111 | 111 |
if (unit.getDeb() == unit.getFin()) positions = [unit.getDeb()] |
112 | 112 |
else positions = (unit.getDeb()..unit.getFin()) |
113 | 113 |
|
114 |
def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions) |
|
115 |
def cat = testRules(positions, Mention)
|
|
114 |
//def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
|
|
115 |
def cat = testRules(positions, positions)
|
|
116 | 116 |
|
117 | 117 |
if (cat != null) { |
118 | 118 |
vue.setValeurChamp(unit, LONGUEUR, cat) |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/Si2SingletonMacro.groovy (revision 2174) | ||
---|---|---|
26 | 26 |
} |
27 | 27 |
|
28 | 28 |
// BEGINNING OF PARAMETERS |
29 |
@Field @Option(name="unitType", usage="", widget="String", required=true, def="MENTION")
|
|
30 |
def unitType
|
|
31 |
@Field @Option(name="refPropertyName", usage="", widget="String", required=true, def="REF")
|
|
32 |
def refPropertyName
|
|
29 |
@Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
|
|
30 |
def unit_type
|
|
31 |
@Field @Option(name="ref_property_name", usage="", widget="String", required=true, def="REF")
|
|
32 |
def ref_property_name
|
|
33 | 33 |
if (!ParametersDialog.open(this)) return |
34 | 34 |
|
35 | 35 |
corpus = corpusViewSelection |
... | ... | |
38 | 38 |
analecCorpus = URSCorpora.getCorpus(corpus) |
39 | 39 |
vue = URSCorpora.getVue(corpus) |
40 | 40 |
structure = analecCorpus.getStructure() |
41 |
if (!structure.getUnites().contains(unitType)) { // check if the structure contains the unitType units
|
|
42 |
println "Error: corpus structure does not contains unit with name=$unitType"
|
|
41 |
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
|
|
42 |
println "Error: corpus structure does not contains unit with name=$unit_type"
|
|
43 | 43 |
return |
44 | 44 |
} |
45 | 45 |
|
46 |
if (!structure.getUniteProperties(unitType).contains(refPropertyName)) {
|
|
47 |
println "Error: corpus structure does not contains property name=$unitType"
|
|
46 |
if (!structure.getUniteProperties(unit_type).contains(ref_property_name)) {
|
|
47 |
println "Error: corpus structure does not contains property name=$unit_type"
|
|
48 | 48 |
return |
49 | 49 |
} |
50 | 50 |
|
51 | 51 |
def nModified = 0 |
52 | 52 |
def nIgnored = 0 |
53 | 53 |
|
54 |
def units = analecCorpus.getUnites(unitType)
|
|
54 |
def units = analecCorpus.getUnites(unit_type)
|
|
55 | 55 |
//units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } |
56 | 56 |
|
57 | 57 |
def refSet = new HashSet() |
58 | 58 |
for (Unite unit : units) { // process all units |
59 |
def prop = unit.getProp(refPropertyName)
|
|
59 |
def prop = unit.getProp(ref_property_name)
|
|
60 | 60 |
refSet.add(prop) |
61 | 61 |
} |
62 | 62 |
|
63 | 63 |
def counter = 1 |
64 | 64 |
for (Unite unit : units) { // process all units |
65 | 65 |
|
66 |
def prop = unit.getProp(refPropertyName)
|
|
66 |
def prop = unit.getProp(ref_property_name)
|
|
67 | 67 |
if (prop && prop == "SI") { |
68 | 68 |
def name = "SI_" + counter |
69 | 69 |
while (refSet.contains(name)) { |
... | ... | |
72 | 72 |
} |
73 | 73 |
counter++ |
74 | 74 |
//println "old prop"+ prop |
75 |
unit.getProps().put(refPropertyName, name)
|
|
75 |
unit.getProps().put(ref_property_name, name)
|
|
76 | 76 |
//println "new prop"+ name |
77 | 77 |
nModified++ |
78 | 78 |
} else { |
... | ... | |
81 | 81 |
} |
82 | 82 |
|
83 | 83 |
println "Result:" |
84 |
println "- $nModified units of type $unitType have been modified." |
|
85 |
println "- $nIgnored units of type $unitType have not been modified.\n" |
|
84 |
println "- $nModified units of type $unit_type have been modified." |
|
85 |
println "- $nIgnored units of type $unit_type have not been modified.\n" |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportToGlozz1_0_0Macro.groovy (revision 2174) | ||
---|---|---|
1 |
package org.txm.macro.urs.export |
|
2 |
|
|
3 |
// @author: Bruno Oberlé |
|
4 |
// v1.0.0 2017-08-28 |
|
5 |
|
|
6 |
/* |
|
7 |
Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz: |
|
8 |
- un fichier .ac contenant le corpus brut, |
|
9 |
- un fichier .aa contenant les annotations au format XML utilisé par Glozz. |
|
10 |
Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure, |
|
11 |
ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz. |
|
12 |
Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION"). |
|
13 |
La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam). Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec. |
|
14 |
*/ |
|
15 |
|
|
16 |
// STANDARD DECLARATIONS |
|
17 |
|
|
18 |
import org.apache.commons.lang.* |
|
19 |
import org.kohsuke.args4j.* |
|
20 |
import groovy.transform.* |
|
21 |
import org.txm.* |
|
22 |
import org.txm.rcp.swt.widget.parameters.* |
|
23 |
import org.txm.annotation.urs.* |
|
24 |
import org.txm.searchengine.cqp.* |
|
25 |
import org.txm.searchengine.cqp.corpus.* |
|
26 |
import visuAnalec.Message.* |
|
27 |
import visuAnalec.donnees.* |
|
28 |
import visuAnalec.elements.* |
|
29 |
import visuAnalec.vue.* |
|
30 |
|
|
31 |
// CHECK CORPUS |
|
32 |
|
|
33 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
34 |
println "Corpora selection is not a MainCorpus: "+corpusViewSelection |
|
35 |
return; |
|
36 |
} |
|
37 |
|
|
38 |
// BEGINNING OF PARAMETERS |
|
39 |
|
|
40 |
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION") |
|
41 |
String unit_type |
|
42 |
|
|
43 |
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)") |
|
44 |
String filename |
|
45 |
|
|
46 |
if (!ParametersDialog.open(this)) return; |
|
47 |
|
|
48 |
// VARIABLES |
|
49 |
|
|
50 |
corpus = corpusViewSelection |
|
51 |
|
|
52 |
doExport(corpus, unit_type, filename) |
|
53 |
|
|
54 |
public void doExport(MainCorpus corpus, String unit_type, String filename) { |
|
55 |
|
|
56 |
size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1) |
|
57 |
CQI = CQPSearchEngine.getCqiClient() |
|
58 |
word = corpus.getWordProperty() |
|
59 |
// note: using "lbn" seems to work better than "pn" (some imported corpora are missing this information) |
|
60 |
//pn = corpus.getProperty("pn") |
|
61 |
//if (pn == null) pn = corpus.getProperty("div") |
|
62 |
pn = corpus.getProperty("lbn") |
|
63 |
|
|
64 |
// BUILD THE RAW TEXT, THE POSITIONS AND FIND THE PARAGRAPHS |
|
65 |
|
|
66 |
rawText = "" // the corpus for the .ac file |
|
67 |
positions = [] // each element is an array [start, end] indicating the position in the rawText |
|
68 |
pnCount = 0 // the par counter, used for indexing the pns array |
|
69 |
lastPn = -1 // the last paragraph number |
|
70 |
pns = [] // each element is an array [start, end] representing the start and end of the paragraph in the rawText |
|
71 |
for (def i=0; i<size; i++) { |
|
72 |
f = CQI.cpos2Str(word.getQualifiedName(), (int[])[i])[0] |
|
73 |
if (pn == null) { |
|
74 |
p = 1 |
|
75 |
} else { |
|
76 |
p = CQI.cpos2Str(pn.getQualifiedName(), (int[])[i])[0] |
|
77 |
} |
|
78 |
start = rawText.length() |
|
79 |
rawText += f |
|
80 |
if (lastPn != p) { |
|
81 |
pnCount++; |
|
82 |
if (pnCount > 1) { |
|
83 |
pns[pnCount-2][1] = end |
|
84 |
} |
|
85 |
pns[pnCount-1] = [start, 0] |
|
86 |
} |
|
87 |
lastPn = p |
|
88 |
end = rawText.length() // must be after setting it up in pns! |
|
89 |
if (i != size-1) rawText += " " |
|
90 |
positions[i] = [start, end] |
|
91 |
} |
|
92 |
pns[pnCount-1][1] = end |
|
93 |
println pnCount + " paragraph(s) found." |
|
94 |
|
|
95 |
|
|
96 |
|
|
97 |
// CORPUS ANALEC (GET THE ANNOTATIONS) |
|
98 |
|
|
99 |
// note that unit_type has been defined with an option of the dialog at the beginning |
|
100 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
101 |
|
|
102 |
// list of properties |
|
103 |
|
|
104 |
struct = analecCorpus.getStructure(); |
|
105 |
propertyList = struct.getUniteProperties(unit_type); |
|
106 |
|
|
107 |
// export to file (corpus) |
|
108 |
|
|
109 |
corpusFilename = filename + ".ac"; |
|
110 |
def corpusFile = new File(corpusFilename); |
|
111 |
corpusFile.write(rawText) |
|
112 |
println("Corpus written to `"+corpusFilename+"'."); |
|
113 |
|
|
114 |
// export to file (annotations) |
|
115 |
|
|
116 |
annotFilename = filename + ".aa"; |
|
117 |
def annotFile = new File(annotFilename) |
|
118 |
annotFile.write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<annotations>\n") |
|
119 |
// erase (if you use << you don't erase!) |
|
120 |
def counter = 0 |
|
121 |
|
|
122 |
// export paragraphs |
|
123 |
for (def i=0; i<pns.size(); i++) { |
|
124 |
def start = pns[i][0] |
|
125 |
def end = pns[i][1] |
|
126 |
annotFile << "<unit id=\"me_"+counter+"\">\n"; |
|
127 |
annotFile << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n"; |
|
128 |
annotFile << "<characterisation><type>paragraph</type><featureSet /></characterisation>\n"; |
|
129 |
annotFile << "<positioning><start><singlePosition index=\""+start+"\" /></start><end><singlePosition index=\""+end+"\" /></end></positioning>\n"; |
|
130 |
annotFile << "</unit>\n"; |
|
131 |
counter++; |
|
132 |
} |
|
133 |
|
|
134 |
// export units |
|
135 |
def units = analecCorpus.getUnites(unit_type); |
|
136 |
//units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }; |
|
137 |
def unitCount = 0 |
|
138 |
for (Unite unit : units) { |
|
139 |
unitCount++; |
|
140 |
annotFile << "<unit id=\"me_"+counter+"\">\n"; |
|
141 |
annotFile << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n"; |
|
142 |
annotFile << "<characterisation>\n"; |
|
143 |
annotFile << "<type>"+unit_type+"</type>\n"; |
|
144 |
annotFile << "<featureSet>\n"; |
|
145 |
for (String propertyName : propertyList) { |
|
146 |
annotFile << "<feature name=\""+propertyName+"\">"+unit.getProp(propertyName)+"</feature>\n"; |
|
147 |
} |
|
148 |
annotFile << "</featureSet>\n"; |
|
149 |
annotFile << "</characterisation>\n"; |
|
150 |
start = positions[unit.getDeb()][0] |
|
151 |
end = positions[unit.getFin()][1] |
|
152 |
annotFile << "<positioning><start><singlePosition index=\""+start+"\" /></start><end><singlePosition index=\""+end+"\" /></end></positioning>\n"; |
|
153 |
annotFile << "</unit>\n"; |
|
154 |
counter++; |
|
155 |
} |
|
156 |
annotFile << "</annotations>\n"; |
|
157 |
|
|
158 |
println unitCount + " unit(s) found." |
|
159 |
|
|
160 |
println("Annotations written to `"+annotFilename+"'."); |
|
161 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportToGlozz1_1_0Macro.groovy (revision 2174) | ||
---|---|---|
1 |
package org.txm.macro.urs.export |
|
2 |
|
|
3 |
// @author: Bruno Oberle |
|
4 |
// v1.1.0 2017-10-25: |
|
5 |
// - using the "p" structural unit if available to get paragraphs |
|
6 |
// - no space before comma, parenthesis, full stop, hyphen, etc. |
|
7 |
// - no space after hyphen, parenthesis, apostrophe, etc. |
|
8 |
// v1.0.0 2017-08-28 |
|
9 |
|
|
10 |
/* |
|
11 |
Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz: |
|
12 |
- un fichier .ac contenant le corpus brut, |
|
13 |
- un fichier .aa contenant les annotations au format XML utilisé par Glozz. |
|
14 |
Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure, |
|
15 |
ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz. |
|
16 |
Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION"). |
|
17 |
La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam). Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec. |
|
18 |
*/ |
|
19 |
|
|
20 |
// STANDARD DECLARATIONS |
|
21 |
|
|
22 |
import org.apache.commons.lang.* |
|
23 |
import org.kohsuke.args4j.* |
|
24 |
import groovy.transform.* |
|
25 |
import org.txm.* |
|
26 |
import org.txm.rcp.swt.widget.parameters.* |
|
27 |
import org.txm.annotation.urs.* |
|
28 |
import org.txm.searchengine.cqp.* |
|
29 |
import org.txm.searchengine.cqp.corpus.* |
|
30 |
import visuAnalec.Message.* |
|
31 |
import visuAnalec.donnees.* |
|
32 |
import visuAnalec.elements.* |
|
33 |
import visuAnalec.vue.* |
|
34 |
|
|
35 |
|
|
36 |
// GLOBAL VARIABLES |
|
37 |
|
|
38 |
corpus = corpusViewSelection |
|
39 |
CQI = CQPSearchEngine.getCqiClient() |
|
40 |
|
|
41 |
// CHECK CORPUS |
|
42 |
|
|
43 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
44 |
println "Corpora selection is not a MainCorpus: "+corpusViewSelection |
|
45 |
return; |
|
46 |
} |
|
47 |
|
|
48 |
// PARAGRAPH AS STRUCTURAL UNIT? |
|
49 |
|
|
50 |
/* note: some texts have a structural unit called "p", which represents a paragraph. |
|
51 |
If this structural unit is present, we use it. Otherwise we ask the user (the |
|
52 |
best way is to use the "pn" (paragraph number) or "lbn" (line number) property), |
|
53 |
depending on what is available in the corpus. If the user gives no property name, |
|
54 |
we only define one paragraph for the whole text. */ |
|
55 |
|
|
56 |
parUnit = corpus.getStructuralUnit("p") |
|
57 |
|
|
58 |
// BEGINNING OF PARAMETERS |
|
59 |
|
|
60 |
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION") |
|
61 |
String unit_type |
|
62 |
|
|
63 |
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)") |
|
64 |
String filename |
|
65 |
|
|
66 |
if (!parUnit) { |
|
67 |
@Field @Option(name="par_prop",usage="the property used to compute paragraphs", widget="String", required=false, def="lbn") |
|
68 |
String par_prop |
|
69 |
} |
|
70 |
|
|
71 |
if (!ParametersDialog.open(this)) return; |
|
72 |
|
|
73 |
// what paragraph unit to use? |
|
74 |
|
|
75 |
pn = null |
|
76 |
if (!parUnit && !par_prop.equals("")) { |
|
77 |
pn = corpus.getProperty(par_prop) |
|
78 |
if (!pn) { |
|
79 |
println "Error: I can't find a the property `$par_prop'." |
|
80 |
return |
|
81 |
} |
|
82 |
} |
|
83 |
|
|
84 |
/*********************************/ |
|
85 |
|
|
86 |
doExport(corpus, unit_type, filename) |
|
87 |
|
|
88 |
public void doExport(MainCorpus corpus, String unit_type, String filename) { |
|
89 |
|
|
90 |
size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1) |
|
91 |
word = corpus.getWordProperty() |
|
92 |
|
|
93 |
// BUILD THE RAW TEXT, THE POSITIONS AND FIND THE PARAGRAPHS |
|
94 |
|
|
95 |
rawText = "" // the corpus for the .ac file |
|
96 |
positions = [] // each element is an array [start, end] indicating the position in the rawText |
|
97 |
pnCount = 0 // the par counter, used for indexing the pars array |
|
98 |
lastPn = -1 // the last paragraph number |
|
99 |
pars = [] // each element is an array [start, end] representing the start and end of the paragraph in the rawText |
|
100 |
insertSpace = true |
|
101 |
for (def i=0; i<size; i++) { |
|
102 |
f = CQI.cpos2Str(word.getQualifiedName(), (int[])[i])[0] |
|
103 |
if (parUnit) { |
|
104 |
p = CQI.cpos2Struc(parUnit.getQualifiedName(), (int[])[i])[0] |
|
105 |
} else if (pn == null) { |
|
106 |
p = 1 |
|
107 |
} else { |
|
108 |
p = CQI.cpos2Str(pn.getQualifiedName(), (int[])[i])[0] |
|
109 |
} |
|
110 |
if (i > 0 && insertSpace |
|
111 |
&& !f.equals(".") && !f.equals(",") && !f.equals("'") && !f.equals("’") && !f.equals("-") |
|
112 |
&& !f.equals(")") && !f.equals("]") && !f.startsWith("-")) { |
|
113 |
rawText += " " |
|
114 |
} |
|
115 |
insertSpace = true // reset |
|
116 |
if (f.equals("-") || f.equals("[") || f.equals("(") |
|
117 |
|| f.endsWith("-") || f.endsWith("'") || f.endsWith("’") || f.endsWith("-")) { |
|
118 |
insertSpace = false |
|
119 |
} |
|
120 |
start = rawText.length() |
|
121 |
rawText += f |
|
122 |
if (lastPn != p) { |
|
123 |
pnCount++; |
|
124 |
if (pnCount > 1) { |
|
125 |
pars[pnCount-2][1] = end |
|
126 |
} |
|
127 |
pars[pnCount-1] = [start, 0] |
|
128 |
} |
|
129 |
lastPn = p |
|
130 |
end = rawText.length() // must be after setting it up in pars! |
|
131 |
positions[i] = [start, end] |
|
132 |
} |
|
133 |
pars[pnCount-1][1] = end |
|
134 |
println pnCount + " paragraph(s) found." |
|
135 |
|
|
136 |
// CORPUS ANALEC (GET THE ANNOTATIONS) |
|
137 |
|
|
138 |
// note that unit_type has been defined with an option of the dialog at the beginning |
|
139 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
140 |
|
|
141 |
// list of properties |
|
142 |
|
|
143 |
struct = analecCorpus.getStructure(); |
|
144 |
propertyList = struct.getUniteProperties(unit_type); |
|
145 |
|
|
146 |
// export to file (corpus) |
|
147 |
|
|
148 |
corpusFilename = filename + ".ac"; |
|
149 |
def corpusFile = new File(corpusFilename); |
|
150 |
corpusFile.write(rawText) |
|
151 |
println("Corpus written to `"+corpusFilename+"'."); |
|
152 |
|
|
153 |
// export to file (annotations) |
|
154 |
|
|
155 |
annotFilename = filename + ".aa"; |
|
156 |
def annotFile = new File(annotFilename) |
|
157 |
annotFile.write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<annotations>\n") |
|
158 |
// erase (if you use << you don't erase!) |
|
159 |
def counter = 0 |
|
160 |
|
|
161 |
// export paragraphs |
|
162 |
for (def i=0; i<pars.size(); i++) { |
|
163 |
def start = pars[i][0] |
|
164 |
def end = pars[i][1] |
|
165 |
annotFile << "<unit id=\"me_"+counter+"\">\n"; |
|
166 |
annotFile << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n"; |
|
167 |
annotFile << "<characterisation><type>paragraph</type><featureSet /></characterisation>\n"; |
|
168 |
annotFile << "<positioning><start><singlePosition index=\""+start+"\" /></start><end><singlePosition index=\""+end+"\" /></end></positioning>\n"; |
|
169 |
annotFile << "</unit>\n"; |
|
170 |
counter++; |
|
171 |
} |
|
172 |
|
|
173 |
// export units |
|
174 |
def units = analecCorpus.getUnites(unit_type); |
|
175 |
//units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }; |
|
176 |
def unitCount = 0 |
|
177 |
for (Unite unit : units) { |
|
178 |
unitCount++; |
|
179 |
annotFile << "<unit id=\"me_"+counter+"\">\n"; |
|
180 |
annotFile << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n"; |
|
181 |
annotFile << "<characterisation>\n"; |
|
182 |
annotFile << "<type>"+unit_type+"</type>\n"; |
|
183 |
annotFile << "<featureSet>\n"; |
|
184 |
for (String propertyName : propertyList) { |
|
185 |
annotFile << "<feature name=\""+propertyName+"\">"+unit.getProp(propertyName)+"</feature>\n"; |
|
186 |
} |
|
187 |
annotFile << "</featureSet>\n"; |
|
188 |
annotFile << "</characterisation>\n"; |
|
189 |
start = positions[unit.getDeb()][0] |
|
190 |
end = positions[unit.getFin()][1] |
|
191 |
annotFile << "<positioning><start><singlePosition index=\""+start+"\" /></start><end><singlePosition index=\""+end+"\" /></end></positioning>\n"; |
|
192 |
annotFile << "</unit>\n"; |
|
193 |
counter++; |
|
194 |
} |
|
195 |
annotFile << "</annotations>\n"; |
|
196 |
|
|
197 |
println unitCount + " unit(s) found." |
|
198 |
|
|
199 |
println("Annotations written to `"+annotFilename+"'."); |
|
200 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportToGlozzMacro.groovy (revision 2174) | ||
---|---|---|
1 |
package org.txm.macro.urs.export |
|
2 |
|
|
3 |
// @author: Bruno Oberlé |
|
4 |
// v1.0.0 2017-08-28 |
|
5 |
|
|
6 |
/* |
|
7 |
Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz: |
|
8 |
- un fichier .ac contenant le corpus brut, |
|
9 |
- un fichier .aa contenant les annotations au format XML utilisé par Glozz. |
|
10 |
Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure, |
|
11 |
ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz. |
|
12 |
Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION"). |
|
13 |
La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam). Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec. |
|
14 |
*/ |
|
15 |
|
|
16 |
// STANDARD DECLARATIONS |
|
17 |
|
|
18 |
import org.apache.commons.lang.* |
|
19 |
import org.kohsuke.args4j.* |
|
20 |
import groovy.transform.* |
|
21 |
import org.txm.* |
|
22 |
import org.txm.rcp.swt.widget.parameters.* |
|
23 |
import org.txm.annotation.urs.* |
|
24 |
import org.txm.searchengine.cqp.* |
|
25 |
import org.txm.searchengine.cqp.corpus.* |
|
26 |
import visuAnalec.Message.* |
|
27 |
import visuAnalec.donnees.* |
|
28 |
import visuAnalec.elements.* |
|
29 |
import visuAnalec.vue.* |
|
30 |
|
|
31 |
// CHECK CORPUS |
|
32 |
|
|
33 |
// CHECK CORPUS
// The export only makes sense on a main corpus: bail out on any other selection.
boolean selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
	println "Corpora selection is not a MainCorpus: " + corpusViewSelection
	return
}

// BEGINNING OF PARAMETERS

// Type of the URS units to export.
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
String unit_type

// Base path of the output files; ".ac" and ".aa" are appended by doExport.
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)")
String filename

// Stop here if the parameters dialog does not return a true value.
boolean parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
	return
}

// VARIABLES

// Kept as a script binding variable, as in the original macro.
corpus = corpusViewSelection

doExport(corpus, unit_type, filename)
|
53 |
|
|
54 |
/**
 * Exports the text of a corpus and its URS (Analec) units to a pair of Glozz files:
 * "<filename>.ac" holds the raw text (tokens joined by single spaces) and
 * "<filename>.aa" holds the XML annotations (one "paragraph" unit per paragraph,
 * then one unit per URS unit of the requested type).
 *
 * Both files are written in UTF-8, which is the encoding declared in the XML header.
 * Unit types, feature names and feature values are XML-escaped.
 * An empty corpus yields an empty .ac file and an .aa file without units.
 *
 * @param corpus the main corpus to export
 * @param unit_type the URS unit type to export (e.g. "MENTION")
 * @param filename the output path without extension (".ac" and ".aa" are appended)
 */
public void doExport(MainCorpus corpus, String unit_type, String filename) {

	def size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1)
	def CQI = CQPSearchEngine.getCqiClient()
	def word = corpus.getWordProperty()
	// note: using "lbn" seems to work better than "pn" (some imported corpora are missing this information)
	def pn = corpus.getProperty("lbn")

	// BUILD THE RAW TEXT, THE POSITIONS AND FIND THE PARAGRAPHS

	def rawText = new StringBuilder() // the corpus for the .ac file
	def positions = [] // each element is [start, end]: the character span of one token in the raw text
	def pns = [] // each element is [start, end]: the character span of one paragraph in the raw text
	def lastPn = -1 // the last paragraph number seen
	int start = 0
	int end = 0
	for (int i = 0; i < size; i++) {
		int[] cpos = [i] as int[]
		def token = CQI.cpos2Str(word.getQualifiedName(), cpos)[0]
		// without a paragraph property, the whole corpus is a single paragraph
		def p = (pn == null) ? 1 : CQI.cpos2Str(pn.getQualifiedName(), cpos)[0]
		start = rawText.length()
		rawText << token
		if (lastPn != p) {
			// a new paragraph starts on this token: close the previous one
			// at the end of the previous token ("end" is not updated yet)
			if (!pns.isEmpty()) {
				pns.last()[1] = end
			}
			pns << [start, 0]
		}
		lastPn = p
		end = rawText.length() // must be computed after closing the previous paragraph!
		if (i != size - 1) rawText << " "
		positions << [start, end]
	}
	// close the last paragraph (there is none when the corpus is empty)
	if (!pns.isEmpty()) {
		pns.last()[1] = end
	}
	println pns.size() + " paragraph(s) found."

	// CORPUS ANALEC (GET THE ANNOTATIONS)

	// note that unit_type has been defined with an option of the dialog at the beginning
	def analecCorpus = URSCorpora.getCorpus(corpus);

	// list of properties
	def struct = analecCorpus.getStructure();
	def propertyList = struct.getUniteProperties(unit_type);
	def units = analecCorpus.getUnites(unit_type);

	// export to file (corpus)

	def corpusFilename = filename + ".ac";
	def corpusFile = new File(corpusFilename);
	corpusFile.write(rawText.toString(), "UTF-8")
	println("Corpus written to `"+corpusFilename+"'.");

	// export to file (annotations)

	// minimal XML escaping for text written into element content or attribute values
	def esc = { value ->
		("" + value).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
	}

	def annotFilename = filename + ".aa";
	def annotFile = new File(annotFilename)
	def counter = 0 // running id shared by paragraph units and URS units
	def unitCount = 0
	// a single writer replaces any existing file and is closed even if an error occurs
	annotFile.withWriter("UTF-8") { writer ->
		writer << "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<annotations>\n"

		// export paragraphs
		for (def par : pns) {
			writer << "<unit id=\"me_"+counter+"\">\n";
			writer << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n";
			writer << "<characterisation><type>paragraph</type><featureSet /></characterisation>\n";
			writer << "<positioning><start><singlePosition index=\""+par[0]+"\" /></start><end><singlePosition index=\""+par[1]+"\" /></end></positioning>\n";
			writer << "</unit>\n";
			counter++;
		}

		// export units
		for (Unite unit : units) {
			unitCount++;
			writer << "<unit id=\"me_"+counter+"\">\n";
			writer << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n";
			writer << "<characterisation>\n";
			writer << "<type>"+esc(unit_type)+"</type>\n";
			writer << "<featureSet>\n";
			for (String propertyName : propertyList) {
				writer << "<feature name=\""+esc(propertyName)+"\">"+esc(unit.getProp(propertyName))+"</feature>\n";
			}
			writer << "</featureSet>\n";
			writer << "</characterisation>\n";
			// convert the unit's first/last token indexes to character offsets in the raw text
			def unitStart = positions[unit.getDeb()][0]
			def unitEnd = positions[unit.getFin()][1]
			writer << "<positioning><start><singlePosition index=\""+unitStart+"\" /></start><end><singlePosition index=\""+unitEnd+"\" /></end></positioning>\n";
			writer << "</unit>\n";
			counter++;
		}
		writer << "</annotations>\n";
	}

	println unitCount + " unit(s) found."

	println("Annotations written to `"+annotFilename+"'.");
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportAsMacro.groovy (revision 2174) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.export |
|
7 |
|
|
8 |
import groovy.transform.Field |
|
9 |
|
|
10 |
import org.jfree.chart.JFreeChart |
|
11 |
import org.kohsuke.args4j.* |
|
12 |
import org.txm.Toolbox |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.macro.urs.AnalecUtils |
|
15 |
import org.txm.rcp.Application |
|
16 |
import org.txm.rcp.IImageKeys |
|
17 |
import org.txm.rcp.swt.widget.parameters.* |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
20 |
import org.txm.utils.io.FileCopy; |
|
21 |
import org.txm.utils.io.IOUtils |
|
22 |
import org.txm.utils.zip.Zip |
|
23 |
|
|
24 |
import visuAnalec.elements.* |
|
25 |
|
|
26 |
// Macro body: copies the binary directory of the selected corpus to a sibling directory
// named after the new corpus name, renames the per-corpus files inside the copy,
// patches the old name in the settings, .project and registry files, then zips the
// copy into "<new directory>.txm".
def scriptName = this.class.getSimpleName()
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "** $scriptName please select a MainCorpus to run the macro"
	return;
}

@Field @Option(name="new_name", usage="Corpus name in uppercase", widget="String", required=true, def="CORPUSNAME")
String new_name
if (!ParametersDialog.open(this)) return

// the name is upper-cased before being validated against the CQP corpus name pattern
new_name = new_name.toUpperCase()
def pattern = "[A-Z][-A-Z0-9]{1,20}"
if (!new_name.matches(pattern)) {
	println "New corpus name not conformant to CQP corpus name: "+pattern
	return false
}

// refuse to export unsaved state: the copy is made from the files on disk
MainCorpus mainCorpus = corpusViewSelection.getMainCorpus()
String name = mainCorpus.getName()
if (mainCorpus.isModified()) {
	println "Selected corpus is not saved. Aborting"
	return false
}

visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(mainCorpus)
if (analecCorpus.isModifie()) {
	println "Selected Analec corpus is not saved. Aborting"
	return false
}

File binDirectory = mainCorpus.getProjectDirectory()
File newBinDirectory = new File(binDirectory.getParentFile(), new_name)

if (newBinDirectory.exists()) {
	println "The new corpus directory already exists: $newBinDirectory. Aborting."
	return false
}

FileCopy.copyFiles(binDirectory, newBinDirectory)
if (!newBinDirectory.exists()) {
	println "Fail to copy binary directory $binDirectory to $newBinDirectory"
	return
}

// files and directories of the copy that still carry the old corpus name
File ecFile = new File(newBinDirectory, "analec/${name}.ec")
File ecvFile = new File(newBinDirectory, "analec/${name}.ecv")
File cssFile = new File(newBinDirectory, "css/${name}.css")
File dataFile = new File(newBinDirectory, "data/${name}")
File htmlFile = new File(newBinDirectory, "HTML/${name}")
File defaultCSSFile = new File(newBinDirectory, "HTML/${name}/default/css/${name}.css")
File registryFile = new File(newBinDirectory, "registry/${name.toLowerCase()}")
File txmFile = new File(newBinDirectory, "txm/${name}")

// their new names
File ecFile2 = new File(newBinDirectory, "analec/${new_name}.ec")
File ecvFile2 = new File(newBinDirectory, "analec/${new_name}.ecv")
File cssFile2 = new File(newBinDirectory, "css/${new_name}.css")
File dataFile2 = new File(newBinDirectory, "data/${new_name}")
File htmlFile2 = new File(newBinDirectory, "HTML/${new_name}")
// The default stylesheet is renamed in place, inside the HTML/<old name> directory, BEFORE
// that directory itself is renamed. Renaming the directory first (as was done previously)
// makes the stylesheet's old path disappear, so its rename could never succeed.
File defaultCSSFile2 = new File(newBinDirectory, "HTML/${name}/default/css/${new_name}.css")
File registryFile2 = new File(newBinDirectory, "registry/${new_name.toLowerCase()}")
File txmFile2 = new File(newBinDirectory, "txm/${new_name}")

println "renaming $ecFile : "+ecFile.renameTo(ecFile2)
println "renaming $ecvFile : "+ecvFile.renameTo(ecvFile2)
println "renaming $cssFile : "+cssFile.renameTo(cssFile2)
println "renaming $dataFile : "+dataFile.renameTo(dataFile2)
println "renaming $defaultCSSFile : "+defaultCSSFile.renameTo(defaultCSSFile2)
println "renaming $htmlFile : "+htmlFile.renameTo(htmlFile2)
println "renaming $registryFile : "+registryFile.renameTo(registryFile2)
println "renaming $txmFile : "+txmFile.renameTo(txmFile2)

// patch name in settings
println "replacing old name $name -> ${new_name} in preferences"
File settingsDirectory = new File(newBinDirectory, ".settings")
for (File prefFile : (settingsDirectory.listFiles() ?: [])) {
	if (!prefFile.isFile()) continue // getText() cannot read a sub-directory
	IOUtils.write(prefFile, prefFile.getText().replace(name, new_name))
}

File projectSetting = new File(newBinDirectory, ".project")
IOUtils.write(projectSetting, projectSetting.getText().replace(name, new_name))

// patch registry: the corpus name appears both as-is and lower-cased
String oldcontent = registryFile2.getText();
String content = oldcontent.replace(name, new_name)
content = content.replace(name.toLowerCase(), new_name.toLowerCase())
registryFile2.withWriter { writer ->
	writer.write(content)
}

// "monitor" is not defined in this script; it is expected from the macro binding
File zipFile = new File(newBinDirectory.getAbsolutePath()+".txm")
Zip.compress(newBinDirectory, zipFile, monitor)

if (!zipFile.exists()) {
	println "Fail to zip binary directory $binDirectory to $zipFile"
	return
} else {
	println "Done: $zipFile"
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportUnitsToGlozz.groovy (revision 2174) | ||
---|---|---|
1 |
package org.txm.macro.urs.export |
|
2 |
|
|
3 |
// @author: Bruno Oberlé |
|
4 |
// v1.0.0 2017-08-28 |
|
5 |
|
|
6 |
/* |
|
7 |
Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz: |
|
8 |
- un fichier .ac contenant le corpus brut, |
|
9 |
- un fichier .aa contenant les annotations au format XML utilisé par Glozz. |
|
10 |
Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure, |
|
11 |
ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz. |
|
12 |
Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION"). |
|
13 |
La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam). Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec. |
|
14 |
*/ |
|
15 |
|
|
16 |
// STANDARD DECLARATIONS |
|
17 |
|
|
18 |
import org.apache.commons.lang.* |
|
19 |
import org.kohsuke.args4j.* |
|
20 |
import groovy.transform.* |
|
21 |
import org.txm.* |
|
22 |
import org.txm.rcp.swt.widget.parameters.* |
|
23 |
import org.txm.annotation.urs.* |
|
24 |
import org.txm.searchengine.cqp.* |
|
25 |
import org.txm.searchengine.cqp.corpus.* |
|
26 |
import visuAnalec.Message.* |
|
27 |
import visuAnalec.donnees.* |
|
28 |
import visuAnalec.elements.* |
|
29 |
import visuAnalec.vue.* |
|
30 |
|
|
31 |
// CHECK CORPUS |
|
32 |
|
|
33 |
// CHECK CORPUS
// The export only makes sense on a main corpus: bail out on any other selection.
boolean selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
	println "Corpora selection is not a MainCorpus: " + corpusViewSelection
	return
}

// BEGINNING OF PARAMETERS

// Type of the URS units to export.
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
String unit_type

// Base path of the output files; ".ac" and ".aa" are appended by doExport.
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)")
String filename

// Stop here if the parameters dialog does not return a true value.
boolean parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
	return
}

// VARIABLES

// Kept as a script binding variable, as in the original macro.
corpus = corpusViewSelection

doExport(corpus, unit_type, filename)
|
53 |
|
|
54 |
/**
 * Exports the text of a corpus and its URS (Analec) units to a pair of Glozz files:
 * "<filename>.ac" holds the raw text (tokens joined by single spaces) and
 * "<filename>.aa" holds the XML annotations (one "paragraph" unit per paragraph,
 * then one unit per URS unit of the requested type).
 *
 * Both files are written in UTF-8, which is the encoding declared in the XML header.
 * Unit types, feature names and feature values are XML-escaped.
 * An empty corpus yields an empty .ac file and an .aa file without units.
 *
 * @param corpus the main corpus to export
 * @param unit_type the URS unit type to export (e.g. "MENTION")
 * @param filename the output path without extension (".ac" and ".aa" are appended)
 */
public void doExport(MainCorpus corpus, String unit_type, String filename) {

	def size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1)
	def CQI = CQPSearchEngine.getCqiClient()
	def word = corpus.getWordProperty()
	// note: using "lbn" seems to work better than "pn" (some imported corpora are missing this information)
	def pn = corpus.getProperty("lbn")

	// BUILD THE RAW TEXT, THE POSITIONS AND FIND THE PARAGRAPHS

	def rawText = new StringBuilder() // the corpus for the .ac file
	def positions = [] // each element is [start, end]: the character span of one token in the raw text
	def pns = [] // each element is [start, end]: the character span of one paragraph in the raw text
	def lastPn = -1 // the last paragraph number seen
	int start = 0
	int end = 0
	for (int i = 0; i < size; i++) {
		int[] cpos = [i] as int[]
		def token = CQI.cpos2Str(word.getQualifiedName(), cpos)[0]
		// without a paragraph property, the whole corpus is a single paragraph
		def p = (pn == null) ? 1 : CQI.cpos2Str(pn.getQualifiedName(), cpos)[0]
		start = rawText.length()
		rawText << token
		if (lastPn != p) {
			// a new paragraph starts on this token: close the previous one
			// at the end of the previous token ("end" is not updated yet)
			if (!pns.isEmpty()) {
				pns.last()[1] = end
			}
			pns << [start, 0]
		}
		lastPn = p
		end = rawText.length() // must be computed after closing the previous paragraph!
		if (i != size - 1) rawText << " "
		positions << [start, end]
	}
	// close the last paragraph (there is none when the corpus is empty)
	if (!pns.isEmpty()) {
		pns.last()[1] = end
	}
	println pns.size() + " paragraph(s) found."

	// CORPUS ANALEC (GET THE ANNOTATIONS)

	// note that unit_type has been defined with an option of the dialog at the beginning
	def analecCorpus = URSCorpora.getCorpus(corpus);

	// list of properties
	def struct = analecCorpus.getStructure();
	def propertyList = struct.getUniteProperties(unit_type);
	def units = analecCorpus.getUnites(unit_type);

	// export to file (corpus)

	def corpusFilename = filename + ".ac";
	def corpusFile = new File(corpusFilename);
	corpusFile.write(rawText.toString(), "UTF-8")
	println("Corpus written to `"+corpusFilename+"'.");

	// export to file (annotations)

	// minimal XML escaping for text written into element content or attribute values
	def esc = { value ->
		("" + value).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
	}

	def annotFilename = filename + ".aa";
	def annotFile = new File(annotFilename)
	def counter = 0 // running id shared by paragraph units and URS units
	def unitCount = 0
	// a single writer replaces any existing file and is closed even if an error occurs
	annotFile.withWriter("UTF-8") { writer ->
		writer << "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<annotations>\n"

		// export paragraphs
		for (def par : pns) {
			writer << "<unit id=\"me_"+counter+"\">\n";
			writer << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n";
			writer << "<characterisation><type>paragraph</type><featureSet /></characterisation>\n";
			writer << "<positioning><start><singlePosition index=\""+par[0]+"\" /></start><end><singlePosition index=\""+par[1]+"\" /></end></positioning>\n";
			writer << "</unit>\n";
			counter++;
		}

		// export units
		for (Unite unit : units) {
			unitCount++;
			writer << "<unit id=\"me_"+counter+"\">\n";
			writer << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n";
			writer << "<characterisation>\n";
			writer << "<type>"+esc(unit_type)+"</type>\n";
			writer << "<featureSet>\n";
			for (String propertyName : propertyList) {
				writer << "<feature name=\""+esc(propertyName)+"\">"+esc(unit.getProp(propertyName))+"</feature>\n";
			}
			writer << "</featureSet>\n";
			writer << "</characterisation>\n";
			// convert the unit's first/last token indexes to character offsets in the raw text
			def unitStart = positions[unit.getDeb()][0]
			def unitEnd = positions[unit.getFin()][1]
			writer << "<positioning><start><singlePosition index=\""+unitStart+"\" /></start><end><singlePosition index=\""+unitEnd+"\" /></end></positioning>\n";
			writer << "</unit>\n";
			counter++;
		}
		writer << "</annotations>\n";
	}

	println unitCount + " unit(s) found."

	println("Annotations written to `"+annotFilename+"'.");
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportCorpusAsMacro.groovy (revision 2174) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.export |
|
7 |
|
|
8 |
import groovy.transform.Field |
|
9 |
|
|
10 |
import org.jfree.chart.JFreeChart |
|
11 |
import org.kohsuke.args4j.* |
|
12 |
import org.txm.Toolbox |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.macro.urs.AnalecUtils |
|
15 |
import org.txm.rcp.Application |
|
16 |
import org.txm.rcp.IImageKeys |
|
17 |
import org.txm.rcp.swt.widget.parameters.* |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
20 |
import org.txm.utils.io.FileCopy; |
|
21 |
import org.txm.utils.io.IOUtils |
|
22 |
import org.txm.utils.zip.Zip |
|
23 |
|
|
24 |
import visuAnalec.elements.* |
|
25 |
|
|
26 |
// Macro body: copies the binary directory of the selected corpus to a sibling directory
// named after the new corpus name, renames the per-corpus files inside the copy,
// patches the old name in the settings, .project and registry files, then zips the
// copy into "<new directory>.txm".
def scriptName = this.class.getSimpleName()
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "** $scriptName please select a MainCorpus to run the macro"
	return;
}

@Field @Option(name="new_name", usage="Corpus name in uppercase", widget="String", required=true, def="CORPUSNAME")
String new_name
if (!ParametersDialog.open(this)) return

// the name is upper-cased before being validated against the CQP corpus name pattern
new_name = new_name.toUpperCase()
def pattern = "[A-Z][-A-Z0-9]{1,20}"
if (!new_name.matches(pattern)) {
	println "New corpus name not conformant to CQP corpus name: "+pattern
	return false
}

// refuse to export unsaved state: the copy is made from the files on disk
MainCorpus mainCorpus = corpusViewSelection.getMainCorpus()
String name = mainCorpus.getName()
if (mainCorpus.isModified()) {
	println "Selected corpus is not saved. Aborting"
	return false
}

visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(mainCorpus)
if (analecCorpus.isModifie()) {
	println "Selected Analec corpus is not saved. Aborting"
	return false
}

File binDirectory = mainCorpus.getProjectDirectory()
File newBinDirectory = new File(binDirectory.getParentFile(), new_name)

if (newBinDirectory.exists()) {
	println "The new corpus directory already exists: $newBinDirectory. Aborting."
	return false
}

FileCopy.copyFiles(binDirectory, newBinDirectory)
if (!newBinDirectory.exists()) {
	println "Fail to copy binary directory $binDirectory to $newBinDirectory"
	return
}

// files and directories of the copy that still carry the old corpus name
File ecFile = new File(newBinDirectory, "analec/${name}.ec")
File ecvFile = new File(newBinDirectory, "analec/${name}.ecv")
File cssFile = new File(newBinDirectory, "css/${name}.css")
File dataFile = new File(newBinDirectory, "data/${name}")
File htmlFile = new File(newBinDirectory, "HTML/${name}")
File defaultCSSFile = new File(newBinDirectory, "HTML/${name}/default/css/${name}.css")
File registryFile = new File(newBinDirectory, "registry/${name.toLowerCase()}")
File txmFile = new File(newBinDirectory, "txm/${name}")

// their new names
File ecFile2 = new File(newBinDirectory, "analec/${new_name}.ec")
File ecvFile2 = new File(newBinDirectory, "analec/${new_name}.ecv")
File cssFile2 = new File(newBinDirectory, "css/${new_name}.css")
File dataFile2 = new File(newBinDirectory, "data/${new_name}")
File htmlFile2 = new File(newBinDirectory, "HTML/${new_name}")
// The default stylesheet is renamed in place, inside the HTML/<old name> directory, BEFORE
// that directory itself is renamed. Renaming the directory first (as was done previously)
// makes the stylesheet's old path disappear, so its rename could never succeed.
File defaultCSSFile2 = new File(newBinDirectory, "HTML/${name}/default/css/${new_name}.css")
File registryFile2 = new File(newBinDirectory, "registry/${new_name.toLowerCase()}")
File txmFile2 = new File(newBinDirectory, "txm/${new_name}")

println "renaming $ecFile : "+ecFile.renameTo(ecFile2)
println "renaming $ecvFile : "+ecvFile.renameTo(ecvFile2)
println "renaming $cssFile : "+cssFile.renameTo(cssFile2)
println "renaming $dataFile : "+dataFile.renameTo(dataFile2)
println "renaming $defaultCSSFile : "+defaultCSSFile.renameTo(defaultCSSFile2)
println "renaming $htmlFile : "+htmlFile.renameTo(htmlFile2)
println "renaming $registryFile : "+registryFile.renameTo(registryFile2)
println "renaming $txmFile : "+txmFile.renameTo(txmFile2)

// patch name in settings
println "replacing old name $name -> ${new_name} in preferences"
File settingsDirectory = new File(newBinDirectory, ".settings")
for (File prefFile : (settingsDirectory.listFiles() ?: [])) {
	if (!prefFile.isFile()) continue // getText() cannot read a sub-directory
	IOUtils.write(prefFile, prefFile.getText().replace(name, new_name))
}

File projectSetting = new File(newBinDirectory, ".project")
IOUtils.write(projectSetting, projectSetting.getText().replace(name, new_name))

// patch registry: the corpus name appears both as-is and lower-cased
String oldcontent = registryFile2.getText();
String content = oldcontent.replace(name, new_name)
content = content.replace(name.toLowerCase(), new_name.toLowerCase())
registryFile2.withWriter { writer ->
	writer.write(content)
}

// "monitor" is not defined in this script; it is expected from the macro binding
println "creating the TXM file..."
File zipFile = new File(newBinDirectory.getAbsolutePath()+".txm")
Zip.compress(newBinDirectory, zipFile, monitor)

if (!zipFile.exists()) {
	println "Fail to zip binary directory $binDirectory to $zipFile"
	return
} else {
	println "Done: $zipFile"
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportToGlozz1_0_0.groovy (revision 2174) | ||
---|---|---|
1 |
package org.txm.macro.urs.export |
|
2 |
|
|
3 |
// @author: Bruno Oberlé |
|
4 |
// v1.0.0 2017-08-28 |
|
5 |
|
|
6 |
/* |
|
7 |
Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz: |
|
8 |
- un fichier .ac contenant le corpus brut, |
|
9 |
- un fichier .aa contenant les annotations au format XML utilisé par Glozz. |
|
10 |
Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure, |
|
11 |
ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz. |
|
12 |
Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION"). |
|
13 |
La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam). Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec. |
|
14 |
*/ |
|
15 |
|
|
16 |
// STANDARD DECLARATIONS |
|
17 |
|
|
18 |
import org.apache.commons.lang.* |
|
19 |
import org.kohsuke.args4j.* |
|
20 |
import groovy.transform.* |
|
21 |
import org.txm.* |
|
22 |
import org.txm.rcp.swt.widget.parameters.* |
|
23 |
import org.txm.annotation.urs.* |
|
24 |
import org.txm.searchengine.cqp.* |
|
25 |
import org.txm.searchengine.cqp.corpus.* |
|
26 |
import visuAnalec.Message.* |
|
27 |
import visuAnalec.donnees.* |
|
28 |
import visuAnalec.elements.* |
|
29 |
import visuAnalec.vue.* |
|
30 |
|
|
31 |
// CHECK CORPUS |
|
32 |
|
|
33 |
// CHECK CORPUS
// The export only makes sense on a main corpus: bail out on any other selection.
boolean selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
	println "Corpora selection is not a MainCorpus: " + corpusViewSelection
	return
}

// BEGINNING OF PARAMETERS

// Type of the URS units to export.
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
String unit_type

// Base path of the output files; ".ac" and ".aa" are appended by doExport.
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)")
String filename

// Stop here if the parameters dialog does not return a true value.
boolean parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
	return
}

// VARIABLES

// Kept as a script binding variable, as in the original macro.
corpus = corpusViewSelection

doExport(corpus, unit_type, filename)
|
53 |
|
|
54 |
public void doExport(MainCorpus corpus, String unit_type, String filename) { |
|
55 |
|
|
56 |
size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1) |
|
57 |
CQI = CQPSearchEngine.getCqiClient() |
|
58 |
word = corpus.getWordProperty() |
|
59 |
// note: using "lbn" seems to work better than "pn" (some imported corpora are missing this information) |
|
60 |
//pn = corpus.getProperty("pn") |
|
61 |
//if (pn == null) pn = corpus.getProperty("div") |
Formats disponibles : Unified diff