Révision 2174

tmp/org.txm.analec.rcp/plugin.xml (revision 2174)
50 50
                        id="export">
51 51
                  </dynamic>
52 52
               </menu>
53
               <command
54
                     commandId="org.txm.annotation.urs.commands.OpenDemocratTools"
55
                     style="push">
56
               </command>
53 57
            </menu>
54 58
            <command
55 59
                  commandId="org.txm.annotation.urs.commands.EditAnnotationStructure"
......
104 108
               label="%menu.label.0"
105 109
               mnemonic="%menu.mnemonic.0">
106 110
            <command
107
                  commandId="org.txm.annotation.urs.commands.LoadStructureFromGlozz"
111
                  commandId="org.txm.annotation.urs.commands.SaveCorpus"
108 112
                  style="push">
109 113
               <visibleWhen
110 114
                     checkEnabled="false">
......
113 117
                  </reference>
114 118
               </visibleWhen>
115 119
            </command>
120
            <menu
121
                  id="menu.urs.tools"
122
                  label="%menu.label.1">
123
               <menu
124
                     label="%menu.label.2">
125
                  <dynamic
126
                        class="org.txm.annotation.urs.commands.URSToolsMenuContribution"
127
                        id="edit">
128
                  </dynamic>
129
               </menu>
130
               <menu
131
                     label="%menu.label.3">
132
                  <dynamic
133
                        class="org.txm.annotation.urs.commands.URSToolsMenuContribution"
134
                        id="check">
135
                  </dynamic>
136
               </menu>
137
               <menu
138
                     label="%menu.label.4">
139
                  <dynamic
140
                        class="org.txm.annotation.urs.commands.URSToolsMenuContribution"
141
                        id="exploit">
142
                  </dynamic>
143
               </menu>
144
               <menu
145
                     label="%menu.label.5">
146
                  <dynamic
147
                        class="org.txm.annotation.urs.commands.URSToolsMenuContribution"
148
                        id="export">
149
                  </dynamic>
150
               </menu>
151
               <command
152
                     commandId="org.txm.annotation.urs.commands.OpenDemocratTools"
153
                     style="push">
154
               </command>
155
            </menu>
116 156
            <command
117 157
                  commandId="org.txm.annotation.urs.commands.EditAnnotationStructure"
118 158
                  style="push">
......
124 164
               </visibleWhen>
125 165
            </command>
126 166
            <command
127
                  commandId="org.txm.annotation.urs.commands.SaveCorpus"
167
                  commandId="org.txm.annotation.urs.commands.EditVue"
128 168
                  style="push">
129 169
               <visibleWhen
130 170
                     checkEnabled="false">
......
133 173
                  </reference>
134 174
               </visibleWhen>
135 175
            </command>
176
            <separator
177
                  name="URSRCP.separator1"
178
                  visible="true">
179
            </separator>
136 180
            <command
137
                  commandId="org.txm.annotation.urs.commands.ExportGlozzCorpus"
181
                  commandId="org.txm.annotation.urs.commands.LoadStructureFromGlozz"
138 182
                  style="push">
139 183
               <visibleWhen
140 184
                     checkEnabled="false">
......
144 188
               </visibleWhen>
145 189
            </command>
146 190
            <command
147
                  commandId="org.txm.annotation.urs.commands.ExportTEICorpus"
148
                  style="push">
149
               <visibleWhen
150
                     checkEnabled="false">
151
                  <reference
152
                        definitionId="OneCorpusSelected">
153
                  </reference>
154
               </visibleWhen>
155
            </command>
156
            <command
157 191
                  commandId="org.txm.annotation.urs.commands.ImportTEIAnnotations"
158 192
                  style="push">
159 193
               <visibleWhen
......
163 197
                  </reference>
164 198
               </visibleWhen>
165 199
            </command>
200
            <menu
201
                  label="Export">
202
               <command
203
                     commandId="org.txm.annotation.urs.commands.ExportTEICorpus"
204
                     label="%command.label.0"
205
                     style="push">
206
               </command>
207
               <command
208
                     commandId="org.txm.annotation.urs.commands.ExportGlozzCorpus"
209
                     label="%command.label.1"
210
                     style="push">
211
               </command>
212
            </menu>
166 213
         </menu>
167 214
      </menuContribution>
168 215
      <menuContribution
......
306 353
            id="org.txm.annotation.urs.commands.EditVue"
307 354
            name="%command.name.16">
308 355
      </command>
356
      <command
357
            categoryId="org.txm.rcp.category.txm"
358
            defaultHandler="org.txm.annotation.urs.commands.OpenDemocratTools"
359
            id="org.txm.annotation.urs.commands.OpenDemocratTools"
360
            name="Democrat tools">
361
      </command>
309 362
   </extension>
310 363
   <extension
311 364
         point="org.eclipse.ui.views">
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/DemoMacro.groovy (revision 2174)
1
package org.txm.macro.urs
2

  
3
import org.txm.annotation.urs.URSCorpora
4
import org.txm.searchengine.cqp.corpus.CQPCorpus
5
import visuAnalec.elements.*
6

  
7
// get the CQP corpus
8
if (!(corpusViewSelection instanceof CQPCorpus)) {
9
	println "Corpus view selection is not a corpus: $corpusViewSelection" // report why the macro aborts
10
	return false;
11
}
12
def corpus = corpusViewSelection
13

  
14
// get the Analec corpus : stores the annotations and annotation structure
15

  
16
def analecCorpus = URSCorpora.getCorpus(corpus)
17

  
18
// get the annotations structure : stores the available annotation types, properties and values
19

  
20
def structure = analecCorpus.getStructure()
21

  
22
// get the annotations view : stores annotations and annotation structure display rules
23

  
24
def view = URSCorpora.getVue(corpus)
25

  
26
// set a default vue -> all is visible
27
view.retablirVueParDefaut()
28

  
29
// list the viewed types and properties
30
for (def type : view.getTypesAVoir(Unite.class)) {
31
	println "$type: "+view.getNomsChamps(Unite.class, type)
32
}
33

  
34
// manage the viewed&enabled type, properties ; the types and properties must be enabled to use the "view.setValeurChamp(TYPE, PROPERTY, VALUE)" method
35
view.ajouterType(Unite.class, "EXEMPLE")
36
view.ajouterProp(Unite.class, "EXEMPLE", "PEXEMPLE")
37

  
38
// display annotations per Element type (Unite, Relation, Schema) and per type
39

  
40
// Create annotation type
41

  
42
structure.ajouterType(Unite.class, "EXEMPLE");
43
//structure.ajouterType(Relation.class, "EXEMPLE");
44
//structure.ajouterType(Schema.class, "EXEMPLE");
45
println structure.getTypes(Unite.class)
46

  
47
// Create annotation property
48

  
49
structure.ajouterProp(Unite.class, "EXEMPLE", "PEXEMPLE")
50
println structure.getNomsProps(Unite.class, "EXEMPLE")
51

  
52
// Create annotation value
53

  
54
structure.ajouterVal(Unite.class, "EXEMPLE", "PEXEMPLE", "oui")
55
structure.ajouterVal(Unite.class, "EXEMPLE", "PEXEMPLE", "non")
56

  
57
// Create annotations
58

  
59
// Unite
60
def u = analecCorpus.addUniteSaisie("EXEMPLE", 0, 10, ["PEXEMPLE":"oui"])
61
def u2 = analecCorpus.addUniteSaisie("EXEMPLE", 12, 12, ["PEXEMPLE":"oui"])
62
println u
63

  
64
// Edit annotations
65

  
66
u.getProps()["PEXEMPLE"] = "non"
67

  
68
// Unit getters
69

  
70
println u.getDeb() // start of unit
71
println u.getFin() // end of unit
72

  
73
println u.getProp("PEXEMPLE") // value of the PEXEMPLE property ("EXEMPLE" is the unit type, not a property name)
74
println u.getType()
75

  
76
// Relation
77
Relation relation = new Relation("REXEMPLE", u, u2)
78
relation.getProps().put("PEXEMPLE", "oui")
79
analecCorpus.addRelationLue(relation)  // add the new relation
80

  
81
// Schema
82
Schema schema = new Schema()
83
schema.type = "SEXEMPLE"
84
schema.props.put("PEXEMPLE", "oui")
85
schema.ajouter(u) // insert one unit
86

  
87
analecCorpus.addSchemaLu(schema)  // add the new schema
88

  
89
// Browse Units
90

  
91
println "Units:"
92
for (String type : structure.getUnites()) {
93
	def units = analecCorpus.getUnites(type)
94
	if (units.size() > 0) {
95
		println "	${units.size()} $type"
96
	}
97
}
98

  
99
// Browse Relations
100

  
101
println "Relations:"
102
for (String type : structure.getRelations()) {
103
	def relations = analecCorpus.getRelations(type)
104
	if (relations.size() > 0) {
105
		println "	${relations.size()} $type"
106
	}
107
}
108

  
109
// Browse Schemas
110

  
111
println "Schemas:"
112
for (String type : structure.getSchemas()) {
113
	def schemas = analecCorpus.getSchemas(type)
114
	if (schemas.size() > 0) {
115
		println "	${schemas.size()} $type"
116
	}
117
}
118

  
119
// URS selections
120

  
121
// select Schemas
122
def debug = 0 // 1 2 for more logs
123
def strict_inclusion = true
124
def position = 0
125
def minimum_schema_size = 1;
126
def maximum_schema_size = 10;
127
def schema_ursql = "SEXEMPLE"
128
def unit_ursql = "EXEMPLE@PEXEMPLE=oui"
129
def unit_type = "EXEMPLE" 
130
println AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion)
131

  
132

  
133
// with URSQL
134
println AnalecUtils.filterElements(debug, analecCorpus.getUnites(unit_type), unit_ursql)
135

  
136
// with intersection with CQP corpus matches
137
println AnalecUtils.filterUniteByInclusion(debug, analecCorpus.getUnites(unit_type), corpus.getMatches(), strict_inclusion, position)
138

  
139
// by size
140
println AnalecUtils.filterBySize(analecCorpus.getSchemas(schema_ursql), minimum_schema_size, maximum_schema_size)
141

  
142
// Delete annotations
143

  
144
analecCorpus.supUnite(u)
145
analecCorpus.supUnite(u2)
146
analecCorpus.supRelation(relation)
147
analecCorpus.supSchema(schema)
148

  
149
// Delete annotation value
150

  
151
structure.supprimerVal(Unite.class, "EXEMPLE", "PEXEMPLE", "oui")
152
structure.supprimerVal(Unite.class, "EXEMPLE", "PEXEMPLE", "non")
153

  
154
// Delete annotation property
155

  
156
structure.supprimerProp(Unite.class, "EXEMPLE", "PEXEMPLE")
157
structure.supprimerProp(Relation.class, "REXEMPLE", "PEXEMPLE")
158
structure.supprimerProp(Schema.class, "SEXEMPLE", "PEXEMPLE")
159

  
160
// Delete annotation type
161

  
162
structure.supprimerType(Unite.class, "EXEMPLE");
163
structure.supprimerType(Relation.class, "REXEMPLE");
164
structure.supprimerType(Schema.class, "SEXEMPLE");
165

  
166
// Revert changes
167

  
168
//URSCorpora.revert(corpus);
169

  
170
// Save changes
171

  
172
//URSCorpora.saveCorpus(corpus)
173

  
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/SUJ-PARTINFMacro.groovy (revision 2174)
43 43
if (!ParametersDialog.open(this)) return
44 44

  
45 45
corpus = corpusViewSelection
46
CQI = Toolbox.getCqiClient()
46
CQI = CQPSearchEngine.getCqiClient()
47 47
word = corpus.getWordProperty()
48 48
posProperty = corpus.getProperty(pos_property_name)
49 49
if (posProperty == null) {
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/VerificationsMacro.groovy (revision 2174)
18 18
import org.kohsuke.args4j.*
19 19
import groovy.transform.*
20 20
import org.txm.*
21

  
21 22
import org.txm.rcpapplication.swt.widget.parameters.*
22
import org.txm.analec.*
23
import org.txm.urs.*
24
import org.txm.annotation.urs.*
23 25
import org.txm.searchengine.cqp.*
24 26
import org.txm.searchengine.cqp.corpus.*
25 27
import visuAnalec.Message.*
......
57 59
if (!ParametersDialog.open(this)) return
58 60

  
59 61
corpus = corpusViewSelection
60
CQI = Toolbox.getCqiClient()
62
CQI = CQPSearchEngine.getCqiClient()
61 63
word = corpus.getWordProperty()
62 64
posProperty = corpus.getProperty(pos_property_name)
63 65
if (posProperty == null) {
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/CreationAnaphoresMacro.groovy (revision 2174)
70 70
	units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } // sort them
71 71
	
72 72
	for (int i = 0 ; i < units.size() - 1 ; i++) { // build RELATIONS and don't process the last unit
73
		println "creating "+units[i+1]+", "+units[i]
73
		println "creating relation with "+units[i+1].getProps()+", "+units[i].getProps()
74 74
		Relation relation = new Relation("ANAPHORE", units[i+1], units[i])
75 75
		relation.getProps().put("TYPE", "COREFERENTE")
76 76
		analecCorpus.addRelationLue(relation)  // add the new relation
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/LongueurDesMentionsMacro.groovy (revision 2174)
107 107
	def prop = unit.getProp(LONGUEUR)
108 108
	if (!reset && prop != null && prop.length() > 0) continue // l'unité a déjà une LONGUEUR
109 109

  
110
		int[] positions = null
110
	int[] positions = null
111 111
	if (unit.getDeb() == unit.getFin()) positions = [unit.getDeb()]
112 112
	else positions = (unit.getDeb()..unit.getFin())
113 113

  
114
	def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
115
	def cat = testRules(positions, Mention)
114
	//def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
115
	def cat = testRules(positions, positions)
116 116

  
117 117
	if (cat != null) {
118 118
		vue.setValeurChamp(unit, LONGUEUR, cat)
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/Si2SingletonMacro.groovy (revision 2174)
26 26
}
27 27

  
28 28
// BEGINNING OF PARAMETERS
29
@Field @Option(name="unitType", usage="", widget="String", required=true, def="MENTION")
30
def unitType
31
@Field @Option(name="refPropertyName", usage="", widget="String", required=true, def="REF")
32
def refPropertyName
29
@Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
30
def unit_type
31
@Field @Option(name="ref_property_name", usage="", widget="String", required=true, def="REF")
32
def ref_property_name
33 33
if (!ParametersDialog.open(this)) return
34 34

  
35 35
corpus = corpusViewSelection
......
38 38
analecCorpus = URSCorpora.getCorpus(corpus)
39 39
vue = URSCorpora.getVue(corpus)
40 40
structure = analecCorpus.getStructure()
41
if (!structure.getUnites().contains(unitType)) { // check if the structure contains the unitType units
42
	println "Error: corpus structure does not contains unit with name=$unitType"
41
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
42
	println "Error: corpus structure does not contains unit with name=$unit_type"
43 43
	return
44 44
}
45 45

  
46
if (!structure.getUniteProperties(unitType).contains(refPropertyName)) {
47
	println "Error: corpus structure does not contains property name=$unitType"
46
if (!structure.getUniteProperties(unit_type).contains(ref_property_name)) {
47
	println "Error: corpus structure does not contains property name=$ref_property_name" // report the missing property, not the unit type
48 48
	return
49 49
} 
50 50

  
51 51
def nModified = 0
52 52
def nIgnored = 0
53 53

  
54
def units = analecCorpus.getUnites(unitType)
54
def units = analecCorpus.getUnites(unit_type)
55 55
//units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
56 56

  
57 57
def refSet = new HashSet()
58 58
for (Unite unit : units) { // process all units
59
	def prop = unit.getProp(refPropertyName)
59
	def prop = unit.getProp(ref_property_name)
60 60
	refSet.add(prop)
61 61
}
62 62

  
63 63
def counter = 1
64 64
for (Unite unit : units) { // process all units
65 65
	
66
	def prop = unit.getProp(refPropertyName)
66
	def prop = unit.getProp(ref_property_name)
67 67
	if (prop && prop == "SI") {
68 68
		def name = "SI_" + counter
69 69
		while (refSet.contains(name)) {
......
72 72
		}
73 73
		counter++
74 74
		//println "old prop"+ prop
75
		unit.getProps().put(refPropertyName, name)
75
		unit.getProps().put(ref_property_name, name)
76 76
		//println "new prop"+ name
77 77
		nModified++
78 78
	} else {
......
81 81
}
82 82

  
83 83
println "Result:"
84
println "- $nModified units of type $unitType have been modified."
85
println "- $nIgnored units of type $unitType have not been modified.\n"
84
println "- $nModified units of type $unit_type have been modified."
85
println "- $nIgnored units of type $unit_type have not been modified.\n"
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportToGlozz1_0_0Macro.groovy (revision 2174)
1
package org.txm.macro.urs.export
2

  
3
// @author: Bruno Oberlé
4
// v1.0.0 2017-08-28
5

  
6
/*
7
 Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz:
8
 - un fichier .ac contenant le corpus brut,
9
 - un fichier .aa contenant les annotations au format XML utilisé par Glozz.
10
 Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure,
11
 ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz.
12
 Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION").
13
 La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam).  Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec.
14
 */
15

  
16
// STANDARD DECLARATIONS
17

  
18
import org.apache.commons.lang.*
19
import org.kohsuke.args4j.*
20
import groovy.transform.*
21
import org.txm.*
22
import org.txm.rcp.swt.widget.parameters.*
23
import org.txm.annotation.urs.*
24
import org.txm.searchengine.cqp.*
25
import org.txm.searchengine.cqp.corpus.*
26
import visuAnalec.Message.*
27
import visuAnalec.donnees.*
28
import visuAnalec.elements.*
29
import visuAnalec.vue.*
30

  
31
// CHECK CORPUS
32

  
33
if (!(corpusViewSelection instanceof MainCorpus)) {
34
	println "Corpora selection is not a MainCorpus: "+corpusViewSelection
35
	return;
36
}
37

  
38
// BEGINNING OF PARAMETERS
39

  
40
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
41
		String unit_type
42

  
43
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)")
44
		String filename
45

  
46
if (!ParametersDialog.open(this)) return;
47

  
48
// VARIABLES
49

  
50
corpus = corpusViewSelection
51

  
52
doExport(corpus, unit_type, filename)
53

  
54
public void doExport(MainCorpus corpus, String unit_type, String filename) {
55

  
56
	size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1)
57
	CQI = CQPSearchEngine.getCqiClient()
58
	word = corpus.getWordProperty()
59
	// note: using "lbn" seems to work better than "pn" (some imported corpora are missing this information)
60
	//pn = corpus.getProperty("pn")
61
	//if (pn == null) pn = corpus.getProperty("div")
62
	pn = corpus.getProperty("lbn")
63

  
64
	// BUILD THE RAW TEXT, THE POSITIONS AND FIND THE PARAGRAPHS
65

  
66
	rawText = "" // the corpus for the .ac file
67
	positions = [] // each element is an array [start, end] indicating the position in the rawText
68
	pnCount = 0 // the par counter, used for indexing the pns array
69
	lastPn = -1 // the last paragraph number
70
	pns = [] // each element is an array [start, end] representing the start and end of the paragraph in the rawText
71
	for (def i=0; i<size; i++) {
72
		f = CQI.cpos2Str(word.getQualifiedName(), (int[])[i])[0]
73
		if (pn == null) {
74
			p = 1
75
		} else {
76
			p = CQI.cpos2Str(pn.getQualifiedName(), (int[])[i])[0]
77
		}
78
		start = rawText.length()
79
		rawText += f
80
		if (lastPn != p) {
81
			pnCount++;
82
			if (pnCount > 1) {
83
				pns[pnCount-2][1] = end
84
			}
85
			pns[pnCount-1] = [start, 0]
86
		}
87
		lastPn = p
88
		end = rawText.length() // must be after setting it up in pns!
89
		if (i != size-1) rawText += " "
90
		positions[i] = [start, end]
91
	}
92
	pns[pnCount-1][1] = end
93
	println pnCount + " paragraph(s) found."
94

  
95

  
96

  
97
	// CORPUS ANALEC (GET THE ANNOTATIONS)
98

  
99
	// note that unit_type has been defined with an option of the dialog at the beginning
100
	def analecCorpus = URSCorpora.getCorpus(corpus);
101

  
102
	// list of properties
103

  
104
	struct = analecCorpus.getStructure();
105
	propertyList = struct.getUniteProperties(unit_type);
106

  
107
	// export to file (corpus)
108

  
109
	corpusFilename = filename + ".ac";
110
	def corpusFile = new File(corpusFilename);
111
	corpusFile.write(rawText)
112
	println("Corpus written to `"+corpusFilename+"'.");
113

  
114
	// export to file (annotations)
115

  
116
	annotFilename = filename + ".aa";
117
	def annotFile = new File(annotFilename)
118
	annotFile.write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<annotations>\n")
119
		// erase (if you use << you don't erase!)
120
	def counter = 0
121

  
122
	// export paragraphs
123
	for (def i=0; i<pns.size(); i++) {
124
		def start = pns[i][0]
125
		def end = pns[i][1]
126
		annotFile << "<unit id=\"me_"+counter+"\">\n";
127
		annotFile << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n";
128
		annotFile << "<characterisation><type>paragraph</type><featureSet /></characterisation>\n";
129
		annotFile << "<positioning><start><singlePosition index=\""+start+"\" /></start><end><singlePosition index=\""+end+"\" /></end></positioning>\n";
130
		annotFile << "</unit>\n";
131
		counter++;
132
	}
133

  
134
	// export units
135
	def units = analecCorpus.getUnites(unit_type);
136
	//units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() };
137
	def unitCount = 0
138
	for (Unite unit : units) {
139
		unitCount++;
140
		annotFile << "<unit id=\"me_"+counter+"\">\n";
141
		annotFile << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n";
142
		annotFile << "<characterisation>\n";
143
		annotFile << "<type>"+unit_type+"</type>\n";
144
		annotFile << "<featureSet>\n";
145
		for (String propertyName : propertyList) {
146
			// escape the property value so that characters such as < or & keep the .aa file well-formed XML
			annotFile << "<feature name=\""+propertyName+"\">"+StringEscapeUtils.escapeXml("" + unit.getProp(propertyName))+"</feature>\n";
147
		}
148
		annotFile << "</featureSet>\n";
149
		annotFile << "</characterisation>\n";
150
		start = positions[unit.getDeb()][0]
151
		end = positions[unit.getFin()][1]
152
		annotFile << "<positioning><start><singlePosition index=\""+start+"\" /></start><end><singlePosition index=\""+end+"\" /></end></positioning>\n";
153
		annotFile << "</unit>\n";
154
		counter++;
155
	}
156
	annotFile << "</annotations>\n";
157

  
158
	println unitCount + " unit(s) found."
159

  
160
	println("Annotations written to `"+annotFilename+"'.");
161
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportToGlozz1_1_0Macro.groovy (revision 2174)
1
package org.txm.macro.urs.export
2

  
3
// @author: Bruno Oberle
4
// v1.1.0 2017-10-25:
5
//  - using the "p" structural unit if available to get paragraphs
6
//  - no space before comma, parenthesis, full stop, hyphen, etc.
7
//  - no space after hyphen, parenthesis, apostrophe, etc.
8
// v1.0.0 2017-08-28
9

  
10
/*
11
 Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz:
12
 - un fichier .ac contenant le corpus brut,
13
 - un fichier .aa contenant les annotations au format XML utilisé par Glozz.
14
 Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure,
15
 ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz.
16
 Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION").
17
 La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam).  Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec.
18
 */
19

  
20
// STANDARD DECLARATIONS
21

  
22
import org.apache.commons.lang.*
23
import org.kohsuke.args4j.*
24
import groovy.transform.*
25
import org.txm.*
26
import org.txm.rcp.swt.widget.parameters.*
27
import org.txm.annotation.urs.*
28
import org.txm.searchengine.cqp.*
29
import org.txm.searchengine.cqp.corpus.*
30
import visuAnalec.Message.*
31
import visuAnalec.donnees.*
32
import visuAnalec.elements.*
33
import visuAnalec.vue.*
34

  
35

  
36
// GLOBAL VARIABLES
37

  
38
corpus = corpusViewSelection
39
CQI = CQPSearchEngine.getCqiClient()
40

  
41
// CHECK CORPUS
42

  
43
if (!(corpusViewSelection instanceof MainCorpus)) {
44
	println "Corpora selection is not a MainCorpus: "+corpusViewSelection
45
	return;
46
}
47

  
48
// PARAGRAPH AS STRUCTURAL UNIT?
49

  
50
/* note: some text have a structural unit called "p", which represents a paragraph.
51
   If this structural unit is present, we use it.  Otherwise we ask the user (the
52
   best way is to use the "pn" (paragraph number) or "lbn" (line number) property),
53
   depending on what is available in the corpus. If the user gives no property name,
54
   we only define one paragraph for the whole text. */
55

  
56
parUnit = corpus.getStructuralUnit("p")
57

  
58
// BEGINNING OF PARAMETERS
59

  
60
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
61
		String unit_type
62

  
63
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)")
64
		String filename
65

  
66
if (!parUnit) {
67
	@Field @Option(name="par_prop",usage="the property used to compute paragraphs", widget="String", required=false, def="lbn")
68
		String par_prop
69
}
70

  
71
if (!ParametersDialog.open(this)) return;
72

  
73
// what paragraph unit to use?
74

  
75
pn = null
76
if (!parUnit && par_prop) { // Groovy truth: false for both a null and an empty par_prop, so an unset optional parameter cannot raise an NPE
77
	pn = corpus.getProperty(par_prop)
78
	if (!pn) {
79
		println "Error: I can't find the property `$par_prop'." // the requested paragraph property does not exist in the corpus
80
		return
81
	}
82
}
83

  
84
/*********************************/
85

  
86
doExport(corpus, unit_type, filename)
87

  
88
public void doExport(MainCorpus corpus, String unit_type, String filename) {
89

  
90
	size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1)
91
	word = corpus.getWordProperty()
92

  
93
	// BUILD THE RAW TEXT, THE POSITIONS AND FIND THE PARAGRAPHS
94

  
95
	rawText = "" // the corpus for the .ac file
96
	positions = [] // each element is an array [start, end] indicating the position in the rawText
97
	pnCount = 0 // the par counter, used for indexing the pars array
98
	lastPn = -1 // the last paragraph number
99
	pars = [] // each element is an array [start, end] representing the start and end of the paragraph in the rawText
100
	insertSpace = true
101
	for (def i=0; i<size; i++) {
102
		f = CQI.cpos2Str(word.getQualifiedName(), (int[])[i])[0]
103
		if (parUnit) {
104
			p = CQI.cpos2Struc(parUnit.getQualifiedName(), (int[])[i])[0]
105
		} else if (pn == null) {
106
			p = 1
107
		} else {
108
			p = CQI.cpos2Str(pn.getQualifiedName(), (int[])[i])[0]
109
		}
110
		if (i > 0 && insertSpace
111
				&& !f.equals(".") && !f.equals(",") && !f.equals("'") && !f.equals("’") && !f.equals("-")
112
				&& !f.equals(")") && !f.equals("]") && !f.startsWith("-")) {
113
			rawText += " "
114
		}
115
		insertSpace = true // reset
116
		if (f.equals("-") || f.equals("[") || f.equals("(")
117
				|| f.endsWith("-") || f.endsWith("'") || f.endsWith("’") || f.endsWith("-")) {
118
			insertSpace = false
119
		}
120
		start = rawText.length()
121
		rawText += f
122
		if (lastPn != p) {
123
			pnCount++;
124
			if (pnCount > 1) {
125
				pars[pnCount-2][1] = end
126
			}
127
			pars[pnCount-1] = [start, 0]
128
		}
129
		lastPn = p
130
		end = rawText.length() // must be after setting it up in pars!
131
		positions[i] = [start, end]
132
	}
133
	pars[pnCount-1][1] = end
134
	println pnCount + " paragraph(s) found."
135

  
136
	// CORPUS ANALEC (GET THE ANNOTATIONS)
137

  
138
	// note that unit_type has been defined with an option of the dialog at the beginning
139
	def analecCorpus = URSCorpora.getCorpus(corpus);
140

  
141
	// list of properties
142

  
143
	struct = analecCorpus.getStructure();
144
	propertyList = struct.getUniteProperties(unit_type);
145

  
146
	// export to file (corpus)
147

  
148
	corpusFilename = filename + ".ac";
149
	def corpusFile = new File(corpusFilename);
150
	corpusFile.write(rawText)
151
	println("Corpus written to `"+corpusFilename+"'.");
152

  
153
	// export to file (annotations)
154

  
155
	annotFilename = filename + ".aa";
156
	def annotFile = new File(annotFilename)
157
	annotFile.write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<annotations>\n")
158
		// erase (if you use << you don't erase!)
159
	def counter = 0
160

  
161
	// export paragraphs
162
	for (def i=0; i<pars.size(); i++) {
163
		def start = pars[i][0]
164
		def end = pars[i][1]
165
		annotFile << "<unit id=\"me_"+counter+"\">\n";
166
		annotFile << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n";
167
		annotFile << "<characterisation><type>paragraph</type><featureSet /></characterisation>\n";
168
		annotFile << "<positioning><start><singlePosition index=\""+start+"\" /></start><end><singlePosition index=\""+end+"\" /></end></positioning>\n";
169
		annotFile << "</unit>\n";
170
		counter++;
171
	}
172

  
173
	// export units
174
	def units = analecCorpus.getUnites(unit_type);
175
	//units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() };
176
	def unitCount = 0
177
	for (Unite unit : units) {
178
		unitCount++;
179
		annotFile << "<unit id=\"me_"+counter+"\">\n";
180
		annotFile << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n";
181
		annotFile << "<characterisation>\n";
182
		annotFile << "<type>"+unit_type+"</type>\n";
183
		annotFile << "<featureSet>\n";
184
		for (String propertyName : propertyList) {
185
			// escape the property value so that characters such as < or & keep the .aa file well-formed XML
			annotFile << "<feature name=\""+propertyName+"\">"+StringEscapeUtils.escapeXml("" + unit.getProp(propertyName))+"</feature>\n";
186
		}
187
		annotFile << "</featureSet>\n";
188
		annotFile << "</characterisation>\n";
189
		start = positions[unit.getDeb()][0]
190
		end = positions[unit.getFin()][1]
191
		annotFile << "<positioning><start><singlePosition index=\""+start+"\" /></start><end><singlePosition index=\""+end+"\" /></end></positioning>\n";
192
		annotFile << "</unit>\n";
193
		counter++;
194
	}
195
	annotFile << "</annotations>\n";
196

  
197
	println unitCount + " unit(s) found."
198

  
199
	println("Annotations written to `"+annotFilename+"'.");
200
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportToGlozzMacro.groovy (revision 2174)
1
package org.txm.macro.urs.export
2

  
3
// @author: Bruno Oberlé
4
// v1.0.0 2017-08-28
5

  
6
/*
7
 Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz:
8
 - un fichier .ac contenant le corpus brut,
9
 - un fichier .aa contenant les annotations au format XML utilisé par Glozz.
10
 Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure,
11
 ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz.
12
 Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION").
13
 La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam).  Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec.
14
 */
15

  
16
// STANDARD DECLARATIONS
17

  
18
import org.apache.commons.lang.*
19
import org.kohsuke.args4j.*
20
import groovy.transform.*
21
import org.txm.*
22
import org.txm.rcp.swt.widget.parameters.*
23
import org.txm.annotation.urs.*
24
import org.txm.searchengine.cqp.*
25
import org.txm.searchengine.cqp.corpus.*
26
import visuAnalec.Message.*
27
import visuAnalec.donnees.*
28
import visuAnalec.elements.*
29
import visuAnalec.vue.*
30

  
31
// CHECK CORPUS: stop the macro unless the current selection (corpusViewSelection) is a MainCorpus
32

  
33
if (!(corpusViewSelection instanceof MainCorpus)) {
34
	println "Corpora selection is not a MainCorpus: "+corpusViewSelection
35
	return;
36
}
37

  
38
// BEGINNING OF PARAMETERS: unit_type and filename are the two values later handed to doExport (filename carries no extension, see its default text)
39

  
40
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
41
		String unit_type
42

  
43
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)")
44
		String filename
45

  
46
// the macro ends here when ParametersDialog.open(this) returns false (presumably the dialog was cancelled -- TODO confirm)
if (!ParametersDialog.open(this)) return;
47

  
48
// VARIABLES: keep the selected corpus under the name used by the export call below
49

  
50
corpus = corpusViewSelection
51

  
52
doExport(corpus, unit_type, filename)
53

  
54
/**
 * Exports a corpus and the URS (Analec) units of one type to the Glozz format.
 *
 * Two files are written, both encoded in UTF-8 (the encoding announced in the XML header):
 *  - filename + ".ac": the raw text, i.e. every word form of the corpus separated by single spaces,
 *  - filename + ".aa": the XML annotations: one "paragraph" unit per run of consecutive tokens
 *    sharing the same "lbn" value (a single paragraph when the corpus has no "lbn" property),
 *    followed by one unit per URS unit of type unit_type.
 * Every position written to the .aa file is a character offset in the .ac text.
 * An empty corpus produces an empty .ac file and an .aa file without paragraph units.
 *
 * @param corpus the corpus to export
 * @param unit_type the URS unit type whose units and properties are exported
 * @param filename the output path without extension
 */
public void doExport(MainCorpus corpus, String unit_type, String filename) {

	def size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1)
	def CQI = CQPSearchEngine.getCqiClient()
	def word = corpus.getWordProperty()
	// note: using "lbn" seems to work better than "pn" (some imported corpora are missing this information)
	def pn = corpus.getProperty("lbn")

	// BUILD THE RAW TEXT, THE POSITIONS AND FIND THE PARAGRAPHS

	def rawText = new StringBuilder() // the corpus for the .ac file (StringBuilder: avoids quadratic String concatenation)
	def positions = [] // each element is an array [start, end] indicating the position in the rawText
	def pns = [] // each element is an array [start, end] representing the start and end of the paragraph in the rawText
	def lastPn = null // the paragraph value of the previous token
	def end = 0 // end offset of the previous token
	for (int i = 0; i < size; i++) {
		def f = CQI.cpos2Str(word.getQualifiedName(), (int[])[i])[0]
		def p = (pn == null) ? 1 : CQI.cpos2Str(pn.getQualifiedName(), (int[])[i])[0]
		def start = rawText.length()
		rawText.append(f)
		if (pns.isEmpty() || lastPn != p) {
			// a new paragraph starts: close the previous one at the end of the previous token
			if (!pns.isEmpty()) {
				pns.last()[1] = end
			}
			pns << [start, 0]
		}
		lastPn = p
		end = rawText.length() // must be after setting it up in pns!
		if (i != size-1) rawText.append(" ")
		positions << [start, end]
	}
	// close the last paragraph; nothing to close when the corpus is empty
	if (!pns.isEmpty()) {
		pns.last()[1] = end
	}
	println pns.size() + " paragraph(s) found."

	// CORPUS ANALEC (GET THE ANNOTATIONS)

	// note that unit_type has been defined with an option of the dialog at the beginning
	def analecCorpus = URSCorpora.getCorpus(corpus)

	// list of properties
	def struct = analecCorpus.getStructure()
	def propertyList = struct.getUniteProperties(unit_type)

	// export to file (corpus)

	def corpusFilename = filename + ".ac"
	// explicit UTF-8 so that the text does not depend on the platform default charset
	new File(corpusFilename).write(rawText.toString(), "UTF-8")
	println("Corpus written to `"+corpusFilename+"'.")

	// export to file (annotations)

	// escapes text for XML content and attribute values; a missing value is written as an empty string
	def esc = { value -> value == null ? "" : groovy.xml.XmlUtil.escapeXml(value.toString()) }

	def annotFilename = filename + ".aa"
	def counter = 0 // running id shared by paragraph units and exported units
	def unitCount = 0
	// a single UTF-8 writer: matches the declared encoding and avoids reopening the file for each fragment
	new File(annotFilename).withWriter("UTF-8") { out ->
		out << "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<annotations>\n"

		// export paragraphs
		for (def par : pns) {
			out << "<unit id=\"me_"+counter+"\">\n"
			out << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n"
			out << "<characterisation><type>paragraph</type><featureSet /></characterisation>\n"
			out << "<positioning><start><singlePosition index=\""+par[0]+"\" /></start><end><singlePosition index=\""+par[1]+"\" /></end></positioning>\n"
			out << "</unit>\n"
			counter++
		}

		// export units
		def units = analecCorpus.getUnites(unit_type)
		for (Unite unit : units) {
			unitCount++
			out << "<unit id=\"me_"+counter+"\">\n"
			out << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n"
			out << "<characterisation>\n"
			out << "<type>"+esc(unit_type)+"</type>\n"
			out << "<featureSet>\n"
			for (String propertyName : propertyList) {
				out << "<feature name=\""+esc(propertyName)+"\">"+esc(unit.getProp(propertyName))+"</feature>\n"
			}
			out << "</featureSet>\n"
			out << "</characterisation>\n"
			// a unit spans from the start of its first token to the end of its last token
			def unitStart = positions[unit.getDeb()][0]
			def unitEnd = positions[unit.getFin()][1]
			out << "<positioning><start><singlePosition index=\""+unitStart+"\" /></start><end><singlePosition index=\""+unitEnd+"\" /></end></positioning>\n"
			out << "</unit>\n"
			counter++
		}
		out << "</annotations>\n"
	}

	println unitCount + " unit(s) found."

	println("Annotations written to `"+annotFilename+"'.")
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportAsMacro.groovy (revision 2174)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.export
7

  
8
import groovy.transform.Field
9

  
10
import org.jfree.chart.JFreeChart
11
import org.kohsuke.args4j.*
12
import org.txm.Toolbox
13
import org.txm.annotation.urs.*
14
import org.txm.macro.urs.AnalecUtils
15
import org.txm.rcp.Application
16
import org.txm.rcp.IImageKeys
17
import org.txm.rcp.swt.widget.parameters.*
18
import org.txm.searchengine.cqp.corpus.*
19
import org.txm.searchengine.cqp.corpus.query.CQLQuery
20
import org.txm.utils.io.FileCopy;
21
import org.txm.utils.io.IOUtils
22
import org.txm.utils.zip.Zip
23

  
24
import visuAnalec.elements.*
25

  
26
// Copies the binary directory of the selected corpus next to the original under the name given by the
// "new_name" parameter, renames the name-dependent files, patches the old name in the settings,
// the .project file and the registry file, then compresses the copy into a "<new_name>.txm" file.
def scriptName = this.class.getSimpleName()
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "** $scriptName please select a MainCorpus to run the macro"
	return;
}

@Field @Option(name="new_name", usage="Corpus name in uppercase", widget="String", required=true, def="CORPUSNAME")
String new_name
if (!ParametersDialog.open(this)) return

// the new name is forced to upper case, then checked against the accepted name pattern
new_name = new_name.toUpperCase()
def pattern = "[A-Z][-A-Z0-9]{1,20}"
if (!new_name.matches(pattern)) {
	println "New corpus name not conformant to CQP corpus name: "+pattern
	return false
}

// refuse to work on unsaved data: the copy is made from the files on disk
MainCorpus mainCorpus = corpusViewSelection.getMainCorpus()
String name = mainCorpus.getName()
if (mainCorpus.isModified()) {
	println "Selected corpus is not saved. Aborting"
	return false
}

visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(mainCorpus)
if (analecCorpus.isModifie()) {
	println "Selected Analec corpus is not saved. Aborting"
	return false
}

File binDirectory = mainCorpus.getProjectDirectory()
File newBinDirectory = new File(binDirectory.getParentFile(), new_name)

if (newBinDirectory.exists()) {
	println "The new corpus directory already exists: $newBinDirectory. Aborting."
	return false
}

FileCopy.copyFiles(binDirectory, newBinDirectory)
if (!newBinDirectory.exists()) {
	println "Fail to copy binary directory $binDirectory to $newBinDirectory"
	return
}

// [old path, new path] pairs, relative to the new binary directory, renamed in this order
def renames = [
	["analec/${name}.ec", "analec/${new_name}.ec"],
	["analec/${name}.ecv", "analec/${new_name}.ecv"],
	["css/${name}.css", "css/${new_name}.css"],
	["data/${name}", "data/${new_name}"],
	// the CSS file inside the HTML directory must be renamed BEFORE its parent directory:
	// once "HTML/<name>" is renamed, the old CSS path no longer exists and the rename fails
	["HTML/${name}/default/css/${name}.css", "HTML/${name}/default/css/${new_name}.css"],
	["HTML/${name}", "HTML/${new_name}"],
	["registry/${name.toLowerCase()}", "registry/${new_name.toLowerCase()}"],
	["txm/${name}", "txm/${new_name}"]
]
for (def pair : renames) {
	File source = new File(newBinDirectory, pair[0] as String)
	File target = new File(newBinDirectory, pair[1] as String)
	println "renaming $source : "+source.renameTo(target)
}

// patch name in settings
println "replacing old name $name ->  ${new_name} in preferences"
File settingsDirectory = new File(newBinDirectory, ".settings")
for (File prefFile : settingsDirectory.listFiles()) {
	IOUtils.write(prefFile, prefFile.getText().replace(name, new_name))
}

File projectSetting = new File(newBinDirectory, ".project")
IOUtils.write(projectSetting, projectSetting.getText().replace(name, new_name))

// patch registry: both the upper case and the lower case forms of the old name are replaced
File registryFile2 = new File(newBinDirectory, "registry/${new_name.toLowerCase()}")
String oldcontent = registryFile2.getText();
String content = oldcontent.replace(name, new_name)
content = content.replace(name.toLowerCase(), new_name.toLowerCase())
registryFile2.withWriter { writer ->
	writer.write(content)
}

// "monitor" is not defined in this script; it is expected to be supplied by the macro environment
File zipFile = new File(newBinDirectory.getAbsolutePath()+".txm")
Zip.compress(newBinDirectory, zipFile, monitor)

if (!zipFile.exists()) {
	// report the directory that was actually compressed (the copy, not the original)
	println "Fail to zip binary directory $newBinDirectory to $zipFile"
	return
} else {
	println "Done: $zipFile"
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportUnitsToGlozz.groovy (revision 2174)
1
package org.txm.macro.urs.export
2

  
3
// @author: Bruno Oberlé
4
// v1.0.0 2017-08-28
5

  
6
/*
7
 Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz:
8
 - un fichier .ac contenant le corpus brut,
9
 - un fichier .aa contenant les annotations au format XML utilisé par Glozz.
10
 Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure,
11
 ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz.
12
 Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION").
13
 La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam).  Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec.
14
 */
15

  
16
// STANDARD DECLARATIONS
17

  
18
import org.apache.commons.lang.*
19
import org.kohsuke.args4j.*
20
import groovy.transform.*
21
import org.txm.*
22
import org.txm.rcp.swt.widget.parameters.*
23
import org.txm.annotation.urs.*
24
import org.txm.searchengine.cqp.*
25
import org.txm.searchengine.cqp.corpus.*
26
import visuAnalec.Message.*
27
import visuAnalec.donnees.*
28
import visuAnalec.elements.*
29
import visuAnalec.vue.*
30

  
31
// CHECK CORPUS: stop the macro unless the current selection (corpusViewSelection) is a MainCorpus
32

  
33
if (!(corpusViewSelection instanceof MainCorpus)) {
34
	println "Corpora selection is not a MainCorpus: "+corpusViewSelection
35
	return;
36
}
37

  
38
// BEGINNING OF PARAMETERS: unit_type and filename are the two values later handed to doExport (filename carries no extension, see its default text)
39

  
40
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
41
		String unit_type
42

  
43
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)")
44
		String filename
45

  
46
// the macro ends here when ParametersDialog.open(this) returns false (presumably the dialog was cancelled -- TODO confirm)
if (!ParametersDialog.open(this)) return;
47

  
48
// VARIABLES: keep the selected corpus under the name used by the export call below
49

  
50
corpus = corpusViewSelection
51

  
52
doExport(corpus, unit_type, filename)
53

  
54
/**
 * Exports a corpus and the URS (Analec) units of one type to the Glozz format.
 *
 * Two files are written, both encoded in UTF-8 (the encoding announced in the XML header):
 *  - filename + ".ac": the raw text, i.e. every word form of the corpus separated by single spaces,
 *  - filename + ".aa": the XML annotations: one "paragraph" unit per run of consecutive tokens
 *    sharing the same "lbn" value (a single paragraph when the corpus has no "lbn" property),
 *    followed by one unit per URS unit of type unit_type.
 * Every position written to the .aa file is a character offset in the .ac text.
 * An empty corpus produces an empty .ac file and an .aa file without paragraph units.
 *
 * @param corpus the corpus to export
 * @param unit_type the URS unit type whose units and properties are exported
 * @param filename the output path without extension
 */
public void doExport(MainCorpus corpus, String unit_type, String filename) {

	def size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1)
	def CQI = CQPSearchEngine.getCqiClient()
	def word = corpus.getWordProperty()
	// note: using "lbn" seems to work better than "pn" (some imported corpora are missing this information)
	def pn = corpus.getProperty("lbn")

	// BUILD THE RAW TEXT, THE POSITIONS AND FIND THE PARAGRAPHS

	def rawText = new StringBuilder() // the corpus for the .ac file (StringBuilder: avoids quadratic String concatenation)
	def positions = [] // each element is an array [start, end] indicating the position in the rawText
	def pns = [] // each element is an array [start, end] representing the start and end of the paragraph in the rawText
	def lastPn = null // the paragraph value of the previous token
	def end = 0 // end offset of the previous token
	for (int i = 0; i < size; i++) {
		def f = CQI.cpos2Str(word.getQualifiedName(), (int[])[i])[0]
		def p = (pn == null) ? 1 : CQI.cpos2Str(pn.getQualifiedName(), (int[])[i])[0]
		def start = rawText.length()
		rawText.append(f)
		if (pns.isEmpty() || lastPn != p) {
			// a new paragraph starts: close the previous one at the end of the previous token
			if (!pns.isEmpty()) {
				pns.last()[1] = end
			}
			pns << [start, 0]
		}
		lastPn = p
		end = rawText.length() // must be after setting it up in pns!
		if (i != size-1) rawText.append(" ")
		positions << [start, end]
	}
	// close the last paragraph; nothing to close when the corpus is empty
	if (!pns.isEmpty()) {
		pns.last()[1] = end
	}
	println pns.size() + " paragraph(s) found."

	// CORPUS ANALEC (GET THE ANNOTATIONS)

	// note that unit_type has been defined with an option of the dialog at the beginning
	def analecCorpus = URSCorpora.getCorpus(corpus)

	// list of properties
	def struct = analecCorpus.getStructure()
	def propertyList = struct.getUniteProperties(unit_type)

	// export to file (corpus)

	def corpusFilename = filename + ".ac"
	// explicit UTF-8 so that the text does not depend on the platform default charset
	new File(corpusFilename).write(rawText.toString(), "UTF-8")
	println("Corpus written to `"+corpusFilename+"'.")

	// export to file (annotations)

	// escapes text for XML content and attribute values; a missing value is written as an empty string
	def esc = { value -> value == null ? "" : groovy.xml.XmlUtil.escapeXml(value.toString()) }

	def annotFilename = filename + ".aa"
	def counter = 0 // running id shared by paragraph units and exported units
	def unitCount = 0
	// a single UTF-8 writer: matches the declared encoding and avoids reopening the file for each fragment
	new File(annotFilename).withWriter("UTF-8") { out ->
		out << "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<annotations>\n"

		// export paragraphs
		for (def par : pns) {
			out << "<unit id=\"me_"+counter+"\">\n"
			out << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n"
			out << "<characterisation><type>paragraph</type><featureSet /></characterisation>\n"
			out << "<positioning><start><singlePosition index=\""+par[0]+"\" /></start><end><singlePosition index=\""+par[1]+"\" /></end></positioning>\n"
			out << "</unit>\n"
			counter++
		}

		// export units
		def units = analecCorpus.getUnites(unit_type)
		for (Unite unit : units) {
			unitCount++
			out << "<unit id=\"me_"+counter+"\">\n"
			out << "<metadata><author>me</author><creation-date>"+counter+"</creation-date></metadata>\n"
			out << "<characterisation>\n"
			out << "<type>"+esc(unit_type)+"</type>\n"
			out << "<featureSet>\n"
			for (String propertyName : propertyList) {
				out << "<feature name=\""+esc(propertyName)+"\">"+esc(unit.getProp(propertyName))+"</feature>\n"
			}
			out << "</featureSet>\n"
			out << "</characterisation>\n"
			// a unit spans from the start of its first token to the end of its last token
			def unitStart = positions[unit.getDeb()][0]
			def unitEnd = positions[unit.getFin()][1]
			out << "<positioning><start><singlePosition index=\""+unitStart+"\" /></start><end><singlePosition index=\""+unitEnd+"\" /></end></positioning>\n"
			out << "</unit>\n"
			counter++
		}
		out << "</annotations>\n"
	}

	println unitCount + " unit(s) found."

	println("Annotations written to `"+annotFilename+"'.")
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportCorpusAsMacro.groovy (revision 2174)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.export
7

  
8
import groovy.transform.Field
9

  
10
import org.jfree.chart.JFreeChart
11
import org.kohsuke.args4j.*
12
import org.txm.Toolbox
13
import org.txm.annotation.urs.*
14
import org.txm.macro.urs.AnalecUtils
15
import org.txm.rcp.Application
16
import org.txm.rcp.IImageKeys
17
import org.txm.rcp.swt.widget.parameters.*
18
import org.txm.searchengine.cqp.corpus.*
19
import org.txm.searchengine.cqp.corpus.query.CQLQuery
20
import org.txm.utils.io.FileCopy;
21
import org.txm.utils.io.IOUtils
22
import org.txm.utils.zip.Zip
23

  
24
import visuAnalec.elements.*
25

  
26
// Copies the binary directory of the selected corpus next to the original under the name given by the
// "new_name" parameter, renames the name-dependent files, patches the old name in the settings,
// the .project file and the registry file, then compresses the copy into a "<new_name>.txm" file.
def scriptName = this.class.getSimpleName()
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "** $scriptName please select a MainCorpus to run the macro"
	return;
}

@Field @Option(name="new_name", usage="Corpus name in uppercase", widget="String", required=true, def="CORPUSNAME")
String new_name
if (!ParametersDialog.open(this)) return

// the new name is forced to upper case, then checked against the accepted name pattern
new_name = new_name.toUpperCase()
def pattern = "[A-Z][-A-Z0-9]{1,20}"
if (!new_name.matches(pattern)) {
	println "New corpus name not conformant to CQP corpus name: "+pattern
	return false
}

// refuse to work on unsaved data: the copy is made from the files on disk
MainCorpus mainCorpus = corpusViewSelection.getMainCorpus()
String name = mainCorpus.getName()
if (mainCorpus.isModified()) {
	println "Selected corpus is not saved. Aborting"
	return false
}

visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(mainCorpus)
if (analecCorpus.isModifie()) {
	println "Selected Analec corpus is not saved. Aborting"
	return false
}

File binDirectory = mainCorpus.getProjectDirectory()
File newBinDirectory = new File(binDirectory.getParentFile(), new_name)

if (newBinDirectory.exists()) {
	println "The new corpus directory already exists: $newBinDirectory. Aborting."
	return false
}

FileCopy.copyFiles(binDirectory, newBinDirectory)
if (!newBinDirectory.exists()) {
	println "Fail to copy binary directory $binDirectory to $newBinDirectory"
	return
}

// [old path, new path] pairs, relative to the new binary directory, renamed in this order
def renames = [
	["analec/${name}.ec", "analec/${new_name}.ec"],
	["analec/${name}.ecv", "analec/${new_name}.ecv"],
	["css/${name}.css", "css/${new_name}.css"],
	["data/${name}", "data/${new_name}"],
	// the CSS file inside the HTML directory must be renamed BEFORE its parent directory:
	// once "HTML/<name>" is renamed, the old CSS path no longer exists and the rename fails
	["HTML/${name}/default/css/${name}.css", "HTML/${name}/default/css/${new_name}.css"],
	["HTML/${name}", "HTML/${new_name}"],
	["registry/${name.toLowerCase()}", "registry/${new_name.toLowerCase()}"],
	["txm/${name}", "txm/${new_name}"]
]
for (def pair : renames) {
	File source = new File(newBinDirectory, pair[0] as String)
	File target = new File(newBinDirectory, pair[1] as String)
	println "renaming $source : "+source.renameTo(target)
}

// patch name in settings
println "replacing old name $name ->  ${new_name} in preferences"
File settingsDirectory = new File(newBinDirectory, ".settings")
for (File prefFile : settingsDirectory.listFiles()) {
	IOUtils.write(prefFile, prefFile.getText().replace(name, new_name))
}

File projectSetting = new File(newBinDirectory, ".project")
IOUtils.write(projectSetting, projectSetting.getText().replace(name, new_name))

// patch registry: both the upper case and the lower case forms of the old name are replaced
File registryFile2 = new File(newBinDirectory, "registry/${new_name.toLowerCase()}")
String oldcontent = registryFile2.getText();
String content = oldcontent.replace(name, new_name)
content = content.replace(name.toLowerCase(), new_name.toLowerCase())
registryFile2.withWriter { writer ->
	writer.write(content)
}

// "monitor" is not defined in this script; it is expected to be supplied by the macro environment
println "creating the TXM file..."
File zipFile = new File(newBinDirectory.getAbsolutePath()+".txm")
Zip.compress(newBinDirectory, zipFile, monitor)

if (!zipFile.exists()) {
	// report the directory that was actually compressed (the copy, not the original)
	println "Fail to zip binary directory $newBinDirectory to $zipFile"
	return
} else {
	println "Done: $zipFile"
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/export/ExportToGlozz1_0_0.groovy (revision 2174)
1
package org.txm.macro.urs.export
2

  
3
// @author: Bruno Oberlé
4
// v1.0.0 2017-08-28
5

  
6
/*
7
 Cette macro exporte le corpus sélectionné et ses annotations vers deux fichiers de format Glozz:
8
 - un fichier .ac contenant le corpus brut,
9
 - un fichier .aa contenant les annotations au format XML utilisé par Glozz.
10
 Le corpus sélectionné dans TXM devrait contenir une structure Analec avec au moins un type d'unité défini (e.g. MENTION, maillon, etc.). S'il n'y a pas de structure,
11
 ce n'est pas grave: le fichier est exporté, mais aucune annotation n'est créée. Cela permet d'exporter n'importe quel corpus au format Glozz.
12
 Pour exporter un texte au format Glozz *sans* les annotations qu'il contient, simplement mettre un unit_type qui n'existe pas (e.g. "foobar" au lieu de "MENTION").
13
 La macro ne produit pour l'instant pas automatiquement de modèle Glozz (fichier .aam).  Cela n'est pas un problème pour ouvrir le résultat dans Glozz ou Analec.
14
 */
15

  
16
// STANDARD DECLARATIONS
17

  
18
import org.apache.commons.lang.*
19
import org.kohsuke.args4j.*
20
import groovy.transform.*
21
import org.txm.*
22
import org.txm.rcp.swt.widget.parameters.*
23
import org.txm.annotation.urs.*
24
import org.txm.searchengine.cqp.*
25
import org.txm.searchengine.cqp.corpus.*
26
import visuAnalec.Message.*
27
import visuAnalec.donnees.*
28
import visuAnalec.elements.*
29
import visuAnalec.vue.*
30

  
31
// CHECK CORPUS: stop the macro unless the current selection (corpusViewSelection) is a MainCorpus
32

  
33
if (!(corpusViewSelection instanceof MainCorpus)) {
34
	println "Corpora selection is not a MainCorpus: "+corpusViewSelection
35
	return;
36
}
37

  
38
// BEGINNING OF PARAMETERS: unit_type and filename are the two values later handed to doExport (filename carries no extension, see its default text)
39

  
40
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
41
		String unit_type
42

  
43
@Field @Option(name="filename",usage="", widget="String", required=true, def="filename without extension (.ac/.aa)")
44
		String filename
45

  
46
// the macro ends here when ParametersDialog.open(this) returns false (presumably the dialog was cancelled -- TODO confirm)
if (!ParametersDialog.open(this)) return;
47

  
48
// VARIABLES: keep the selected corpus under the name used by the export call below
49

  
50
corpus = corpusViewSelection
51

  
52
doExport(corpus, unit_type, filename)
53

  
54
public void doExport(MainCorpus corpus, String unit_type, String filename) {
55

  
56
	size = corpus.getSize() // you may also use: corpus.getTextEndLimits() (= index of last token = size-1)
57
	CQI = CQPSearchEngine.getCqiClient()
58
	word = corpus.getWordProperty()
59
	// note: using "lbn" seems to work better than "pn" (some imported corpora are missing this information)
60
	//pn = corpus.getProperty("pn")
61
	//if (pn == null) pn = corpus.getProperty("div")
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff