Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / commands / CrossedPartitionBuilderMacro.groovy @ 499

History | View | Annotate | Download (11.2 kB)

1
package org.txm.macro.commands
2

    
3
import org.kohsuke.args4j.*
4

    
5
import groovy.transform.Field
6

    
7
import org.txm.rcp.swt.widget.parameters.*
8
import org.txm.searchengine.cqp.corpus.*
9
import org.txm.utils.logger.Log;
10
import org.txm.rcp.views.*
11

    
12
//BEGINNING OF PARAMETERS

// The macro works on the current selection of the Corpora view, which must be a Corpus.
def corpus = corpusViewSelection
if (!(corpus instanceof Corpus)) {
	println "Error: this macro should be run with a Corpus selected"
	return
}

// NOTE(review): the initial values below differ from the 'def' values declared in some
// @Option annotations (e.g. debug, structuralUnitPropertiesList) - confirm which one the dialog uses.

// Structural unit to partition on.
@Field @Option(name="structuralUnit", usage="the structural Unit to use", widget="String", required=true, def="text")
String structuralUnit = "text"

// Comma-separated properties of the structural unit to cross.
@Field @Option(name="structuralUnitPropertiesList", usage="the structural Unit properties list separated with commas", widget="String", required=true, def="p1,p2")
def structuralUnitPropertiesList = "loc, type"

// The four values below are not exposed in the dialog yet: their @Option declarations are disabled.

//@Field @Option(name="expandTarget", usage="Expand structure", widget="String", required=false, def="")
def expandTarget = ""

//@Field @Option(name="structuralUnitToIgnore", usage="the structural Unit properties list to ignore separated with commas", widget="String", required=false, def="")
def structuralUnitToIgnore = ""

//@Field @Option(name="partitionName", usage="The partition name to use", widget="String", required=false, def="")
def partitionName = ""

//@Field @Option(name="partNamePrefix", usage="the part prefix to use", widget="String", required=true, def="")
def partNamePrefix = ""

// In debug mode the queries are only displayed.
@Field @Option(name="debug", usage="If debug, then show only queries", widget="Boolean", required=false, def="false")
def debug = true

// Open the parameters input dialog box and stop the macro if it does not return a true value
def parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
	return
}

//END OF PARAMETERS
44

    
45
/**
46
 * Create partition with advanced CQL queries and autoname the parts.
47
 * Can create partitions by defining a multi-level structural units hierarchy or by defining several properties values for one structural unit.
48
 * Can define some structural units to remove from the resulting parts subcorpus.
49
 * 
50
 */
51

    
52
// TODO: add these variables to the macro parameters
53

    
54
// Build two parallel lists: structuralUnits[i] is partitioned on structuralUnitProperties[i].
// Two input shapes are accepted:
//  - one structural unit and N properties: the unit is repeated for each property
//  - N structural units and N properties: they are paired by position
def structuralUnits = []
def structuralUnitProperties = []
structuralUnit = structuralUnit.trim()
structuralUnitPropertiesList = structuralUnitPropertiesList.trim()

// Every entry is trimmed so that "text, div" or "loc, type" do not produce names with
// leading spaces, which would not match any structure or property of the corpus.
def unitNames = structuralUnit.split(",").collect { it.trim() }
def propertyNames = structuralUnitPropertiesList.split(",").collect { it.trim() }

if (unitNames.size() == 1) {
	for (String propertyName : propertyNames) {
		structuralUnits << unitNames[0]
		structuralUnitProperties << propertyName
	}
} else if (unitNames.size() == propertyNames.size()) {
	for (int i = 0 ; i < unitNames.size() ; i++) {
		structuralUnits << unitNames[i]
		structuralUnitProperties << propertyNames[i]
	}
} else {
	println "Error: structuralUnitList size is different from structuralUnitPropertiesList"
	println "structuralUnitList=$structuralUnit structuralUnitPropertiesList=$structuralUnitPropertiesList"
	return false
}
82

    
83
// Check that every structural unit and its paired property exist in the corpus.
// All pairs are checked before stopping so that every problem is reported at once.
try {
	boolean ok = true
	for (int i = 0 ; i < structuralUnits.size() ; i++) {
		def suName = structuralUnits[i]
		def pName = structuralUnitProperties[i]

		def su = corpus.getStructuralUnit(suName)
		if (su == null) {
			println "Error: no '$suName' structure in the '$corpus' corpus"
			ok = false
		} else if (su.getProperty(pName) == null) {
			// each property is tested once, against the unit it is paired with
			println "Error: no '$pName' structure property in the '$corpus' corpus"
			ok = false
		}
	}
	if (!ok) return
} catch (Exception e) {
	println "Error: $e"
	Log.printStackTrace(e)
	return
}
109

    
110

    
111

    
112
// Run the generator; when a partition is returned, update the Corpora view.
PartitionQueriesGenerator generator = new PartitionQueriesGenerator()
try {
	def partition = generator.createPartition(
			corpus, debug,
			partitionName, partNamePrefix,
			structuralUnits, structuralUnitProperties,
			structuralUnitToIgnore, expandTarget)

	if (partition != null) {
		// Refresh the view and expand the parent of the new partition
		Runnable refreshCorporaView = new Runnable() {
			public void run() {
				CorporaView.refresh()
				CorporaView.expand(partition.getParent())
			}
		}
		monitor.syncExec(refreshCorporaView)
	}
} catch (Exception e) {
	println "Exception $e"
	Log.printStackTrace(e)
	return
}
133

    
134
/**
 * Creates a list of CQL queries and part names from the user defined structural units,
 * structural unit properties, structural units to ignore and expand target, and uses
 * them to create a partition of a corpus.
 *
 * One part is generated for each combination of property values found while recursing
 * through the structural units, in the order they are given.
 *
 * @author s
 */
public class PartitionQueriesGenerator {

	/** If true, the queries are displayed but the partition is not created. */
	public boolean DEBUG = false

	/** Name of the partition (optional, generated when empty). */
	public String PARTITION_NAME = ""
	/** Structural units on which the partition is built, e.g. ['text', 'div1']. */
	public def STRUCTURAL_UNITS = []
	/** Properties of the structural units on which the partition is built, e.g. ['id', 'name']. */
	public def STRUCTURAL_UNITS_PROPERTIES = []
	/** Structures to ignore, rendered in CQL as "!speaker" (optional). */
	public def STRUCTURAL_UNITS_TO_IGNORE = []
	/** Prefix of the part names (optional). */
	public String PART_NAMES_PREFIX = ''
	/**
	 * Expand target: extends the matches up to the given parent structure.
	 * NOTE: expanding conflicts with the structures to ignore. If the target is at a higher
	 * level than the ignored structures, they are put back in the CWB results and are
	 * therefore not ignored.
	 */
	public String EXPAND_TARGET = null

	/** Generated queries, one per part. */
	public def queries = []
	/** Generated part names, in the same order as the queries. */
	public def partNames = []

	/**
	 * Initializes the generator, builds the queries and creates the partition.
	 *
	 * @param corpus the corpus to partition
	 * @param debug if true the queries are only displayed and no partition is created
	 * @param partitionName the partition name; a name is generated when it is empty
	 * @param partNamePrefix the prefix of the part names
	 * @param structuralUnits the structural unit names
	 * @param structuralUnitProperties the property names, paired by position with structuralUnits
	 * @param structuralUnitToIgnore the structures to ignore: a list of names or a comma-separated String
	 * @param expandTarget the structure to expand the matches to (optional)
	 * @return the created partition, or null in debug mode or when the arguments are not consistent
	 */
	public Partition createPartition(Corpus corpus, boolean debug,
			String partitionName, String partNamePrefix,
			def structuralUnits, def structuralUnitProperties,
			def structuralUnitToIgnore, String expandTarget) {

		PARTITION_NAME = partitionName
		STRUCTURAL_UNITS = structuralUnits
		STRUCTURAL_UNITS_PROPERTIES = structuralUnitProperties
		// The macro passes a String here: normalize it so it is never iterated character by character
		STRUCTURAL_UNITS_TO_IGNORE = toNameList(structuralUnitToIgnore)
		PART_NAMES_PREFIX = partNamePrefix
		EXPAND_TARGET = expandTarget
		DEBUG = debug

		// Start from a clean state so that the generator can be used more than once
		queries = []
		partNames = []

		if (DEBUG) {
			println "Arguments: "
			println "PARTITION_NAME = $PARTITION_NAME"
			println "STRUCTURAL_UNITS = $STRUCTURAL_UNITS"
			println "STRUCTURAL_UNITS_PROPERTIES = $STRUCTURAL_UNITS_PROPERTIES"
			println "STRUCTURAL_UNITS_TO_IGNORE = $STRUCTURAL_UNITS_TO_IGNORE"
			println "PART_NAMES_PREFIX = $PART_NAMES_PREFIX"
			println "EXPAND_TARGET = $EXPAND_TARGET"
			println "DEBUG = $DEBUG"
		}

		boolean consistentArguments = STRUCTURAL_UNITS != null && STRUCTURAL_UNITS_PROPERTIES != null &&
				STRUCTURAL_UNITS.size() > 0 && STRUCTURAL_UNITS.size() == STRUCTURAL_UNITS_PROPERTIES.size()
		if (!consistentArguments) {
			println "Error: Structural units count or structural units properties count error."
			return null
		}

		if (DEBUG) println 'Creating the queries on corpus "' + corpus + '"'
		if (DEBUG) println 'PARTITION_NAME: ' + PARTITION_NAME

		// Recursing through the corpus and subcorpus
		process(corpus, 0, '', '')

		if (DEBUG) printQueries("Queries processed: ")

		// Finalizing the queries
		finalizeQueries()

		if (DEBUG) {
			printQueries("Queries finalized: ")
			println 'Queries created.'
		}

		// Creating the partition
		if (!DEBUG && queries.size() == partNames.size()) {
			return corpus.createPartition(PARTITION_NAME, queries, partNames)
		}
		return null
	}

	/**
	 * Recurses through the structural units and their properties and creates the queries
	 * and the part names.
	 *
	 * @param corpus the corpus or subcorpus
	 * @param index the index for recursion
	 * @param tmpQuery the temporary query for creating subcorpus part
	 * @param tmpPartName the temporary part name of the subcorpus part
	 */
	protected void process(Corpus corpus, int index, String tmpQuery, String tmpPartName) {
		// End of array: the query and the part name are complete
		if (index >= STRUCTURAL_UNITS.size()) {
			queries.add(tmpQuery)
			partNames.add(PART_NAMES_PREFIX + tmpPartName)
			return
		}

		StructuralUnit su = corpus.getStructuralUnit(STRUCTURAL_UNITS[index])
		if (su == null) {
			throw new IllegalArgumentException("No '" + STRUCTURAL_UNITS[index] + "' structure in corpus '" + corpus.getName() + "'")
		}
		StructuralUnitProperty sup = su.getProperty(STRUCTURAL_UNITS_PROPERTIES[index])
		if (sup == null) {
			throw new IllegalArgumentException("No '" + STRUCTURAL_UNITS_PROPERTIES[index] + "' property in structure '" + STRUCTURAL_UNITS[index] + "'")
		}

		if (DEBUG) {
			if (index == 0) {
				println 'Processing Structural Unit Property "' + sup.getFullName() + '" on mother corpus "' + corpus.getName() + '"'
			} else {
				println 'Processing Structural Unit Property "' + sup.getFullName() + '" on subcorpus part "' + tmpPartName + '"'
			}
		}

		// Partition conditions and part name separators
		String and = (tmpQuery != '') ? ' & ' : ''
		// NOTE(review): the part name separator is empty in every case, so the values are
		// concatenated without any separator - confirm whether '_' was intended.
		String underscore = ''

		// Creating the query parts for each value of the structural unit property.
		// TODO: report bug: the subcorpus should not have to be given to getOrderedValues() since
		// sup was already created from the subcorpus; getValues() has the same problem.
		for (supValue in sup.getOrderedValues(corpus)) {

			// TODO : Log
			if (DEBUG) println 'Value "' + supValue + '"'

			// Getting the subcorpus linked to the structural unit property value
			Subcorpus tmpSubcorpus = corpus.createSubcorpusWithQueryString(su, sup, supValue, "tmp" + UUID.randomUUID())
			try {
				process(tmpSubcorpus, index + 1, (tmpQuery + and + '_.' + sup.getFullName() + '="' + supValue + '"'), tmpPartName + underscore + supValue)
			} finally {
				// Deleting the temporary subcorpus, even when the recursion fails.
				// TODO: corpus.dropSubcorpus(tmpSubcorpus) does not remove the subcorpus (maybe
				// because it must not contain other subcorpora), whereas delete() works.
				tmpSubcorpus.delete()
			}
		}
	}

	/**
	 * Autonames the partition: the "unit_property" pairs are joined with ' x ' and each
	 * structure to ignore is appended as '.NOT_name'. The result is stored in PARTITION_NAME.
	 *
	 * @param partitionName the beginning of the generated name
	 */
	protected void autoNamePartition(String partitionName) {

		// Structural units names and properties
		def crossedUnits = []
		for (int i = 0; i < STRUCTURAL_UNITS.size(); i++) {
			crossedUnits << (STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i])
		}
		// join() puts no trailing ' x ', so nothing has to be cut off afterwards
		String name = (partitionName == null ? '' : partitionName) + crossedUnits.join(' x ')

		// Structural units to ignore
		for (ignoredUnit in toNameList(STRUCTURAL_UNITS_TO_IGNORE)) {
			name += '.NOT_' + ignoredUnit
		}

		PARTITION_NAME = name
	}

	/**
	 * Finalizes the queries: autonames the partition when its name is empty, then wraps each
	 * query in brackets, excludes the structures to ignore and appends the "expand to" clause.
	 */
	protected void finalizeQueries() {

		def ignoredUnits = toNameList(STRUCTURAL_UNITS_TO_IGNORE)
		boolean hasExpandTarget = EXPAND_TARGET != null && EXPAND_TARGET.length() > 0

		String expandTo = ''
		if (hasExpandTarget) {
			// Expanding to user defined target
			expandTo = ' expand to ' + EXPAND_TARGET
		} else if (ignoredUnits.size() == 0) {
			// Expanding to last child structural unit in user defined hierarchy
			expandTo = ' expand to ' + STRUCTURAL_UNITS[STRUCTURAL_UNITS.size() - 1]
		}

		// Autonaming the partition
		if (PARTITION_NAME != null && PARTITION_NAME.length() == 0) {
			autoNamePartition(PARTITION_NAME)
			// Finalizing partition name
			if (hasExpandTarget) {
				PARTITION_NAME += expandTo.replace(' expand to', '.EXPAND TO').replace(' ', '_')
			}
		}

		// The end of the query is the same for all the queries: removed sections, then expand
		String queryEnd = ''
		for (sectionToIgnore in ignoredUnits) {
			queryEnd += ' & !' + sectionToIgnore
		}
		queryEnd += ']' + expandTo

		// Finalizing queries
		for (int j = 0; j < queries.size(); j++) {
			queries.set(j, '[' + queries.get(j) + queryEnd)
		}
	}

	/** Prints the title followed by one "part name = query" line per generated query. */
	private void printQueries(String title) {
		println title
		for (int i = 0 ; i < queries.size() ; i++) {
			println partNames[i] + " = " + queries[i]
		}
	}

	/** Converts a comma-separated String or a collection into a list of trimmed, non-empty names. */
	private static List toNameList(def value) {
		if (value == null) return []
		def rawNames = (value instanceof CharSequence) ? value.toString().split(',') : value
		return rawNames.findAll { it != null }.collect { it.toString().trim() }.findAll { it.length() > 0 }
	}
}