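// Source: root / tmp / org.txm.core / src / groovy / org / txm / macro / commands / CrossedPartitionBuilderMacro.groovy @ revision 187
// (Extracted from a web repository viewer; the texts "Statistics | Revision:" and
// "History | View | Annotate | Download (11.2 kB)" were page navigation, not part of the file.)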
package org.txm.macro.commands
2

    
3
import groovy.transform.Field
4

    
5
import org.kohsuke.args4j.*
6
import org.txm.rcpapplication.swt.widget.parameters.*
7
import org.txm.rcpapplication.views.*
8
import org.txm.searchengine.cqp.corpus.*
9
import org.txm.utils.logger.Log
10

    
11
//BEGINNING OF PARAMETERS

// The macro works on the current corpus selection. 'corpusViewSelection' is not
// defined in this script; it is expected to be provided by the environment that
// runs the macro.
def corpus = corpusViewSelection
if (!(corpus instanceof Corpus)) {
        println "Error: this macro should be run with a Corpus selected"
        return
}

// Structural unit name. It may also hold several names separated with commas, in
// which case it must contain as many names as 'structuralUnitPropertiesList'.
@Field @Option(name="structuralUnit", usage="the structural Unit to use", widget="String", required=true, def="text")
String structuralUnit = "text"

// Comma-separated list of the structural unit properties to cross.
// NOTE(review): the initial value ("loc, type") differs from the declared default
// ("p1,p2") - confirm which one the parameters dialog actually uses.
@Field @Option(name="structuralUnitPropertiesList", usage="the structural Unit properties list separated with commas", widget="String", required=true, def="p1,p2")
def structuralUnitPropertiesList = "loc, type";

// The four values below are not exposed in the parameters dialog yet: their
// @Option declarations are kept commented out until they are.

//@Field @Option(name="expandTarget", usage="Expand structure", widget="String", required=false, def="")
def expandTarget = "";

//@Field @Option(name="structuralUnitToIgnore", usage="the structural Unit properties list to ignore separated with commas", widget="String", required=false, def="")
def structuralUnitToIgnore = "";

//@Field @Option(name="partitionName", usage="The partition name to use", widget="String", required=false, def="")
def partitionName = ""

//@Field @Option(name="partNamePrefix", usage="the part prefix to use", widget="String", required=true, def="")
def partNamePrefix = ""

// When true, the queries are only printed and no partition is created.
// NOTE(review): the initial value (true) differs from the declared default
// ("false") - confirm which one the parameters dialog actually uses.
@Field @Option(name="debug", usage="If debug, then show only queries", widget="Boolean", required=false, def="false")
def debug = true;

// Open the parameters input dialog box and stop the macro when it returns false
// (presumably when the user cancels the dialog - confirm).
if (!ParametersDialog.open(this)) return;

//END OF PARAMETERS
43

    
44
/**
 * Creates a partition with advanced CQL queries and automatically names the parts.
 * The partition can be built either from a multi-level hierarchy of structural units
 * or from several properties of a single structural unit.
 * Some structural units can be excluded from the resulting part subcorpora.
 */

// TODO: add these variables to the macro parameters
52

    
53
// Parallel lists: structuralUnits[i] is the structure that owns the property
// structuralUnitProperties[i].
def structuralUnits = []
def structuralUnitProperties = []
structuralUnit = structuralUnit.trim()
structuralUnitPropertiesList = structuralUnitPropertiesList.trim()

// Build lists
// Both inputs are comma-separated; every item is trimmed so that "text, div" and
// "text,div" are equivalent.
def unitNames = structuralUnit.split(",").collect { it.trim() }
def propertyNames = structuralUnitPropertiesList.split(",").collect { it.trim() }
if (unitNames.size() == 1) {
        // A single structural unit: it is paired with each of the listed properties
        for (String propertyName : propertyNames) {
                structuralUnits << unitNames[0]
                structuralUnitProperties << propertyName
        }
} else {
        if (unitNames.size() == propertyNames.size()) {
                // Several structural units: the i-th unit is paired with the i-th property
                for (int i = 0 ; i < unitNames.size() ; i++) {
                        structuralUnits << unitNames[i]
                        structuralUnitProperties << propertyNames[i]
                }
        } else {
                println "Error: structuralUnitList size is different from structuralUnitPropertiesList"
                println "structuralUnitList=$structuralUnit structuralUnitPropertiesList=$structuralUnitPropertiesList"
                return false
        }
}
81

    
82
// Check that every requested structure and structure property exists in the corpus.
// All the pairs are checked before stopping, so that every problem is reported at once.
try {
        boolean ok = true;
        for (int i = 0 ; i < structuralUnits.size() ; i++) {
                def suName = structuralUnits[i]
                def pName = structuralUnitProperties[i]

                def su = corpus.getStructuralUnit(suName)
                if (su == null) {
                        println "Error: no '$suName' structure in the '$corpus' corpus"
                        ok = false;
                } else if (su.getProperty(pName) == null) {
                        // The property is looked up on the structure it is paired with only
                        println "Error: no '$pName' structure property in the '$corpus' corpus"
                        ok = false;
                }
        }
        if (!ok) return;
} catch(Exception e) {
        println "Error: $e"
        Log.printStackTrace(e)
        return
}
108

    
109

    
110

    
111
//Running
// Generate the queries and, unless 'debug' is set, create the partition.
PartitionQueriesGenerator pqg = new PartitionQueriesGenerator();
try {
        def partition = pqg.createPartition(corpus, debug,
                        partitionName, partNamePrefix,
                        structuralUnits, structuralUnitProperties,
                        structuralUnitToIgnore, expandTarget);

        // createPartition() returns null in debug mode or when its arguments are invalid
        if (partition != null) {
                // 'monitor' is not defined in this script; it is expected to be provided
                // by the environment that runs the macro. The Corpora view is refreshed
                // and the parent of the new partition expanded through its syncExec().
                monitor.syncExec(new Runnable() {
                                        public void run() {
                                                CorporaView.refresh();
                                                CorporaView.expand(partition.getParent());
                                        }
                                });
        }
} catch(Exception e) {
        println "Exception $e"
        Log.printStackTrace(e);
        return;
}
132

    
133
/**
 * Creates a list of CQL queries and part names from the user-defined lists of
 * structural units, structural unit properties and structural units to ignore,
 * and from the specified expand target.
 * @author s
 *
 */
public class PartitionQueriesGenerator {

        /** If true, the queries are printed but the partition is not created. */
        public boolean DEBUG = false;

        /** Name of the partition (optional: generated by finalizeQueries() when empty). */
        public String PARTITION_NAME = "";
        /** Structural units to build the partition on, e.g. ['text', 'div1']. */
        public def STRUCTURAL_UNITS = [];
        /** Properties of the structural units to build the partition on, e.g. ['id', 'name']. */
        public def STRUCTURAL_UNITS_PROPERTIES = [];
        /** Structures to ignore, e.g. in CQL: !speaker (optional). */
        public def STRUCTURAL_UNITS_TO_IGNORE = [];
        /** Prefix of the part names (optional). */
        public String PART_NAMES_PREFIX = '';
        /**
         * Expand target: extends the matches up to the specified parent structure.
         * NOTE: the expand conflicts with the sections to ignore. If the target is at a
         * higher level than the structures to ignore, they are put back in the CWB
         * result list and are therefore not ignored.
         */
        public String EXPAND_TARGET = null;

        /** Generated queries; queries[i] selects the part named partNames[i]. */
        public def queries = [];
        /** Generated part names, parallel to 'queries'. */
        public def partNames = [];

        /**
         * Initializes the generator, builds the queries and creates the partition.
         *
         * @param corpus the corpus to partition
         * @param debug if true, the queries are only printed and no partition is created
         * @param partitionName the partition name; an empty name is replaced by a generated one
         * @param partNamePrefix the prefix of every part name (null is treated as empty)
         * @param structuralUnits the structural unit names
         * @param structuralUnitProperties the property names, parallel to structuralUnits
         * @param structuralUnitToIgnore the structures to ignore: a list of names or a
         *        comma-separated String
         * @param expandTarget the structure to expand the matches to (may be null or empty)
         * @return the created partition, or null in debug mode or if the arguments are invalid
         */
        public Partition createPartition(Corpus corpus, boolean debug,
                        String partitionName, String partNamePrefix,
                        def structuralUnits, def structuralUnitProperties,
                        def structuralUnitToIgnore, String expandTarget) {

                PARTITION_NAME = partitionName
                STRUCTURAL_UNITS = structuralUnits ?: []
                STRUCTURAL_UNITS_PROPERTIES = structuralUnitProperties ?: []
                STRUCTURAL_UNITS_TO_IGNORE = toNameList(structuralUnitToIgnore)
                PART_NAMES_PREFIX = partNamePrefix ?: ''
                EXPAND_TARGET = expandTarget
                DEBUG = debug;

                // Start from a clean state so that the generator can be reused
                queries = []
                partNames = []

                if (DEBUG) {
                        println "Arguments: "
                        println "PARTITION_NAME = $PARTITION_NAME"
                        println "STRUCTURAL_UNITS = $STRUCTURAL_UNITS"
                        println "STRUCTURAL_UNITS_PROPERTIES = $STRUCTURAL_UNITS_PROPERTIES"
                        println "STRUCTURAL_UNITS_TO_IGNORE = $STRUCTURAL_UNITS_TO_IGNORE"
                        println "PART_NAMES_PREFIX = $PART_NAMES_PREFIX"
                        println "EXPAND_TARGET = $EXPAND_TARGET"
                        println "DEBUG = $DEBUG"
                }

                if (STRUCTURAL_UNITS.size() == 0 || STRUCTURAL_UNITS.size() != STRUCTURAL_UNITS_PROPERTIES.size()) {
                        println "Error: Structural units count or structural units properties count error.";
                        return null
                }

                if (DEBUG) println 'Creating the queries on corpus "' + corpus + '"';
                if (DEBUG) println 'PARTITION_NAME: ' + PARTITION_NAME;

                // Recursing through the corpus and subcorpus
                process(corpus, 0, '', '');
                if (DEBUG) printQueries("Queries processed: ")

                // Finalizing the queries
                finalizeQueries();
                if (DEBUG) printQueries("Queries finalized: ")

                if (DEBUG) println 'Queries created.';

                // Creating the partition (never in debug mode)
                if (DEBUG || queries.size() != partNames.size()) {
                        return null
                }
                return corpus.createPartition(PARTITION_NAME, queries, partNames);
        }

        /**
         * Prints every part name with its query.
         * @param title the line printed before the list
         */
        private void printQueries(String title) {
                println title
                for (int i = 0 ; i < queries.size() ; i++) {
                        println partNames[i] + " = " + queries[i]
                }
        }

        /**
         * Converts a list of names, or a comma-separated String of names, to a list of
         * trimmed non-empty names. Iterating a String directly would yield its characters.
         * @param value a collection, a comma-separated String or null
         * @return the names, never null
         */
        private static List<String> toNameList(def value) {
                if (value == null) {
                        return []
                }
                def items = (value instanceof CharSequence) ? (value.toString().split(',') as List) : value
                return items.findAll { it != null }.collect { it.toString().trim() }.findAll { it.length() > 0 }
        }

        /**
         * Recurses through the structural units and structural unit properties of the
         * corpus and creates the queries and the part names.
         * @param corpus the corpus or subcorpus
         * @param index the index for recursion
         * @param tmpQuery the temporary query for creating subcorpus part
         * @param tmpPartName the temporary part name of the subcorpus part
         * @throws IllegalArgumentException if the structure or the property at 'index' does not exist
         */
        protected void process(Corpus corpus, int index, String tmpQuery, String tmpPartName) {
                // End of array: one query and one part name are complete
                if (index >= STRUCTURAL_UNITS.size()) {
                        queries.add(tmpQuery);
                        partNames.add(PART_NAMES_PREFIX + tmpPartName);
                        return;
                }

                StructuralUnit su = corpus.getStructuralUnit(STRUCTURAL_UNITS[index]);
                if (su == null) {
                        throw new IllegalArgumentException("No '" + STRUCTURAL_UNITS[index] + "' structure in corpus '" + corpus.getName() + "'")
                }
                StructuralUnitProperty sup = su.getProperty(STRUCTURAL_UNITS_PROPERTIES[index]);
                if (sup == null) {
                        throw new IllegalArgumentException("No '" + STRUCTURAL_UNITS_PROPERTIES[index] + "' property in structure '" + STRUCTURAL_UNITS[index] + "'")
                }

                if (DEBUG) {
                        if (index == 0) {
                                println 'Processing Structural Unit Property "' + sup.getFullName() + '" on mother corpus "' + corpus.getName() + '"';
                        } else {
                                println 'Processing Structural Unit Property "' + sup.getFullName() + '" on subcorpus part "' + tmpPartName + '"';
                        }
                }

                // Separators between the conditions of the query and between the values
                // of the part name; both are the same for every value of this level.
                String conjunction = (tmpQuery != '') ? ' & ' : '';
                // NOTE(review): the values are concatenated without any separator in the
                // part names - confirm whether '_' is wanted between them.
                String partNameSeparator = '';

                // Creating the query parts for each value of the structural unit property.
                // TODO: report the bug: the subcorpus should not have to be passed again
                // to the method since 'sup' was already created from the subcorpus;
                // getValues() is reported to fail too.
                for (supValue in sup.getOrderedValues(corpus)) {

                        // TODO : Log
                        if (DEBUG) println 'Value "' + supValue + '"';

                        // Getting the subcorpus linked to the structural unit property value
                        Subcorpus tmpSubcorpus = corpus.createSubcorpusWithQueryString(su, sup, supValue, "tmp" + UUID.randomUUID());
                        try {
                                process(tmpSubcorpus, index + 1,
                                                (tmpQuery + conjunction + '_.' + sup.getFullName() + '="' + supValue + '"'),
                                                tmpPartName + partNameSeparator + supValue);
                        } finally {
                                // Deleting the temporary subcorpus, even if the recursion failed.
                                // TODO: bug: corpus.dropSubcorpus(tmpSubcorpus) does not remove the
                                // subcorpus, maybe because it must not contain other subcorpora;
                                // delete() works.
                                tmpSubcorpus?.delete();
                        }
                }
        }

        /**
         * Autonames the partition: the "unit_property" pairs are joined with ' x ', then
         * one '.NOT_name' suffix is added per structural unit to ignore. The result is
         * stored in PARTITION_NAME.
         * @param partitionName the beginning of the generated name
         */
        protected void autoNamePartition(String partitionName) {

                // Structural units names and properties
                def crossedUnits = []
                for (int i = 0; i < STRUCTURAL_UNITS.size(); i++) {
                        crossedUnits << (STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i])
                }

                // Joining avoids a trailing ' x ' to cut afterwards
                StringBuilder name = new StringBuilder(partitionName ?: '')
                name.append(crossedUnits.join(' x '))

                // Structural units to ignore
                for (ignoredUnit in STRUCTURAL_UNITS_TO_IGNORE) {
                        name.append('.NOT_').append(ignoredUnit)
                }

                PARTITION_NAME = name.toString()
        }

        /**
         * Finalizes the queries: autonames the partition if it has an empty name, then
         * wraps every query in '[...]', adds the exclusion of the structural units to
         * ignore and the 'expand to' clause.
         */
        protected void finalizeQueries() {

                String expandTo = '';
                // Expanding to user defined target
                if (EXPAND_TARGET != null && EXPAND_TARGET.length() > 0) {
                        expandTo = ' expand to ' + EXPAND_TARGET;
                }
                // Expanding to last child structural unit in user defined hierarchy
                else if (STRUCTURAL_UNITS_TO_IGNORE.size() == 0 && STRUCTURAL_UNITS.size() > 0) {
                        expandTo = ' expand to ' + STRUCTURAL_UNITS[STRUCTURAL_UNITS.size() - 1];
                }

                // Autonaming the partition
                if (PARTITION_NAME != null && PARTITION_NAME.length() == 0) {
                        autoNamePartition(PARTITION_NAME);
                        // Finalizing partition name
                        if (EXPAND_TARGET != null && EXPAND_TARGET.length() > 0) {
                                PARTITION_NAME += expandTo.replace(' expand to', '.EXPAND TO').replace(' ', '_');
                        }
                }

                // The end of the query is the same for every query: removing some sections
                String queryEnd = '';
                for (sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE) {
                        queryEnd += ' & !' + sectionToIgnore;
                }
                queryEnd += ']' + expandTo;

                // Finalizing queries
                for (int j = 0; j < queries.size(); j++) {
                        queries.set(j, '[' + queries.get(j) + queryEnd);
                }
        }
}