Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / CQPUtils.groovy @ 1217

History | View | Annotate | Download (8.3 kB)

1
// Copyright © 2017 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author sheiden
4

    
5
package org.txm.macro.urs.exploit
6

    
7
import org.txm.searchengine.cqp.corpus.*
8

    
9
/**
 * Looks up a corpus or subcorpus by its identifier.
 *
 * The corpora returned by CorpusManager.getCorpusManager().getCorpora() are
 * checked first. When none of them has the requested ID, the subcorpora of
 * each of those corpora (getSubcorpora()) are checked in turn.
 *
 * @param name the identifier compared against getID()
 * @return the first corpus, or failing that the first subcorpus, whose
 *         getID() equals name; null when nothing matches
 */
def getCorpusByName(name) {
        def corpora = CorpusManager.getCorpusManager().getCorpora()

        // First pass: the corpora themselves.
        def corpus = corpora.find { candidate -> candidate.getID() == name }
        if (corpus) return corpus

        // Second pass: the subcorpora of each corpus, stopping at the first hit.
        def subcorpus = null
        for (parent in corpora) {
                subcorpus = parent.getSubcorpora().find { child -> child.getID() == name }
                if (subcorpus) break
        }

        return subcorpus
}
28

    
29
/**
 * Resolves the list of corpora a calling script should work on.
 *
 * Resolution order:
 *  1. if the script binding has an "args" variable with a "corpus" entry,
 *     that single name is looked up with getCorpusByName();
 *  2. otherwise, if "args" has a "corpora" entry, it is split on ',' and each
 *     name is looked up with getCorpusByName() (names are not trimmed, and a
 *     name that is not found yields a null element in the result);
 *  3. otherwise the script's corpusViewSelections are used, keeping only the
 *     Subcorpus and MainCorpus elements.
 *
 * @param script the calling script; its binding and its corpusViewSelection /
 *               corpusViewSelections properties are read
 * @return a list of corpora, or null when the fallback in (3) is used and
 *         script.corpusViewSelection is neither a Subcorpus nor a MainCorpus
 */
def getCorpora(def script) {
        def args = script.binding.variables["args"]
        if (args) {
                if (args["corpus"]) {
                        return [getCorpusByName(args["corpus"])]
                }
                if (args["corpora"]) {
                        return args["corpora"].tokenize(',').collect { getCorpusByName(it) }
                }
        }

        // No usable argument: fall back on the current corpus view selection.
        // instanceof is false for null, so this also covers a missing selection.
        def isCorpus = { it instanceof Subcorpus || it instanceof MainCorpus }
        if (!isCorpus(script.corpusViewSelection)) {
                return null
        }
        return script.corpusViewSelections.findAll(isCorpus)
}
55

    
56
/**
 * Proposes a subcorpus name of the form "CORPUS<n>" that is not yet used
 * among the subcorpora of the named corpus.
 *
 * The corpus is searched by getName() among
 * CorpusManager.getCorpusManager().getCorpora(). Candidate numbers start at 1
 * and increase until no subcorpus carries the candidate name.
 *
 * @param name the name of the parent corpus
 * @return the first free "CORPUS<n>" name, or null (after printing a message)
 *         when no corpus has the given name
 */
def newTestSubCorpusName(name) {
        def parent = CorpusManager.getCorpusManager().getCorpora().find { corpus ->
                corpus.getName() == name
        }
        if (!parent) {
                println "** corpus '"+name+"' not found."
                return null
        }

        def existing = parent.getSubcorpora()
        def index = 1
        // Advance while a subcorpus already carries the candidate name.
        while (existing.find { sub -> sub.getName() == "CORPUS"+index }) {
                index++
        }

        return "CORPUS"+index
}
79

    
80
/**
 * Builds a CQL query string from a comma-separated list of positions.
 *
 * Each item of the input is either a single position ("6") or a
 * "start-end" interval ("2-3"). Single positions share the label n1;
 * intervals are grouped by their length (end - start), and each group gets
 * the label n<length+1>.
 *
 * Example:
 *   input  : 0,1,2-3,4-5,6
 *   output : n1:[]|n2:[][]{1}::n1=0|n1=1|n1=6|n2=2|n2=4
 *
 * The part before "::" lists one token pattern per label, the part after it
 * lists one "label=position" constraint per input item; both parts are
 * joined with "|". Singletons come first, then interval groups in order of
 * first appearance.
 *
 * @param matches comma-separated positions and "start-end" intervals
 * @return the query string; "::" alone when matches contains no item
 */
def positions2cql(matches) {

        def singletons = []
        def startsByLength = [:] // interval length -> start positions, in insertion order

        matches.tokenize(',').each { item ->
                if (item.indexOf('-') != -1) { // start-end
                        // declared locally so nothing is written to the script binding
                        def (start, end) = item.tokenize('-') as int[]
                        def length = end - start
                        if (startsByLength[length]) {
                                startsByLength[length] << start
                        } else {
                                startsByLength[length] = [start]
                        }
                } else { // singleton
                        singletons << item.toInteger()
                }
        }

        def patterns = []    // token patterns, left of "::"
        def constraints = [] // position constraints, right of "::"

        if (singletons.size() > 0) {
                patterns << "n1:[]"
                singletons.each { position ->
                        constraints << "n1="+position
                }
        }

        startsByLength.each { length, starts ->
                def label = "n"+(length+1)
                patterns << label+":[][]{"+length+"}"
                starts.each { position ->
                        constraints << label+"="+position
                }
        }

        return patterns.join("|") + "::" + constraints.join("|")
}
152

    
153
/**
 * Renders the matches of a corpus as a comma-separated list of positions.
 *
 * A match whose start is lower than its end is written "start-end"; any
 * other match is written as its start alone. At most maxMatches matches are
 * listed, and "..." is appended when the corpus has more.
 *
 * @param corpus     object whose getMatches() supplies the matches; each
 *                   match is read through getStart() and getEnd()
 * @param maxMatches maximum number of matches to list
 * @param pretty     when true the separator is ", " instead of ","
 * @return the rendered list, or "<empty>" when there is no match
 */
def corpus2positions(def corpus, def maxMatches, def pretty = false) {

        def matches = corpus.getMatches()
        if (matches.size() == 0) {
                return "<empty>"
        }

        def separator = pretty ? ", " : ","
        def rendered = matches.take(maxMatches).collect { match ->
                match.getStart() < match.getEnd() ? match.getStart()+"-"+match.getEnd() : ""+match.getStart()
        }

        def text = rendered.join(separator)
        // Signal that the listing was cut short.
        return matches.size() > maxMatches ? text + "..." : text
}
186

    
187
// Top-level script body: runs when this file itself is executed, and tells the
// user how to use the helper methods instead. The list printed is the sorted,
// de-duplicated names of the non-synthetic declared methods, minus the
// script's own 'main' and 'run'.
def methods = this.getClass().declaredMethods.findAll { !it.synthetic }.name.sort().unique()
methods.removeAll(['main', 'run'])
// The import shown matches this file's package declaration and class name.
println "** CQPUtils is not meant to be called directly.\nPlease import its methods ${methods}\nwith the following declaration:\nimport org.txm.macro.urs.exploit.CQPUtils"
190

    
191
/*
192
      ___           ___           ___           ___           ___           ___     
193
     /\  \         /\__\         /\__\         /\  \         |\__\         /\  \    
194
    /::\  \       /::|  |       /::|  |       /::\  \        |:|  |       /::\  \   
195
   /:/\:\  \     /:|:|  |      /:|:|  |      /:/\:\  \       |:|  |      /:/\:\  \  
196
  /::\~\:\  \   /:/|:|  |__   /:/|:|  |__   /::\~\:\  \      |:|__|__   /::\~\:\  \ 
197
 /:/\:\ \:\__\ /:/ |:| /\__\ /:/ |:| /\__\ /:/\:\ \:\__\ ____/::::\__\ /:/\:\ \:\__\
198
 \/__\:\/:/  / \/__|:|/:/  / \/__|:|/:/  / \:\~\:\ \/__/ \::::/~~/~    \:\~\:\ \/__/
199
      \::/  /      |:/:/  /      |:/:/  /   \:\ \:\__\    ~~|:|~~|      \:\ \:\__\  
200
      /:/  /       |::/  /       |::/  /     \:\ \/__/      |:|  |       \:\ \/__/  
201
     /:/  /        /:/  /        /:/  /       \:\__\        |:|  |        \:\__\    
202
     \/__/         \/__/         \/__/         \/__/         \|__|         \/__/    
203

204

205

206
0 1 2 3 4 5 6 7 8 9
207

208
+ + + +
209
0 1 2 3 4 5 6 7 8 9
210
+0,+1,+2,+3
211
n:[] :: n = 0|n = 1|n = 2|n = 3
212

213
[ ] [ ]
214
0 1 2 3 4 5 6 7 8 9
215
0-1,2-3
216
n:[] []{1} :: n = 0
217
n:[] []{1} :: n = 2
218

219
[   ] +
220
0 1 2 3 4 5 6 7 8 9
221
0-2,+3
222
n:[] []{2} :: n = 0
223
n:[] :: n = 3
224

225

226
 */
227

    
228
/*
229

230
Touching intervals mergers
231
==========================
232

233
A) from http://stackoverflow.com/questions/31670849/merge-overlapping-intervals
234

235
import java.util.*;
236

237
public class Ideone
238
{   
239
    public static void main (String[] args) throws java.lang.Exception
240
    {
241
        ArrayList<Interval> x = new ArrayList<>();
242

243
        x.add(new Interval(1, 3));
244
        x.add(new Interval(2, 6));
245
        x.add(new Interval(8, 10));
246
        x.add(new Interval(15, 18));
247
        x.add(new Interval(17, 20));
248

249
        x = merge(x);
250

251
        for(Interval i : x)
252
        {
253
            System.out.println(i.getStart() + " " + i.getEnd());
254
        }
255
    }
256

257
    public static ArrayList<Interval> merge(ArrayList<Interval> intervals) {
258

259
        if(intervals.size() == 0 || intervals.size() == 1)
260
            return intervals;
261

262
        Collections.sort(intervals, new IntervalComparator());
263

264
        Interval first = intervals.get(0);
265
        int start = first.getStart();
266
        int end = first.getEnd();
267

268
        ArrayList<Interval> result = new ArrayList<Interval>();
269

270
        for (int i = 1; i < intervals.size(); i++) {
271
            Interval current = intervals.get(i);
272
            if (current.getStart() <= end) {
273
                end = Math.max(current.getEnd(), end);
274
            } else {
275
                result.add(new Interval(start, end));
276
                start = current.getStart();
277
                end = current.getEnd();
278
            }
279
        }
280

281
        result.add(new Interval(start, end));
282
        return result;
283
    }
284
}
285

286
class Interval 
287
{
288
    private int start;
289
    private int end;
290

291
    Interval() {
292
        start = 0;
293
        end = 0;
294
    }
295

296
    Interval(int s, int e) 
297
    {
298
        start = s;
299
        end = e;
300
    }
301

302
    public int getStart() {
303
        return start;
304
    }
305

306
    public int getEnd() {
307
        return end;
308
    }
309
}
310

311
class IntervalComparator implements Comparator<Interval>
312
{
313
    public int compare(Interval i1, Interval i2)
314
    {
315
        return i1.getStart() - i2.getStart();
316
    }
317
}
318

319
B) from http://www.programcreek.com/2012/12/leetcode-merge-intervals/
320

321
public List<Interval> merge(List<Interval> intervals) {
322
    List<Interval> result = new ArrayList<Interval>();
323
 
324
    if(intervals==null||intervals.size()==0)
325
        return result;
326
 
327
    Collections.sort(intervals, new Comparator<Interval>(){
328
        public int compare(Interval i1, Interval i2){
329
            if(i1.start!=i2.start)
330
                return i1.start-i2.start;
331
            else
332
                return i1.end-i2.end;
333
        }
334
    });
335
 
336
    Interval pre = intervals.get(0);
337
    for(int i=0; i<intervals.size(); i++){
338
        Interval curr = intervals.get(i);
339
        if(curr.start>pre.end){
340
            result.add(pre);
341
            pre = curr;
342
        }else{
343
            Interval merged = new Interval(pre.start, Math.max(pre.end, curr.end));
344
            pre = merged;
345
        }
346
    }
347
    result.add(pre);
348
 
349
    return result;
350
}
351
 */