Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / cqp / CQPUtils.groovy @ 2051

History | View | Annotate | Download (7.8 kB)

1
// Copyright © 2017 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author sheiden
4

    
5
package org.txm.macro.cqp
6

    
7
import org.txm.searchengine.cqp.corpus.*
8

    
9
/**
 * Looks up a corpus by name.
 *
 * The corpora returned by the CorpusManager are searched first; when none of
 * them has the requested name, the subcorpora of each of them are searched in
 * turn and the first subcorpus with that name is returned.
 *
 * @param name the corpus or subcorpus name to look for
 * @return the matching corpus or subcorpus, or null when nothing matches
 */
def getCorpusByName(name) {
        def corpora = CorpusManager.getCorpusManager().getCorpora()

        // a corpus of the manager's list wins over any subcorpus of the same name
        def corpus = corpora.find { it.getName() == name }
        if (corpus) return corpus

        // otherwise stop at the first corpus owning a subcorpus with that name
        def subcorpus = null
        for (def parent : corpora) {
                subcorpus = parent.getSubcorpora().find { it.getName() == name }
                if (subcorpus) break
        }

        return subcorpus
}
28

    
29
/**
 * Resolves the corpora a calling script should work on.
 *
 * Resolution order:
 *  1. the "corpus" entry of the script's "args" binding variable: a single name,
 *     returned as a one-element list (the element is whatever getCorpusByName
 *     returns for it);
 *  2. the "corpora" entry of "args": a comma-separated list of names, each
 *     resolved with getCorpusByName;
 *  3. otherwise script.corpusViewSelections, restricted to Subcorpus and
 *     MainCorpus elements, provided script.corpusViewSelection is itself a
 *     Subcorpus or a MainCorpus.
 *
 * @param script the calling script; its binding and its corpusViewSelection /
 *               corpusViewSelections properties are read
 * @return a list of corpora, or null when neither argument is given and
 *         script.corpusViewSelection is not a Subcorpus or MainCorpus
 */
def getCorpora(def script) {
        def args = script.binding.variables["args"]

        // explicit arguments take precedence over the current selection
        if (args) {
                if (args["corpus"]) {
                        return [getCorpusByName(args["corpus"])]
                }
                if (args["corpora"]) {
                        return args["corpora"].tokenize(',').collect { corpusName ->
                                getCorpusByName(corpusName)
                        }
                }
        }

        // fall back on the selection (instanceof is false for null, so null is rejected too)
        def selection = script.corpusViewSelection
        def isCorpus = selection instanceof Subcorpus || selection instanceof MainCorpus
        if (!isCorpus) {
                return null
        }

        return script.corpusViewSelections.findAll { it instanceof Subcorpus || it instanceof MainCorpus }
}
55

    
56
/**
 * Proposes a subcorpus name of the form "CORPUS<n>" that is not yet used by
 * any subcorpus of the given corpus.
 *
 * The corpus is looked up by name in the CorpusManager's corpora; n starts at
 * 1 and is incremented until no subcorpus is named "CORPUS<n>".
 *
 * @param name the name of the corpus whose subcorpora are inspected
 * @return the first free "CORPUS<n>" name, or null (after printing a message)
 *         when no corpus has the given name
 */
def newTestSubCorpusName(name) {
        def parent = CorpusManager.getCorpusManager().getCorpora().find { it.getName() == name }

        if (!parent) {
                println "** corpus '"+name+"' not found."
                return null
        }

        def subcorpora = parent.getSubcorpora()
        def index = 1
        // advance while the candidate name is already taken
        while (subcorpora.find { it.getName() == "CORPUS"+index }) {
                index++
        }

        return "CORPUS"+index
}
79

    
80
/**
 * Converts a comma-separated list of positions into a query string.
 *
 * Input items are either a single position ("6") or an inclusive
 * "start-end" interval ("2-3"). For example "0,1,2-3,4-5,6" yields
 *
 *   n1:[]|n2:[][]{1}::n1=0|n1=1|n1=6|n2=2|n2=4
 *
 * i.e. one labelled pattern per distinct match length on the left of "::"
 * and one "label=position" constraint per match on the right. Single
 * positions use the label n1; intervals of length L = end-start use the
 * label n(L+1) and the pattern "[][]{L}". Interval groups appear in the
 * order their length is first encountered.
 *
 * An interval whose start equals its end is handled as a single position, so
 * that the label n1 is declared only once.
 *
 * @param matches comma-separated positions and "start-end" intervals
 *                (non-negative integers are assumed)
 * @return the query string; "::" alone when matches contains no item
 */
def positions2cql(matches) {

        def singletons = []
        def intervals = [:] // interval length (end-start) -> start positions, in insertion order

        matches.tokenize(',').each { item ->
                if (item.indexOf('-') != -1) { // start-end
                        // bounds are parsed explicitly and kept local to the closure
                        def bounds = item.tokenize('-').collect { it.toInteger() }
                        def start = bounds[0]
                        def end = bounds[1]
                        def length = end-start
                        if (length == 0) {
                                // "3-3" denotes the same single token as "3"
                                singletons << start
                        } else if (intervals[length]) {
                                intervals[length] << start
                        } else {
                                intervals.put(length, [start])
                        }
                } else { // singleton
                        singletons << item.toInteger()
                }
        }

        def patterns = []    // left-hand side of "::", one alternative per label
        def constraints = [] // right-hand side of "::", one alternative per match

        if (singletons.size() > 0) {
                patterns << "n1:[]"
                singletons.each { position ->
                        constraints << "n1="+position
                }
        }

        intervals.each { length, starts ->
                def label = "n"+(length+1)
                patterns << label+":[][]{"+length+"}"
                starts.each { position ->
                        constraints << label+"="+position
                }
        }

        return patterns.join("|")+"::"+constraints.join("|")
}
152

    
153
/**
 * Renders the matches of a corpus as a comma-separated list of positions.
 *
 * Each match is written as "start-end" when its start is lower than its end,
 * and as "start" alone otherwise. At most maxMatches matches are listed; when
 * the corpus has more, "..." is appended to the result.
 *
 * @param corpus     object whose getMatches() provides the matches; each
 *                   match is read through getStart() and getEnd()
 * @param maxMatches maximum number of matches to list
 * @param pretty     when true items are separated by ", " instead of ","
 * @return the list of positions, or "<empty>" when the corpus has no match
 */
def corpus2positions(def corpus, def maxMatches, def pretty = false) {

        def matches = corpus.getMatches()
        if (matches.size() == 0) {
                return "<empty>"
        }

        def separator = pretty ? ", " : ","
        def items = matches.take(maxMatches).collect { m ->
                (m.getStart() < m.getEnd()) ? m.getStart()+"-"+m.getEnd() : m.getStart()
        }

        def text = items.join(separator)
        // signal that the list was truncated
        if (matches.size() > maxMatches) {
                text += "..."
        }

        return text
}
186

    
187
// Script body: running this file directly only prints a usage notice listing
// the sorted, de-duplicated names of the non-synthetic methods declared by the
// script class, 'main' and 'run' excluded.
methods = this.getClass().getDeclaredMethods().findAll { m -> !m.isSynthetic() }.collect { m -> m.getName() }.sort().unique()
methods.removeAll(['main', 'run'])
println "** CQPUtils is not meant to be called directly.\nPlease import its methods ${methods}\nwith the following declaration:\nimport org.txm.macro.cqp.CQPUtilsMacro"
190

    
191
/*
192
          ___           ___           ___           ___           ___           ___
193
         /\  \         /\__\         /\__\         /\  \         |\__\         /\  \
194
        /::\  \       /::|  |       /::|  |       /::\  \        |:|  |       /::\  \
195
   /:/\:\  \     /:|:|  |      /:|:|  |      /:/\:\  \       |:|  |      /:/\:\  \
196
  /::\~\:\  \   /:/|:|  |__   /:/|:|  |__   /::\~\:\  \      |:|__|__   /::\~\:\  \
197
 /:/\:\ \:\__\ /:/ |:| /\__\ /:/ |:| /\__\ /:/\:\ \:\__\ ____/::::\__\ /:/\:\ \:\__\
198
 \/__\:\/:/  / \/__|:|/:/  / \/__|:|/:/  / \:\~\:\ \/__/ \::::/~~/~    \:\~\:\ \/__/
199
          \::/  /      |:/:/  /      |:/:/  /   \:\ \:\__\    ~~|:|~~|      \:\ \:\__\
200
          /:/  /       |::/  /       |::/  /     \:\ \/__/      |:|  |       \:\ \/__/
201
         /:/  /        /:/  /        /:/  /       \:\__\        |:|  |        \:\__\
202
         \/__/         \/__/         \/__/         \/__/         \|__|         \/__/
203

204

205

206
0 1 2 3 4 5 6 7 8 9
207

208
+ + + +
209
0 1 2 3 4 5 6 7 8 9
210
+0,+1,+2,+3
211
n:[] :: n = 0|n = 1|n = 2|n = 3
212

213
[ ] [ ]
214
0 1 2 3 4 5 6 7 8 9
215
0-1,2-3
216
n:[] []{1} :: n = 0
217
n:[] []{1} :: n = 2
218

219
[   ] +
220
0 1 2 3 4 5 6 7 8 9
221
0-2,+3
222
n:[] []{2} :: n = 0
223
n:[] :: n = 3
224

225

226
 */
227

    
228
/*
229

230
Touching intervals mergers
231
==========================
232

233
A) from http://stackoverflow.com/questions/31670849/merge-overlapping-intervals
234

235
import java.util.*;
236

237
public class Ideone
238
{
239
        public static void main (String[] args) throws java.lang.Exception
240
        {
241
                ArrayList<Interval> x = new ArrayList<>();
242

243
                x.add(new Interval(1, 3));
244
                x.add(new Interval(2, 6));
245
                x.add(new Interval(8, 10));
246
                x.add(new Interval(15, 18));
247
                x.add(new Interval(17, 20));
248

249
                x = merge(x);
250

251
                for(Interval i : x)
252
                {
253
                        System.out.println(i.getStart() + " " + i.getEnd());
254
                }
255
        }
256

257
        public static ArrayList<Interval> merge(ArrayList<Interval> intervals) {
258

259
                if(intervals.size() == 0 || intervals.size() == 1)
260
                        return intervals;
261

262
                Collections.sort(intervals, new IntervalComparator());
263

264
                Interval first = intervals.get(0);
265
                int start = first.getStart();
266
                int end = first.getEnd();
267

268
                ArrayList<Interval> result = new ArrayList<Interval>();
269

270
                for (int i = 1; i < intervals.size(); i++) {
271
                        Interval current = intervals.get(i);
272
                        if (current.getStart() <= end) {
273
                                end = Math.max(current.getEnd(), end);
274
                        } else {
275
                                result.add(new Interval(start, end));
276
                                start = current.getStart();
277
                                end = current.getEnd();
278
                        }
279
                }
280

281
                result.add(new Interval(start, end));
282
                return result;
283
        }
284
}
285

286
class Interval
287
{
288
        private int start;
289
        private int end;
290

291
        Interval() {
292
                start = 0;
293
                end = 0;
294
        }
295

296
        Interval(int s, int e)
297
        {
298
                start = s;
299
                end = e;
300
        }
301

302
        public int getStart() {
303
                return start;
304
        }
305

306
        public int getEnd() {
307
                return end;
308
        }
309
}
310

311
class IntervalComparator implements Comparator<Interval>
312
{
313
        public int compare(Interval i1, Interval i2)
314
        {
315
                return i1.getStart() - i2.getStart();
316
        }
317
}
318

319
B) from http://www.programcreek.com/2012/12/leetcode-merge-intervals/
320

321
public List<Interval> merge(List<Interval> intervals) {
322
        List<Interval> result = new ArrayList<Interval>();
323
 
324
        if(intervals==null||intervals.size()==0)
325
                return result;
326
 
327
        Collections.sort(intervals, new Comparator<Interval>(){
328
                public int compare(Interval i1, Interval i2){
329
                        if(i1.start!=i2.start)
330
                                return i1.start-i2.start;
331
                        else
332
                                return i1.end-i2.end;
333
                }
334
        });
335
 
336
        Interval pre = intervals.get(0);
337
        for(int i=0; i<intervals.size(); i++){
338
                Interval curr = intervals.get(i);
339
                if(curr.start>pre.end){
340
                        result.add(pre);
341
                        pre = curr;
342
                }else{
343
                        Interval merged = new Interval(pre.start, Math.max(pre.end, curr.end));
344
                        pre = merged;
345
                }
346
        }
347
        result.add(pre);
348
 
349
        return result;
350
}
351
 */