Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / export / CooccurrencesFilter.groovy @ 148

History | View | Annotate | Download (1.7 kB)

1
package org.txm.export
2

    
3
// we import the packages containing the functions we are going to use
4
import org.txm.Toolbox
5
import org.txm.searchengine.cqp.corpus.*
6
import org.txm.searchengine.cqp.corpus.query.*
7
import org.txm.functions.cooccurrences.*
8

    
9
//////////// PARAMETERS ////////////

// Corpus the cooccurrences are computed on.
def corpus = CorpusManager.getCorpusManager().getCorpus("DISCOURS")

// Corpus properties handed to the cooccurrence computation.
def word = corpus.getProperty("word")
def pos = corpus.getProperty("pos")
def viewprops = [word, pos]

// Pivot query.
def query = new Query(Query.fixQuery("je"))

// Filter applied after the computation: a line is kept only when its property
// value at index PROPNUMBER fully matches the regex PROPSVALUETOKEEP.
// NOTE(review): PROPNUMBER presumably indexes viewprops (1 = pos) - confirm.
String PROPSVALUETOKEEP = "V.+"
int PROPNUMBER = 1

// Context definition: either a structural unit, or null for word-based contexts.
def limit = corpus.getStructuralUnit("s") // contexts by <s>
//def limit = null // contexts by word
boolean includeXpivot = false // used only if limit != null

// Context window bounds; per the original comments the lowest allowed value is 1.
int minleft = 1
int maxleft = 20
int minright = 1
int maxright = 20

// Thresholds (0 = no threshold applied here).
int minf = 0
int mincof = 0
int minscore = 0

boolean buildLTWithOnlyCooccurrents = false

//////////// END OF PARAMETERS ////////////
35

    
36
// Run the cooccurrence computation and report how long it took.
println "computing cooc lines..."
long startMillis = System.currentTimeMillis()
//TODO temporary commented
// NOTE(review): while the constructor call below stays disabled, `cooccurrence` is
// never declared in this script, so process() can only succeed if the variable is
// supplied from outside (e.g. the script binding) - confirm before running.
// NOTE(review): the disabled call passes `minleft` twice; the third window argument
// is presumably meant to be `minright` - verify when re-enabling.
//def cooccurrence = new Cooccurrence(corpus, query, viewprops, limit, maxleft, minleft, minleft, maxright, minf,mincof, minscore, includeXpivot, buildLTWithOnlyCooccurrents)
cooccurrence.process()
long elapsedMillis = System.currentTimeMillis() - startMillis
println("Compute time : " + elapsedMillis + " ms")
42

    
43
// Keep only the cooccurrence lines whose property value at index PROPNUMBER
// fully matches the regex PROPSVALUETOKEEP; every other line is removed.
println "filtering lines..."
def lines = cooccurrence.getLines()

// Compile the regex once instead of re-compiling it for every line
// (String.matches compiles its argument on each call).
def keepPattern = ~PROPSVALUETOKEEP

// Remove in place, on the very list returned by getLines(): the later export
// goes through `cooccurrence`, not through this local variable, so the
// filtering has to mutate that list rather than build a filtered copy.
lines.removeAll { line ->
    !keepPattern.matcher(line.props.get(PROPNUMBER)).matches()
}
53

    
54
// Export the cooccurrence result to a text file; the relative path "cooc.txt"
// is resolved against the current working directory.
println "writing result..."
File outputFile = new File("cooc.txt")
cooccurrence.toTxt(outputFile, "UTF-8")
println("printed cooccurrents in " + outputFile.getAbsolutePath())