Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / export / CooccurrencesFilter.groovy @ 479

History | View | Annotate | Download (1.7 kB)

1
package org.txm.export
2

    
3
// we import the packages containing the functions we are going to use
4
import org.txm.Toolbox
5
import org.txm.searchengine.cqp.corpus.*
6
import org.txm.searchengine.cqp.corpus.query.*
7
import org.txm.cooccurrence.core.functions.Cooccurrence
8
import org.txm.functions.cooccurrences.*
9

    
10
//////////// PARAMETERS ////////////

// Corpus looked up by name through the corpus manager.
def corpus = CorpusManager.getCorpusManager().getCorpus("VOEUX")

// Corpus properties handed to the Cooccurrence computation.
def word = corpus.getProperty("word")
def pos = corpus.getProperty("frpos")
def viewprops = [word, pos]

// Query handed to the Cooccurrence computation.
def query = new Query(Query.fixQuery("je"))

// Regular expression a line's property value must match entirely to be kept.
String PROPSVALUETOKEEP = "V.+"
// Index, in each line's props, of the value tested against PROPSVALUETOKEEP
// (presumably follows the order of viewprops, i.e. 1 = frpos -- TODO confirm).
int PROPNUMBER = 1

// Context limit: contexts by <s>
def limit = corpus.getStructuralUnit("s")
// used only if limit != null
boolean includeXpivot = false
// Alternative: contexts by word
//def limit = null

// Context window bounds (min = 1 for each of them)
int minleft = 1
int maxleft = 20
int minright = 1
int maxright = 20

// Thresholds handed to the Cooccurrence computation.
int minf = 0
int mincof = 0
int minscore = 0

boolean buildLTWithOnlyCooccurrents = false

//////////// END OF PARAMETERS ////////////
36

    
37
println "computing cooc lines..."
// Wall-clock start, used only for the elapsed-time message below.
long startedAt = System.currentTimeMillis()
// Window bounds are passed as (maxleft, minleft, minright, maxright).
// minright must be the third bound: passing minleft there instead leaves the
// minright parameter unused, so any value configured for it is ignored.
// NOTE(review): argument order is assumed from the parameter names -- confirm
// against the Cooccurrence constructor signature.
def cooccurrence = new Cooccurrence(
        corpus, query, viewprops, limit,
        maxleft, minleft, minright, maxright,
        minf, mincof, minscore,
        includeXpivot, buildLTWithOnlyCooccurrents)
cooccurrence.process()
println("Compute time : " + (System.currentTimeMillis() - startedAt) + " ms")
42

    
43
println "filtering lines..."
// Drop, in place, every line whose property value at index PROPNUMBER does not
// match PROPSVALUETOKEEP entirely. The list returned by getLines() is modified
// directly; no copy is made here.
def lines = cooccurrence.getLines()
def cursor = lines.iterator()
while (cursor.hasNext()) {
    def candidate = cursor.next()
    def testedValue = candidate.props.get(PROPNUMBER)
    if (testedValue.matches(PROPSVALUETOKEEP)) {
        continue // value matches: keep the line
    }
    cursor.remove()
}
53

    
54
println "writing result..."
// Relative path: the file is resolved against the current working directory.
File outputFile = new File("cooc.txt")
cooccurrence.toTxt(outputFile, "UTF-8")
// Report the resolved location so the result can be found.
println("printed cooccurrents in " + outputFile.absolutePath)