Revision 148 tmp/org.txm.core/src/groovy/org/txm/test/CooccurencesAndReferenceCorpus.groovy

CooccurencesAndReferenceCorpus.groovy (revision 148)
1
package org.txm.test
2
import org.txm.Toolbox
3
import org.txm.searchengine.cqp.corpus.*
4
import org.txm.searchengine.cqp.corpus.query.*
5
import org.txm.functions.cooccurrences.*
6
import org.txm.stat.engine.r.RWorkspace;
7
import org.txm.stat.engine.r.data.*;
8
import org.txm.functions.cooccurrences.comparators.*;
9

  
10
/////////////////PARAMS/////////////////////
11

  
12
// Name of the corpus that the script looks up through CorpusManager.
def corpusname = "DANS16FRANTEXT"
// Name of the corpus property fetched with corpus.getProperty() and handed to Cooccurrence.
def PROP = "word"
// Query text. The surrounding newlines are harmless: the script trims the string
// and passes it through Query.fixQuery() before building the Query object.
// The literal must contain nothing but the query expression itself.
def QUERY ="""
[type="expFound"]
"""

// Bounds of the left and right context windows given to the Cooccurrence constructor.
// The script adds 1 to each of the four values before using them
// (presumably a 0-based to 1-based distance conversion -- confirm against Cooccurrence).
def minleft = 0
def maxleft = 10
def minright = 0
def maxright = 10

// Thresholds given to the Cooccurrence constructor (presumably minimum frequency,
// minimum co-frequency and minimum score -- confirm against Cooccurrence).
def minf = 1
def mincof = 1
def minscore = 0

// Input: file from which the reference lexical table is created; the script stops if it is missing.
File refFile = new File("/home/mdecorde/TEMP/lexiqueFrantext16.txt");
// Output: file that receives the tab-separated, UTF-8 export of the cooccurrents.
def file = new File("/home/mdecorde/TEMP/cooc_dans_ref.tsv")
29

  
30
///////////////////////////////////////////
31

  
32
// Shift each of the four context bounds by one before they reach the Cooccurrence constructor.
minleft++
maxleft++
minright++
maxright++

// Without the reference file nothing can be computed: report it and stop the script.
if (!refFile.exists()) {
	println "ref file does not exists: $refFile"
	return
}

// Drop the newlines that surround the query inside its triple-quoted literal.
QUERY = QUERY.trim()

// Resolve the corpus by name, then the property that is handed to the cooccurrence computation.
MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(corpusname)
Property property = corpus.getProperty(PROP)

// Build the reference corpus: a lexical table created from the reference file.
println "building reference corpus."
def ref_lt = LexicalTableImpl.createLexicalTableImpl(refFile)
44

  
45
// Build the cooccurrences
// The limit argument of the constructor is left null (presumably "no limit" -- confirm against Cooccurrence).
def limit = null
def query = new Query(Query.fixQuery(QUERY))
print "building cooc"
def cooccurrence = new Cooccurrence(corpus, query, [property], limit, maxleft, minleft, minright, maxright, minf, mincof, minscore, false)
// Trace the constructor arguments in the order they were passed.
System.out.println("cooc: "+corpus+" "+query+" "+[property]+" "+limit+" "+maxleft+" "+minleft+" "+minright+" "+maxright+" "+minf+" "+mincof+" "+minscore+" "+false);

// A progress dot is printed before each stage below; the stages run in this fixed order.
print "."
// SET THE REFERENCE CORPUS
// refFile is always non-null here (it is assigned in the parameters and the script
// has already returned if the file is missing), so the call is unconditional.
cooccurrence.setReferenceCorpus(ref_lt.getSymbol())
print "."
cooccurrence.stepQueryLimits()
print "."
cooccurrence.stepGetMatches()
print "."
cooccurrence.stepBuildSignatures()
print "."
cooccurrence.stepCount()
print "."
cooccurrence.stepBuildLexicalTable()
print "."
cooccurrence.stepGetScores()
print "."
// The returned lines are not used here; the call is kept as in the original sequence
// (presumably it materialises the result lines before sorting -- confirm).
cooccurrence.getLines()
cooccurrence.sort(new OccComparator())
// End the line of progress dots.
println ""
69
println ""
70

  
71
// Write the cooccurrents to the output file as UTF-8 with a tab column separator
// (the empty last argument is presumably the text delimiter -- confirm against toTxt),
// then report where the result was written.
println("exporting.")
cooccurrence.toTxt(file, "UTF-8", "\t", "")
println("printed cooccurrents in " + file.absolutePath)
1
//package org.txm.test
2
//import org.txm.Toolbox
3
//import org.txm.searchengine.cqp.corpus.*
4
//import org.txm.searchengine.cqp.corpus.query.*
5
//import org.txm.functions.cooccurrences.*
6
//import org.txm.stat.engine.r.RWorkspace;
7
//import org.txm.stat.engine.r.data.*;
8
//import org.txm.functions.cooccurrences.comparators.*;
9
//
10
///////////////////PARAMS/////////////////////
11
//
12
//def corpusname = "DANS16FRANTEXT"
13
//def PROP = "word"
14
//def QUERY ="""
15
//[type="expFound"]
16
//"""
17
//
18
//def minleft = 0
19
//def maxleft = 10
20
//def minright = 0
21
//def maxright = 10
22
//
23
//def minf = 1
24
//def mincof = 1
25
//def minscore = 0
26
//
27
//File refFile = new File("/home/mdecorde/TEMP/lexiqueFrantext16.txt");
28
//def file = new File("/home/mdecorde/TEMP/cooc_dans_ref.tsv")
29
//
30
/////////////////////////////////////////////
31
//
32
//minleft++
33
//maxleft++
34
//minright++
35
//maxright++
36
//if (!refFile.exists()) { println "ref file does not exists: $refFile"; return;}
37
//QUERY = QUERY.trim();
38
//MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(corpusname)
39
//Property property = corpus.getProperty(PROP)
40
//
41
//// Build the reference corpus
42
//println "building reference corpus."
43
//def ref_lt = LexicalTableImpl.createLexicalTableImpl(refFile);
44
//
45
//// Build the cooccurrences
46
//def limit = null
47
//def query = new Query(Query.fixQuery(QUERY))
48
//print "building cooc"
49
//def cooccurrence = new Cooccurrence(corpus, query, [property], limit, maxleft, minleft, minright, maxright, minf, mincof, minscore, false)
50
//System.out.println("cooc: "+corpus+" "+query+" "+[property]+" "+null+" "+maxleft+" "+minleft+" "+minright+" "+maxright+" "+minf+" "+mincof+" "+minscore+" "+false);
51
//print "."
52
//if (refFile != null)
53
//	cooccurrence.setReferenceCorpus(ref_lt.getSymbol()) // SET THE REFERENCE CORPUS
54
//print "."
55
//cooccurrence.stepQueryLimits();
56
//print "."
57
//cooccurrence.stepGetMatches();
58
//print "."
59
//cooccurrence.stepBuildSignatures();
60
//print "."
61
//cooccurrence.stepCount();
62
//print "."
63
//cooccurrence.stepBuildLexicalTable();
64
//print "."
65
//cooccurrence.stepGetScores();
66
//print "."
67
//cooccurrence.getLines();
68
//cooccurrence.sort(new OccComparator())
69
//println ""
70
//
71
//println "exporting."
72
//cooccurrence.toTxt(file, "UTF-8", "\t", "")
73
//println "printed cooccurrents in "+file.getAbsolutePath()

Also available in: Unified diff