Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / export / ConcordancesExport.groovy @ 187

History | View | Annotate | Download (4.7 kB)

1
/**
2
 * Main.
3
 *
4
 * @param args the args
5
 */
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
// 
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
// 
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
// 
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
//
27
//
28
//
29
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun., 06 mai 2013) $
30
// $LastChangedRevision: 2386 $
31
// $LastChangedBy: mdecorde $
32
//
33
package org.txm.export
34

    
35
// import the packages containing the functions we are going to use
36
import org.txm.Toolbox
37
import org.txm.searchengine.cqp.corpus.*
38
import org.txm.searchengine.cqp.corpus.query.*
39
import org.txm.functions.concordances.*
40
import org.txm.functions.concordances.comparators.*
41
import org.txm.functions.ReferencePattern
42

    
43
// TODO: Auto-generated Javadoc
44
/* (non-Javadoc)
45
 * @see groovy.lang.Script#run()
46
 */
47
// Initialize the TXM Toolbox when nothing has initialized it yet, and remember
// that this script did so: the end of the script shuts the Toolbox down again
// only in that case.
def manualInit = false
if (!org.txm.Toolbox.isInitialized())
{
        manualInit = true
        // The JVM does not expand "~" (that is a shell feature), so the preferences
        // file is resolved against the "user.home" system property instead.
        org.txm.Toolbox.initialize(new File(System.getProperty("user.home"), "TXM/install.prefs"))
}
53

    
54
// look up the corpus registered under the name "GRAAL"
def corpusManager = CorpusManager.getCorpusManager()
def discours = corpusManager.getCorpus("GRAAL")

// fetch the corpus properties used further down, echoing each one to the console
def pos = discours.getProperty("fropos")
println("pos: " + pos)

def pb = discours.getProperty("pb")
println("pb: " + pb)

def id = discours.getProperty("id")
println("id: " + id)

def word = discours.getProperty("word")
println("word: " + word)

// the "text" structural unit and its "id" property
def text = discours.getStructuralUnit("text")
println("text: " + text)

def text_id = text.getProperty("id")
println("text_id: " + text_id)
70

    
71
// Ask the user for the query string in a dialog box.
def querys = javax.swing.JOptionPane.showInputDialog(null, "query ")
if (querys == null)
{
        // The dialog was cancelled: stop here, but first release the Toolbox if this
        // script initialized it, exactly as the normal end of the script does.
        if (manualInit)
                org.txm.Toolbox.shutdown()
        return
}
// Build the query object from the string returned by Query.fixQuery().
def query = new Query(Query.fixQuery(querys))
76

    
77
println("Compute concordance ")
// timestamp used to report the elapsed time once the concordance is built
def start = System.currentTimeMillis()

// reference pattern of each concordance line: text id, pb, fropos and id, in that order
def referencePattern = new ReferencePattern()
[text_id, pb, pos, id].each { referencePattern.addProperty(it) }

// second pattern holding the same properties in the same order; it is handed to the
// Concordance constructor as the sort reference pattern
def sortReferencePattern = new ReferencePattern()
[text_id, pb, pos, id].each { sortReferencePattern.addProperty(it) }
92

    
93
// build the concordance; the same value (10) is passed for both trailing arguments,
// which the column description in the export loop refers to as 10-word contexts
def contextSize = 10
def concordance = new Concordance(discours, query, word, [word, pos],
                referencePattern, sortReferencePattern, contextSize, contextSize)

// prepare a built-in keyword comparator for this corpus
def comparator = new LexicographicKeywordComparator()
comparator.initialize(discours)

// sorting is currently disabled; uncomment the next line to sort the concordance
//concordance.sort(comparator)

def elapsed = System.currentTimeMillis() - start
println("done " + elapsed)
104

    
105
// Export the concordance as tab-separated lines. The file name is relative, so the
// file is created in the current working directory; a single File object is used
// for both writing and the final report so the two can never disagree.
def outputFile = new File("ExportConcordanceBFM3TT.csv")
def writer = new FileWriter(outputFile)

println "write lines"
start = System.currentTimeMillis()

try
{
        // define which occurrence properties will be displayed
        concordance.setViewProperties([word])

        // Lines are fetched page by page rather than all at once.
        int pageSize = 1000
        int nLines = concordance.getNLines()
        for (int i = 0 ; i < nLines ; i += pageSize)
        {
                // The end index is treated as inclusive, as the original last-page call
                // (getNLines()-1) implies; each page therefore ends at i+pageSize-1 so that
                // consecutive pages do not overlap, and never goes past the last line.
                // NOTE(review): confirm the inclusive end index against Concordance.getLines().
                int lastIndex = Math.min(i + pageSize, nLines) - 1
                List<Line> lines = concordance.getLines(i, lastIndex)

                for (Line l : lines)
                {
                        /*
                         Columns of each exported line:
                         A : reference (text name + page)
                         B : left context (10 words)
                         C : keyword (see the remark about "ce que")
                         D : pos of the keyword
                         E : empty column reserved for comments
                         F : right context (10 words)
                         G : identifier of the occurrence (to inject the corrections afterwards)
                         */
                        String ntext = l.getViewRef().getValue(text_id)
                        String page = l.getViewRef().getValue(pb)
                        String lcontext = l.leftContextToString()
                        String pivot = l.keywordToString()
                        String rien = ""
                        String rcontext = l.rightContextToString()
                        String ids = l.getViewRef().getValue(id)

                        // pos values of the keyword, each followed by a space
                        String poss = ""
                        for (String s : l.getKeywordsViewProperties().get(pos))
                        {
                                poss += s+" "
                        }

                        writer.write(ntext+"_"+page+"\t"+lcontext+"\t"+pivot+"\t"+poss+"\t"+rien+"\t"+rcontext+"\t"+ids+"\n")
                }
                // push each completed page to disk
                writer.flush()
        }
}
finally
{
        // close the file even when fetching or writing a page fails
        writer.close()
}

println "done "+(System.currentTimeMillis()-start)+" results"

println("Concordance "+query.getQueryString()+" saved in file "+outputFile.getAbsolutePath())
158

    
159
// shut the Toolbox down only when this script was the one that initialized it
if (manualInit)
{
        Toolbox.shutdown()
}
161