Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / export / ConcordancesExport_discours.groovy @ 345

History | View | Annotate | Download (6.4 kB)

1
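/**
 * Exports the concordance of a query, entered in a dialog box, computed on
 * the DISCOURS corpus to the tab-separated file ExportConcordanceVOEUX.csv.
 */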
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
// 
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
// 
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
// 
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
// 
27
// 
28
// 
29
// $LastChangedDate: 2012-10-15 17:35:36 +0200 (lun., 15 oct. 2012) $
30
// $LastChangedRevision: 2279 $
31
// $LastChangedBy: mdecorde $ 
32
//
33

    
34
// WARNING: when this script is executed a second time, the dialog asking for
// the query may be displayed behind all the other windows, which can give the
// impression that TXM is frozen. Move the other windows aside to reach the
// hidden query dialog and answer it; this solves the problem.
39

    
40
package org.txm.export
41

    
42
// import the packages containing the functions we are going to use
43
import org.txm.Toolbox
44
import org.txm.searchengine.cqp.corpus.*
45
import org.txm.searchengine.cqp.corpus.query.*
46
import org.txm.concordances.functions.*
47
import org.txm.concordances.functions.comparators.*
48
import org.txm.functions.ReferencePattern
49

    
50
// TODO: Auto-generated Javadoc
51
/* (non-Javadoc)
52
 * @see groovy.lang.Script#run()
53
 */
54
//"ExportConcordanceBFM.csv"
55
// **PARAMETER** : output file of the export
def outfile = new File("ExportConcordanceVOEUX.csv")

// **PARAMETER** : name of the corpus to query (here DISCOURS; "QUETE" was used previously)
def corpusManager = CorpusManager.getCorpusManager()
def discours = corpusManager.getCorpus("DISCOURS")

// **PARAMETER** : word properties to fetch - "word" and "id" are always available.
// The part-of-speech property name depends on the corpus ("pos" here; "ttpos" or
// "frpos" were used with other corpora). Each value is printed right after it is fetched.
def pos = discours.getProperty("pos")
println "pos: ${pos}"
def id = discours.getProperty("id")
println "id: ${id}"
def word = discours.getProperty("word")
println "word: ${word}"

// **PARAMETER** : the "text" structure and its properties - "text" and "text_id" are always available
def text = discours.getStructuralUnit("text")
println "text: ${text}"
def text_id = text.getProperty("id")
println "text_id: ${text_id}"
def text_date = text.getProperty("date")
println "text_date: ${text_date}"
def text_loc = text.getProperty("loc")
println "text_loc: ${text_loc}"
82

    
83
// Ask for the query in a dialog box; the script stops when the dialog returns null.
def querys = javax.swing.JOptionPane.showInputDialog(null, "query ")
if (querys == null) {
	return
}

// build the Query object from the fixed form of the typed text
def fixedQuery = Query.fixQuery(querys)
def query = new Query(fixedQuery)
88

    
89
println "Compute concordance "
def start = System.currentTimeMillis()

// **PARAMETER** : properties composing the reference of each concordance line
// (first column); pick them among the properties defined above
// (text_id, pos and id are other candidates).
def referencePattern = new ReferencePattern()
[text_loc, text_date].each { referencePattern.addProperty(it) }

// **PARAMETER** : properties setting the sorting order of the first column
def sortReferencePattern = new ReferencePattern()
[text_id, id].each { sortReferencePattern.addProperty(it) }

// **PARAMETER** : [word, pos] are the properties according to which the pivot is sorted;
// the same size (10) is given for both contexts around the keyword.
def pivotSortProperties = [word, pos]
int contextSize = 10
def concordance = new Concordance(discours, query, word, pivotSortProperties,
		referencePattern, sortReferencePattern, contextSize, contextSize)

// built-in sort function, initialized on the corpus
def comparator = new LexicographicKeywordComparator()
comparator.initialize(discours)

// sorting is currently disabled; to enable it: concordance.sort(comparator)

// elapsed time in milliseconds
def elapsed = System.currentTimeMillis() - start
println "done ${elapsed}"
125

    
126
// Write the concordance lines to 'outfile' (the output file defined earlier in this
// script), one tab-separated row per line:
//   reference ("loc, date") | left context | pivot | pos of the pivot | right context
// NOTE: FileWriter encodes with the platform default charset.
def writer = new FileWriter(outfile)
try {
	println "write lines"
	start = System.currentTimeMillis()

	// define which occurrence properties will be displayed
	concordance.setViewProperties([word])

	// The lines are fetched by chunks of at most 1000 lines.
	// NOTE(review): getLines(from, to) is treated as inclusive on both ends, as the
	// original last-chunk call getLines(i, getNLines()-1) implies - confirm against
	// the Concordance API. The previous code asked full chunks for (i, i+1000), which
	// with an inclusive end writes line i+1000 twice and requests an out-of-range
	// index when the number of lines is an exact multiple of 1000.
	int nLines = concordance.getNLines()
	int chunkSize = 1000
	for (int from = 0 ; from < nLines ; from += chunkSize) {
		int to = Math.min(from + chunkSize, nLines) - 1
		List<Line> lines = concordance.getLines(from, to)

		for (Line l : lines) {
			// **PARAMETERS** values used to compose the row. Other reference values
			// can be read the same way, e.g. l.getViewRef().getValue(text_id)
			String loc = l.getViewRef().getValue(text_loc)
			String date = l.getViewRef().getValue(text_date)
			String lcontext = l.leftContextToString()
			String pivot = l.keywordToString()
			String rcontext = l.rightContextToString()

			// pos values of the pivot, each one followed by a space
			StringBuilder poss = new StringBuilder()
			for (String s : l.getKeywordsViewProperties().get(pos)) {
				poss.append(s).append(" ")
			}

			// **PARAMETERS** composition of the written row, cf. the values defined above
			writer.write(loc+", "+date+"\t"+lcontext+"\t"+pivot+"\t"+poss+"\t"+rcontext+"\n")
		}
		// push each chunk to the file before fetching the next one
		writer.flush()
	}
} finally {
	// always release the file, even when fetching or writing a line fails
	writer.close()
}
179

    
180
// time spent writing the lines, in milliseconds
def writeDuration = System.currentTimeMillis() - start
println "done ${writeDuration} results"

// report the query and the absolute path of the output file
println "Concordance ${query.getQueryString()} saved in file ${outfile.getAbsolutePath()}"