Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / export / ParaConc.groovy @ 479

History | View | Annotate | Download (4.7 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2011-09-27 16:44:37 +0200 (mar., 27 sept. 2011) $
25
// $LastChangedRevision: 2008 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.export
29

    
30
import org.txm.Toolbox
31
import org.txm.searchengine.cqp.corpus.*
32
import org.txm.searchengine.cqp.corpus.query.*
33
import org.txm.concordance.core.functions.Concordance
34
import org.txm.concordance.core.functions.Line
35
import org.txm.functions.concordances.*
36
import org.txm.functions.concordances.comparators.*
37
import org.txm.searchengine.cqp.ReferencePattern
38

    
39
////// PARAMETERS //////

// Path handed to new File(...) for the tab-separated result.
String output = "nomfichier"

// Names of the two corpora looked up through the CorpusManager.
String tgtCorpusName = "BLLATLAT"
String srcCorpusName = "BLLATFRO"

// Regex alternations: embedded in the queries below and reused to highlight matches in the export.
String tgtWords = "sum|ego" // use to highlight lat words
String srcWords = "je|j'" // use to highlight fro words

// Each query matches a whole <seg> containing one of the words of its own corpus,
// followed by a ":<other corpus>" clause carrying the words of the other corpus.
String tgtQuery = '<seg> []* "' + tgtWords + '" []* </seg> :' + srcCorpusName + ' "' + srcWords + '"' // default lat query
String srcQuery = '<seg> []* "' + srcWords + '" []* </seg> :' + tgtCorpusName + ' "' + tgtWords + '"' // default fro query

println "${tgtCorpusName}: ${tgtQuery}"
println "${srcCorpusName}: ${srcQuery}"
54

    
55
// Builds the target and source concordances, pairs their lines by reference string
// and writes one tab-separated row per line to the output file (UTF-8).
// A trailing "*" on the segment key marks a row whose counterpart line is missing.
new File(output).withWriter("UTF-8") { writer ->

    // Builds a concordance on the named corpus; its reference pattern holds only the "seg" id property.
    def buildConcordance = { String corpusName, String cqlQuery ->
        def corpus = CorpusManager.getCorpusManager().getCorpus(corpusName)
        def segId = corpus.getStructuralUnit("seg").getProperty("id")
        def word = corpus.getProperty("word")
        def referencePattern = new ReferencePattern()
        referencePattern.addProperty(segId)
        return new Concordance(corpus, new Query(cqlQuery), word, [word], referencePattern, referencePattern, 0, 0)
    }

    // Surrounds every match of the given regex alternation with "*".
    def highlight = { String text, String words ->
        text.replaceAll("(" + words + ")", '*$1*')
    }

    //// TARGET CONCORDANCE ////
    def tgtConcordance = buildConcordance(tgtCorpusName, tgtQuery)

    //// SOURCE CONCORDANCE ////
    def srcConcordance = buildConcordance(srcCorpusName, srcQuery)

    //// MERGE CONCORDANCE RESULT ////
    Set<String> allKeys = new HashSet<String>()
    // [:] keeps insertion order, so leftover target rows are written in concordance order.
    Map<String, Line> srcLines = [:]
    Map<String, Line> tgtLines = [:]

    println "nb tgt result: " + tgtConcordance.getNLines()
    for (Line line : tgtConcordance.getLines(0, tgtConcordance.getNLines())) {
        String key = line.getViewRef().toString()
        allKeys.add(key)
        tgtLines.put(key, line)
    }

    println "nb src result: " + srcConcordance.getNLines()
    // Fetched once and reused for both the indexing pass and the export pass.
    def srcResultLines = srcConcordance.getLines(0, srcConcordance.getNLines())
    for (Line line : srcResultLines) {
        String key = line.getViewRef().toString()
        allKeys.add(key)
        srcLines.put(key, line)
    }
    println "src seg: " + srcLines.keySet()
    println "tgt seg: " + tgtLines.keySet()
    println "nb segments: " + allKeys.size()

    writer.println "segment\tsource\ttarget"

    // One row per source line; a matched target line is consumed so it is not written again below.
    for (Line srcLine : srcResultLines) {
        String key = srcLine.getViewRef().toString()
        Line tgtLine = tgtLines.remove(key)
        String label = (tgtLine != null) ? key : key + "*"
        String srcString = highlight(srcLine.keywordToString(), srcWords)
        String tgtString = highlight(tgtLine != null ? tgtLine.keywordToString() : "", tgtWords)
        writer.println label + "\t" + srcString + "\t" + tgtString
    }

    println "\n"

    // Process remaining tgt lines, i.e. those not consumed by a source line above.
    tgtLines.each { String key, Line tgtLine ->
        Line srcLine = srcLines.get(key)
        // Read the keyword from the line itself (not from the map) and only
        // dereference the source line when it actually exists.
        String label = (srcLine != null) ? key : key + "*"
        String srcString = highlight(srcLine != null ? srcLine.keywordToString() : "", srcWords)
        String tgtString = highlight(tgtLine.keywordToString(), tgtWords)
        writer.println label + "\t" + srcString + "\t" + tgtString
    }
}
135

    
136
// Report the absolute location of the exported file.
println("Done : written in " + new File(output).absolutePath)