Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / export / ParaConc.groovy @ 399

History | View | Annotate | Download (4.6 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2011-09-27 16:44:37 +0200 (mar., 27 sept. 2011) $
25
// $LastChangedRevision: 2008 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.export
29

    
30
import org.txm.concordance.core.functions.*
31
import org.txm.concordance.core.functions.comparators.*
32
import org.txm.searchengine.cqp.ReferencePattern
33
import org.txm.searchengine.cqp.corpus.*
34
import org.txm.searchengine.cqp.corpus.query.*
35

    
36
////// PARAMETERS //////
37

    
38
// Path of the tab-separated file the export is written to.
def output = 'nomfichier'

// Names of the two aligned corpora to query.
def tgtCorpusName = 'BLLATLAT'
def srcCorpusName = 'BLLATFRO'

// Alternations of word forms; they are inserted into the queries below and
// reused later as regular expressions to highlight matches in the output.
def tgtWords = 'sum|ego' // words highlighted in the target corpus results
def srcWords = "je|j'"   // words highlighted in the source corpus results

// Each query selects a <seg> containing one of the words of its own corpus,
// followed by an alignment constraint (":CORPUS") on the words of the other corpus.
def tgtQuery = '<seg> []* "' + tgtWords + '" []* </seg> :' + srcCorpusName + ' "' + srcWords + '"' // query run on the target corpus
def srcQuery = '<seg> []* "' + srcWords + '" []* </seg> :' + tgtCorpusName + ' "' + tgtWords + '"' // query run on the source corpus

// Echo both queries so the user can check them before the export runs.
println(tgtCorpusName + ': ' + tgtQuery)
println(srcCorpusName + ': ' + srcQuery)
51

    
52
// Runs one concordance per corpus, pairs the resulting lines by their segment
// reference and writes one tab-separated row per segment:
//   segment <TAB> source keyword <TAB> target keyword
// A trailing "*" on the segment id marks a segment that was found in only one
// of the two concordances.
new File(output).withWriter("UTF-8") { writer ->

    // Wraps every occurrence of one of the given words (a regex alternation) in "*...*".
    def highlight = { String text, String words ->
        text.replaceAll("(" + words + ")", '*$1*')
    }

    //// TARGET CONCORDANCE ////
    def tgtCorpus = CorpusManager.getCorpusManager().getCorpus(tgtCorpusName)
    // text_id is only needed if the commented-out addProperty(text_id) below is re-enabled.
    def text_id = tgtCorpus.getStructuralUnit("text").getProperty("id")
    def seg_id = tgtCorpus.getStructuralUnit("seg").getProperty("id")
    def word = tgtCorpus.getProperty("word")
    def referencePattern = new ReferencePattern()
    //referencePattern.addProperty(text_id)
    referencePattern.addProperty(seg_id) // lines are identified by their seg id
    // NOTE(review): the trailing 0, 0 are presumably the left/right context sizes - confirm against Concordance.
    def tgtConcordance = new Concordance(tgtCorpus, new Query(tgtQuery), word, [word], referencePattern, referencePattern, 0, 0)

    //// SOURCE CONCORDANCE ////
    def srcCorpus = CorpusManager.getCorpusManager().getCorpus(srcCorpusName)
    text_id = srcCorpus.getStructuralUnit("text").getProperty("id")
    seg_id = srcCorpus.getStructuralUnit("seg").getProperty("id")
    word = srcCorpus.getProperty("word")
    referencePattern = new ReferencePattern()
    //referencePattern.addProperty(text_id)
    referencePattern.addProperty(seg_id)
    def srcConcordance = new Concordance(srcCorpus, new Query(srcQuery), word, [word], referencePattern, referencePattern, 0, 0)

    //// MERGE CONCORDANCE RESULT ////
    HashSet<String> allKeys = new HashSet<String>()
    HashMap<String, Line> srcLines = [:]
    HashMap<String, Line> tgtLines = [:]

    // Index the target lines by their reference string.
    println "nb tgt result: " + tgtConcordance.getNLines()
    for (Line line : tgtConcordance.getLines(0, tgtConcordance.getNLines())) {
        String ref = line.getViewRef().toString()
        allKeys.add(ref)
        tgtLines.put(ref, line)
    }

    // Fetch the source lines once; they are indexed here and iterated again below.
    println "nb src result: " + srcConcordance.getNLines()
    def srcResult = srcConcordance.getLines(0, srcConcordance.getNLines())
    for (Line line : srcResult) {
        String ref = line.getViewRef().toString()
        allKeys.add(ref)
        srcLines.put(ref, line)
    }
    println "src seg: " + srcLines.keySet()
    println "tgt seg: " + tgtLines.keySet()
    println "nb segments: " + allKeys.size()

    writer.println "segment\tsource\ttarget"

    // First pass: one row per source line, paired with the target line of the
    // same segment when there is one.
    for (Line line : srcResult) {
        String key = line.getViewRef().toString()
        String label = key
        String srcString = line.keywordToString()
        String tgtString = ""
        def tgtLine = tgtLines.get(key)
        if (tgtLine != null) {
            tgtString = tgtLine.keywordToString()
            tgtLines.remove(key) // consumed: must not be written again by the second pass
        } else {
            label = key + "*" // no target counterpart for this segment
        }
        writer.println label + "\t" + highlight(srcString, srcWords) + "\t" + highlight(tgtString, tgtWords)
    }

    println "\n"

    // Second pass: target lines that were not paired during the first pass.
    for (String key : tgtLines.keySet()) {
        def srcLine = srcLines.get(key)
        def tgtLine = tgtLines.get(key)
        String label = key
        String srcString = ""
        // Fixed: the original called keywordToString() on the tgtLines map itself.
        String tgtString = tgtLine.keywordToString()
        // Fixed: the original test was inverted and dereferenced srcLine only when it was null.
        if (srcLine != null) {
            srcString = srcLine.keywordToString()
        } else {
            label = key + "*" // no source counterpart for this segment
        }
        writer.println label + "\t" + highlight(srcString, srcWords) + "\t" + highlight(tgtString, tgtWords)
    }
}

println "Done : written in " + new File(output).getAbsolutePath()