Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / tigersearch / InsertAnnotationsBeroul.groovy @ 479

History | View | Annotate | Download (3.3 kB)

1
/**
2
 * Main.
3
 *
4
 * @param args the args
5
 */
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
// 
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
// 
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
// 
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
// 
27
// 
28
// 
29
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
30
// $LastChangedRevision: 2386 $
31
// $LastChangedBy: mdecorde $ 
32
//
33
package org.txm.scripts.tigersearch;
34

    
35
import org.txm.utils.treetagger.TreeTagger;
36
import org.txm.scripts.teitxm.*;
37

    
38
// TODO: Auto-generated Javadoc
39
/**
40
 * Script that inserts TreeTagger annotations into the Beroul XML file.
41
 *
42
 * @return the java.lang. object
43
 */
44

    
45
// Working directory of the script: <user.home>/xml/fullberoul/
String home = System.getProperty("user.home")
File rootDir = new File(home, "xml/fullberoul/")

// Names of the tags that must stay milestones.
ArrayList<String> milestones = ["tagUsage", "pb", "lb", "catRef"]
53

    
54
// Stage 1 (xml tiger >> TTsrc): build beroul.tt from beroul.xml with BuildTTFile.
File srcfile = new File(rootDir, "beroul.xml")
File resultfile = new File(rootDir, "beroul.tt")
println "xml>>TT from : ${srcfile} to : ${resultfile}"

// NOTE(review): File.toURL() is deprecated because it does not escape characters
// that are illegal in URLs. It is kept as-is since how BuildTTFile consumes the
// URL is not visible from this script - confirm before switching to toURI().toURL().
def builder = new BuildTTFile(srcfile.toURL(), milestones)
// The meaning of the "t" argument is defined by BuildTTFile.process - TODO confirm.
builder.process(resultfile, "t")
61

    
62
// Stage 2 (tag TT): run TreeTagger on beroul.tt and write result.tt.
String infile = resultfile.path
String modelfile = "${home}/treetagger/models/fro.par"
String outfile = "${rootDir.absolutePath}/result.tt"

println "proj ${modelfile} on ${resultfile} >> ${outfile}"

// The TreeTagger wrapper is given the directory <user.home>/treetagger/bin/.
TreeTagger tt = new TreeTagger(home + "/treetagger/bin/")
// Presumably these setters map to TreeTagger command-line options - verify
// against org.txm.utils.treetagger.TreeTagger.
tt.with {
    settoken()
    setquiet()
    setsgml()
    seteostag("<s>")
}
tt.treetagger(modelfile, infile, outfile)
75

    
76
// Stage 3 (inject new TT attributes): merge the annotations found in result.tt
// into the source XML and write the outcome to beroul-result.xml.
// srcfile (beroul.xml) is the same source file that was used for the xml>>TT stage.
File annotationsfiles = new File(rootDir, "result.tt")
File lastresultfile = new File(rootDir, "beroul-result.xml")
// Report the file this stage actually writes (lastresultfile); the message
// previously named resultfile (beroul.tt), which is not touched by this stage.
println("insert TT annotations : "+srcfile+" to : "+lastresultfile)

// builder is re-used: it now refers to the InjectAnnotations instance.
builder = new InjectAnnotations(srcfile.toURL(), annotationsfiles, milestones)
builder.process(lastresultfile)

// Presumably writes the feature declarations to feature.xml - confirm against
// InjectAnnotations.getFeature.
builder.getFeature(new File(rootDir, "feature.xml"))
86
/*
87
//TAG with TnT
88
//need to replace <s> by nothing and </s> by \n
89
String encoding = "UTF-8"
90
for(String text : texts)
91
{
92
        //patch src files
93
        File f = new File(textsDir,text+".t");
94
        File temp = new File("tempFileCVScleaner")
95
        println("patch texts files "+f+": rmv <s> and replace </s>");
96
        Reader reader = new InputStreamReader(new FileInputStream(f),encoding);
97
        Writer writer = new FileWriter(temp);
98
        reader.eachLine 
99
                        {
100
                                if(it.trim().startsWith("</s"))
101
                                        writer.write("\n")
102
                                else if(it.trim().startsWith("<s"))
103
                                        writer.write("")
104
                                else
105
                                        writer.write(it+"\n")
106
                        }
107
        reader.close();
108
        writer.close();
109
        if (!(f.delete() && temp.renameTo(f))) println "Warning can't rename file "+temp+" to "+f
110
}*/