Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / corptef / annotate.groovy @ 187

History | View | Annotate | Download (4.7 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-11-08 13:38:06 +0100 (ven., 08 nov. 2013) $
25
// $LastChangedRevision: 2569 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.corptef;
29

    
30
import org.txm.importer.cwb.*
31
import org.txm.scripts.teitxm.*;
32
import org.txm.importer.*;
33
import org.txm.utils.treetagger.TreeTagger;
34
import org.txm.Toolbox;
35

    
36
// TODO: Auto-generated Javadoc
37
/**
 * Annotates the TEI files of a corpus directory with TreeTagger.
 *
 * For every file found in the <code>txm</code> sub-directory of the corpus
 * directory, the class:
 * <ol>
 * <li>builds a TreeTagger source file with {@link BuildTTSrc},</li>
 * <li>runs TreeTagger on it,</li>
 * <li>converts the TreeTagger result into a stand-off annotation file with
 * {@link CSV2W_ANA},</li>
 * <li>merges the annotations back into the TEI file with
 * {@link AnnotationInjection}.</li>
 * </ol>
 */
class annotate {

    /** Name of the TreeTagger model file, resolved against the configured models directory. */
    private static final String MODEL_FILE_NAME = "/rgaqcj.par"

    /** Corpus directory used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_CORPUS_DIR = "C:/Documents and Settings/alavrent/TXM/corpora/corptef/"

    /**
     * Annotates every file of <code>dir/txm/</code> in place.
     *
     * The <code>annotations</code> and <code>treetagger</code> working
     * directories of <code>dir</code> are deleted and recreated first. The
     * TreeTagger model file and install path are then validated once, before
     * any file is processed; if either is wrong a message is printed and the
     * method returns without annotating anything.
     *
     * @param dir the corpus directory; it must contain a <code>txm</code>
     *            sub-directory holding the TEI files to annotate
     */
    public void run(File dir)
    {
        String rootDir = dir.getAbsolutePath()+"/";
        String txmDir = dir.getAbsolutePath()+"/txm/"

        //cleaning
        new File(rootDir,"annotations").deleteDir();
        new File(rootDir,"annotations").mkdir();
        new File(rootDir,"treetagger").deleteDir();
        new File(rootDir,"treetagger").mkdir();
        println txmDir

        // The configuration does not depend on the file being processed:
        // validate it once, before any intermediate file is produced.
        File modelfile = new File(Toolbox.getParam(Toolbox.TREETAGGER_MODELS_PATH), MODEL_FILE_NAME);
        if (!modelfile.exists())
        {
            println "Skipping ANNOTATE: Incorrect modelfile path: "+modelfile;
            return;
        }

        String ttInstallPath = Toolbox.getParam(Toolbox.TREETAGGER_INSTALL_PATH);
        String ttBinPath = ttInstallPath+"/bin/";
        // The emptiness test matters: with an empty install path the
        // directory tested would be "/bin/", which exists on Unix systems.
        if (!new File(ttBinPath).exists() || ttInstallPath.length() == 0)
        {
            println("Path to TreeTagger is wrong: "+ttBinPath)
            return;
        }

        List<File> listfiles = new File(txmDir).listFiles();
        for (File teifile : listfiles)
        {
            println("annotate "+teifile)
            annotateFile(teifile, rootDir, modelfile, ttBinPath);
        }
    }

    /**
     * Runs the whole annotation chain on one TEI file and replaces the file
     * with its annotated version.
     *
     * @param teifile the TEI file to annotate (overwritten on success)
     * @param rootDir absolute path of the corpus directory, ending with "/"
     * @param modelfile the TreeTagger model file
     * @param ttBinPath path of the TreeTagger <code>bin</code> directory
     */
    private void annotateFile(File teifile, String rootDir, File modelfile, String ttBinPath)
    {
        File annotfile = new File(rootDir+"annotations",teifile.getName()+"-STDOFF.xml");
        File ttsrcfile = new File(rootDir+"treetagger",teifile.getName()+"-src.tt");
        File ttrezfile = new File(rootDir+"treetagger",teifile.getName()+"-out.tt");

        //prepare file to be tagged
        // NOTE(review): File.toURL() is deprecated because it does not escape
        // special characters; kept as is since the URL consumers are not
        // visible here - confirm before switching to toURI().toURL().
        def ttSrcBuilder = new BuildTTSrc(teifile.toURL());
        ttSrcBuilder.process(ttsrcfile);

        //Apply TT
        TreeTagger tt = new TreeTagger(ttBinPath);
        tt.settoken();
        tt.setlemma();
        tt.setquiet();
        tt.setnounknown();
        tt.setsgml();
        tt.seteostag("<s>");
        tt.treetagger( modelfile.getAbsolutePath(), ttsrcfile.getAbsolutePath(), ttrezfile.getAbsolutePath())

        //create stand-off annotation file
        //targeted file for annotations
        String target = teifile.getName()

        //contains txm:application/txm:commandLine
        File reportFile = new File(rootDir,"NLPToolsParameters.xml");

        String respPerson = System.getProperty("user.name");
        String respId = "txm";
        String respDesc = "NLP annotation tool";
        // NOTE(review): fixed date string, not the actual processing date.
        String respDate = "Tue Mar  11 1:02:55 Paris, Madrid 2010";
        String respWhen = ""

        String appIdent = "TreeTagger";
        String appVersion = "3.2";

        String distributor = "";
        String publiStmt = """""";
        String sourceStmt = """""";

        def types = ["ttpos","ttlemma"];
        def typesDesc = ["fr pos","fr lemma"]
        def typesTAGSET = ["",""]
        def typesWEB = ["",""]
        String idform ="w_";
        String encoding ="UTF-8";

        def transfo = new CSV2W_ANA();
        transfo.setAnnotationTypes( types, typesDesc, typesTAGSET, typesWEB, idform);
        transfo.setResp(respId, respDesc,respDate, respPerson, respWhen);
        transfo.setApp(appIdent, appVersion);
        transfo.setTarget(target, reportFile);
        transfo.setInfos(distributor,  publiStmt, sourceStmt);
        transfo.process( ttrezfile, annotfile, encoding );

        //merge into the tei file
        def injector = new AnnotationInjection(teifile.toURL(), annotfile.toURL(), new ArrayList<String>());
        injector.transfomFile(rootDir,"temp");

        // Only delete the original once the merged file is known to exist,
        // so that a failed injection does not destroy the source TEI file.
        File tempfile = new File(rootDir,"temp");
        if (!(tempfile.exists() && teifile.delete() && tempfile.renameTo(teifile))) println "Warning can't rename file "+tempfile+" to "+teifile
    }

    /**
     * Command-line entry point.
     *
     * @param args optional; <code>args[0]</code> is the corpus directory to
     *             annotate. Without arguments the historical default
     *             directory is used.
     */
    public static void main(String[] args)
    {
        // Groovy truth: a null or empty array evaluates to false.
        File dir = new File(args ? args[0] : DEFAULT_CORPUS_DIR)
        new annotate().run(dir);
    }
}