Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / corptef / annotate.groovy @ 1488

History | View | Annotate | Download (4.7 kB)

// Copyright © 2010-2013 ENS de Lyon.
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
// Lyon 2, University of Franche-Comté, University of Nice
// Sophia Antipolis, University of Paris 3.
//
// The TXM platform is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation,
// either version 2 of the License, or (at your option) any
// later version.
//
// The TXM platform is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General
// Public License along with the TXM platform. If not, see
// http://www.gnu.org/licenses.
//
//
//
// $LastChangedDate: 2013-11-08 13:38:06 +0100 (ven. 08 nov. 2013) $
// $LastChangedRevision: 2569 $
// $LastChangedBy: mdecorde $
//
28 986 mdecorde
package org.txm.scripts.importer.corptef;
29 321 mdecorde
30 1000 mdecorde
import org.txm.importer.cwb.*
31 1000 mdecorde
import org.txm.importer.scripts.xmltxm.*;
32 986 mdecorde
import org.txm.scripts.importer.*;
33 321 mdecorde
import org.txm.utils.treetagger.TreeTagger;
34 321 mdecorde
import org.txm.Toolbox;
35 321 mdecorde
36 321 mdecorde
// TODO: Auto-generated Javadoc
37 321 mdecorde
/**
 * Annotates the files of a "corptef" corpus with TreeTagger output.
 *
 * For each file listed in the "txm" sub-directory of the corpus directory the
 * script builds a TreeTagger source file, runs TreeTagger on it, converts the
 * result into a stand-off annotation file ("ttpos" and "ttlemma" annotation
 * types) and injects these annotations back into the file.
 */
class annotate {

        /**
         * Annotates every file found in the "txm" sub-directory of the given directory.
         *
         * The "annotations" and "treetagger" sub-directories of {@code dir} are
         * deleted and re-created first. The TreeTagger model file and install
         * path (both read from the Toolbox preferences) are then validated once,
         * before any file is processed; if either is missing a message is printed
         * and the method returns without annotating anything.
         *
         * @param dir the corpus directory; must contain a "txm" sub-directory
         */
        public void run(File dir)
        {
                String rootDir = dir.getAbsolutePath()+"/";
                String txmDir = dir.getAbsolutePath()+"/txm/"

                //cleaning: start from empty output directories
                new File(rootDir,"annotations").deleteDir();
                new File(rootDir,"annotations").mkdir();
                new File(rootDir,"treetagger").deleteDir();
                new File(rootDir,"treetagger").mkdir();
                println txmDir

                // The model file does not depend on the processed file: check it once.
                File modelfile = new File(Toolbox.getPreference(Toolbox.TREETAGGER_MODELS_PATH),"/rgaqcj.par");
                if (!modelfile.exists())
                {
                        println "Skipping ANNOTATE: Incorrect modelfile path: "+modelfile;
                        return;
                }

                // Same for the TreeTagger binaries: read the preference once and
                // fail before any per-file work has been done.
                def ttInstallPath = Toolbox.getPreference(Toolbox.TREETAGGER_INSTALL_PATH);
                String ttBinPath = ttInstallPath+"/bin/";
                if (!new File(ttBinPath).exists() || ttInstallPath.length() == 0)
                {
                        println("Path to TreeTagger is wrong: "+ttBinPath)
                        return;
                }

                List<File> listfiles = new File(txmDir).listFiles();
                for (File teifile : listfiles)
                {
                        println("annotate "+teifile)
                        annotateFile(teifile, rootDir, modelfile, ttBinPath);
                }
        }

        /**
         * Runs the complete annotation chain on a single file: TreeTagger source
         * export, tagging, stand-off file creation and injection into the file.
         *
         * @param teifile the file to annotate; it is replaced by its annotated version
         * @param rootDir absolute path of the corpus directory, ending with "/"
         * @param modelfile the TreeTagger model file to apply
         * @param ttBinPath path of the TreeTagger "bin" directory
         */
        private void annotateFile(File teifile, String rootDir, File modelfile, String ttBinPath)
        {
                File annotfile = new File(rootDir+"annotations",teifile.getName()+"-STDOFF.xml");
                File ttsrcfile = new File(rootDir+"treetagger",teifile.getName()+"-src.tt");
                File ttrezfile = new File(rootDir+"treetagger",teifile.getName()+"-out.tt");

                //prepare file to be tagged
                // NOTE(review): File.toURL() is deprecated and does not escape special
                // characters; toURI().toURL() is the recommended form, but confirm that
                // BuildTTSrc/AnnotationInjection accept escaped URLs before switching.
                def ttSrcBuilder = new BuildTTSrc(teifile.toURL());
                ttSrcBuilder.process(ttsrcfile);

                //Apply TT
                TreeTagger tt = new TreeTagger(ttBinPath);
                tt.settoken();
                tt.setlemma();
                tt.setquiet();
                tt.setnounknown();
                tt.setsgml();
                tt.seteostag("<s>");
                tt.treetagger( modelfile.getAbsolutePath(), ttsrcfile.getAbsolutePath(), ttrezfile.getAbsolutePath())

                //create stand-off annotation file
                //targeted file for annotations
                String target = teifile.getName()

                //contains txm:application/txm:commandLine
                File reportFile = new File(rootDir,"NLPToolsParameters.xml");

                // responsibility statement passed to the stand-off file writer
                String respPerson = System.getProperty("user.name");
                String respId = "txm";
                String respDesc = "NLP annotation tool";
                String respDate = "Tue Mar  11 1:02:55 Paris, Madrid 2010"; // fixed value, not the current date
                String respWhen = ""

                String appIdent = "TreeTagger";
                String appVersion = "3.2";

                String distributor = "";
                String publiStmt = """""";
                String sourceStmt = """""";

                // annotation types with their descriptions, tagsets and web references
                def types = ["ttpos","ttlemma"];
                def typesDesc = ["fr pos","fr lemma"]
                def typesTAGSET = ["",""]
                def typesWEB = ["",""]
                String idform ="w_";
                String encoding ="UTF-8";

                def transfo = new CSV2W_ANA();
                transfo.setAnnotationTypes( types, typesDesc, typesTAGSET, typesWEB, idform);
                transfo.setResp(respId, respDesc,respDate, respPerson, respWhen);
                transfo.setApp(appIdent, appVersion);
                transfo.setTarget(target, reportFile);
                transfo.setInfos(distributor,  publiStmt, sourceStmt);
                transfo.process( ttrezfile, annotfile, encoding );

                //merge into the tei file
                def injector = new AnnotationInjection(teifile.toURL(), annotfile.toURL(), new ArrayList<String>());
                injector.transfomFile(rootDir,"temp");

                // replace the original file with the merged result
                // (presumably written by transfomFile to rootDir/temp; verify against AnnotationInjection)
                File tempFile = new File(rootDir,"temp");
                if (!(teifile.delete() && tempFile.renameTo(teifile))) println "Warning can't rename file "+tempFile+" to "+teifile
        }

        /**
         * Command-line entry point.
         *
         * @param args optional; {@code args[0]} is the corpus directory to annotate.
         *             When absent, the historical default directory is used.
         */
        public static void main(String[] args)
        {
                String corpusPath = (args != null && args.length > 0) ? args[0] : "C:/Documents and Settings/alavrent/TXM/corpora/corptef/";
                File dir = new File(corpusPath)
                new annotate().run(dir);
        }
}