Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / nlp / PatchAfrlex.groovy @ 1000

History | View | Annotate | Download (2.4 kB)

1 321 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 321 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 321 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 321 mdecorde
// Sophia Antipolis, University of Paris 3.
5 321 mdecorde
//
6 321 mdecorde
// The TXM platform is free software: you can redistribute it
7 321 mdecorde
// and/or modify it under the terms of the GNU General Public
8 321 mdecorde
// License as published by the Free Software Foundation,
9 321 mdecorde
// either version 2 of the License, or (at your option) any
10 321 mdecorde
// later version.
11 321 mdecorde
//
12 321 mdecorde
// The TXM platform is distributed in the hope that it will be
13 321 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 321 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 321 mdecorde
// PURPOSE. See the GNU General Public License for more
16 321 mdecorde
// details.
17 321 mdecorde
//
18 321 mdecorde
// You should have received a copy of the GNU General
19 321 mdecorde
// Public License along with the TXM platform. If not, see
20 321 mdecorde
// http://www.gnu.org/licenses.
21 321 mdecorde
//
22 321 mdecorde
//
23 321 mdecorde
//
24 479 mdecorde
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
25 321 mdecorde
// $LastChangedRevision: 2386 $
26 321 mdecorde
// $LastChangedBy: mdecorde $
27 321 mdecorde
//
28 1000 mdecorde
package org.txm.scripts.scripts;
29 321 mdecorde
30 321 mdecorde
// TODO: Auto-generated Javadoc
31 321 mdecorde
/**
 * Patches a tab-separated lexicon file by replacing each POS name with the
 * name it is mapped to in a correspondance file.
 *
 * @deprecated
 * @author mdecorde
 */
class PatchAfrlex {
        /**
         * Rewrites {@code afrlex} into {@code afrlexfixed}, translating POS names.
         *
         * Each line of {@code correspondanceFile} is split on tabs; the first field
         * is the POS name to replace and the second field is its replacement.
         * Each line of {@code afrlex} is split on tabs; the first field is copied
         * as is, and the remaining fields are processed as (POS name, value) pairs
         * in which only the POS name is translated. A POS name without a mapping
         * is written unchanged, and a trailing POS name without a value is written
         * without one. The loaded mapping is printed on standard output.
         *
         * @param correspondanceFile the tab-separated mapping file (old name, new name)
         * @param afrlex the tab-separated lexicon file to patch
         * @param afrlexfixed the file the patched lexicon is written to
         * @param encoding the charset name used to read both inputs and to write the output
         */
        public static void patch(File correspondanceFile, File afrlex, File afrlexfixed, String encoding)
        {
                HashMap<String, String> correspondances = new HashMap<String, String>();

                String separator = "\t"
                correspondanceFile.getText(encoding).splitEachLine(separator) { fields ->
                        // Blank or incomplete lines carry no mapping; skipping them
                        // avoids storing null replacement names.
                        if (fields.size() >= 2) {
                                correspondances.put(fields[0], fields[1]);
                        }
                }
                println(correspondances);

                // withWriter encodes the output with the same charset used for reading
                // and closes the writer even when processing a line fails.
                afrlexfixed.withWriter(encoding) { writer ->
                        afrlex.getText(encoding).splitEachLine(separator) { fields ->
                                // A line made only of separators splits into an empty list.
                                if (!fields.isEmpty()) {
                                        writer.write(fields[0]);
                                }
                                for (int i = 1; i < fields.size(); i += 2) {
                                        String tag = fields[i]
                                        String mapped = correspondances.get(tag)
                                        // Keep the original tag rather than writing the text "null"
                                        // when the correspondance file has no entry for it.
                                        writer.write("\t" + (mapped != null ? mapped : tag));
                                        // The value paired with the tag may be missing on the last pair.
                                        if (i + 1 < fields.size()) {
                                                writer.write("\t" + fields[i + 1]);
                                        }
                                }
                                writer.write("\n")
                        }
                }
        }

        /**
         * Patches {@code afrlex.txt} into {@code afrlex-patched.txt} using the
         * {@code TTnca_2_CTX9.txt} correspondance file, all resolved against the
         * current working directory and handled as ISO-8859-1.
         *
         * @param args the arguments (unused)
         */
        public static void main(String[] args)
        {
                File afrlex = new File('afrlex.txt');
                File afrlexfixed = new File('afrlex-patched.txt');
                File correspondanceFile = new File('TTnca_2_CTX9.txt');
                String encoding = "ISO-8859-1"

                PatchAfrlex.patch(correspondanceFile, afrlex, afrlexfixed, encoding);
        }
}