Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / nlp / PatchAfrlex.groovy @ 1000

History | View | Annotate | Download (2.4 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
25
// $LastChangedRevision: 2386 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.scripts.scripts;
29

    
30
// TODO: Auto-generated Javadoc
31
/**
 * Patches a tab-separated lexicon file by replacing its POS names with the
 * names found in a correspondence file.
 *
 * @deprecated
 * @author mdecorde
 */
class PatchAfrlex {

    /** Column separator used by both the correspondence file and the lexicon. */
    private static final String SEPARATOR = "\t"

    /**
     * Rewrites {@code afrlex} into {@code afrlexfixed}, replacing every POS name
     * with its counterpart from {@code correspondanceFile}.
     *
     * Each lexicon line is read as: first column, then (POS, value) pairs, all
     * separated by tabs. The first column and the values are copied unchanged;
     * only the POS columns (columns 1, 3, 5, ...) are translated.
     *
     * Each correspondence line is read as {@code oldName<TAB>newName}; lines
     * with fewer than two columns are ignored.
     *
     * A POS name that has no entry in the correspondence file is kept as is,
     * and a trailing POS column without a paired value is written alone.
     *
     * @param correspondanceFile the tab-separated file mapping old POS names to new ones
     * @param afrlex the lexicon file to patch
     * @param afrlexfixed the file the patched lexicon is written to
     * @param encoding the charset name used to read both input files and to write the output
     */
    public static void patch(File correspondanceFile, File afrlex, File afrlexfixed, String encoding)
    {
        HashMap<String, String> correspondances = new HashMap<String, String>();

        correspondanceFile.getText(encoding).splitEachLine(SEPARATOR) { fields ->
            // Blank or single-column lines carry no mapping; skipping them
            // avoids storing a null replacement.
            if (fields.size() >= 2) {
                correspondances.put(fields[0], fields[1]);
            }
        }
        println(correspondances);

        // Read the whole lexicon before opening the output, so the input is not
        // truncated when both arguments designate the same file.
        String content = afrlex.getText(encoding);

        // withWriter writes with the requested charset (FileWriter would use the
        // platform default) and closes the writer even if the closure throws.
        afrlexfixed.withWriter(encoding) { writer ->
            content.splitEachLine(SEPARATOR) { fields ->
                // A line made only of separators splits into zero fields.
                if (!fields.isEmpty()) {
                    writer.write(fields[0]);
                }
                for (int i = 1; i < fields.size(); i += 2) {
                    String pos = fields[i];
                    String mapped = correspondances.get(pos);
                    // Keep the original name when no correspondence is known,
                    // rather than writing the literal text "null".
                    writer.write(SEPARATOR + (mapped != null ? mapped : pos));
                    // The value paired with this POS may be missing on malformed lines.
                    if (i + 1 < fields.size()) {
                        writer.write(SEPARATOR + fields[i + 1]);
                    }
                }
                writer.write("\n");
            }
        }
    }

    /**
     * The main method: patches 'afrlex.txt' into 'afrlex-patched.txt' using the
     * correspondences of 'TTnca_2_CTX9.txt', all read and written as ISO-8859-1.
     * The three file names are relative paths.
     *
     * @param args the arguments (unused)
     */
    public static void main(String[] args)
    {
        File afrlex = new File('afrlex.txt');
        File afrlexfixed = new File('afrlex-patched.txt');
        File correspondanceFile = new File('TTnca_2_CTX9.txt');
        String encoding = "ISO-8859-1"

        PatchAfrlex.patch(correspondanceFile, afrlex, afrlexfixed, encoding);
    }
}