root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / nlp / PatchAfrlex.groovy @ 1000
History | View | Annotate | Download (2.4 kB)
1 | 321 | mdecorde | // Copyright © 2010-2013 ENS de Lyon.
|
---|---|---|---|
2 | 321 | mdecorde | // Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 | 321 | mdecorde | // Lyon 2, University of Franche-Comté, University of Nice
|
4 | 321 | mdecorde | // Sophia Antipolis, University of Paris 3.
|
5 | 321 | mdecorde | //
|
6 | 321 | mdecorde | // The TXM platform is free software: you can redistribute it
|
7 | 321 | mdecorde | // and/or modify it under the terms of the GNU General Public
|
8 | 321 | mdecorde | // License as published by the Free Software Foundation,
|
9 | 321 | mdecorde | // either version 2 of the License, or (at your option) any
|
10 | 321 | mdecorde | // later version.
|
11 | 321 | mdecorde | //
|
12 | 321 | mdecorde | // The TXM platform is distributed in the hope that it will be
|
13 | 321 | mdecorde | // useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 | 321 | mdecorde | // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 | 321 | mdecorde | // PURPOSE. See the GNU General Public License for more
|
16 | 321 | mdecorde | // details.
|
17 | 321 | mdecorde | //
|
18 | 321 | mdecorde | // You should have received a copy of the GNU General
|
19 | 321 | mdecorde | // Public License along with the TXM platform. If not, see
|
20 | 321 | mdecorde | // http://www.gnu.org/licenses.
|
21 | 321 | mdecorde | //
|
22 | 321 | mdecorde | //
|
23 | 321 | mdecorde | //
|
24 | 479 | mdecorde | // $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
|
25 | 321 | mdecorde | // $LastChangedRevision: 2386 $
|
26 | 321 | mdecorde | // $LastChangedBy: mdecorde $
|
27 | 321 | mdecorde | //
|
28 | 1000 | mdecorde | package org.txm.scripts.scripts;
|
29 | 321 | mdecorde | |
30 | 321 | mdecorde | // TODO: Auto-generated Javadoc
|
/**
 * Patches a tab-separated "afrlex" file by replacing the POS names it
 * contains with the names given in a correspondence file.
 *
 * @deprecated
 * @author mdecorde
 */
class PatchAfrlex {

	/**
	 * Rewrites {@code afrlex} into {@code afrlexfixed}, translating POS names.
	 *
	 * The correspondence file is read as tab-separated lines; on each line the
	 * first column is the name to replace and the second column is its
	 * replacement. Lines with fewer than two columns are ignored.
	 *
	 * Each line of {@code afrlex} is read as tab-separated fields: the first
	 * field is copied unchanged, then the remaining fields are processed two
	 * by two (name, value). The name is replaced using the correspondence
	 * table and the value is copied unchanged. A name without a
	 * correspondence is kept as it is, and a trailing name without a value is
	 * written alone.
	 *
	 * Both input files are decoded with {@code encoding} and the output file
	 * is encoded with the same {@code encoding}.
	 *
	 * @param correspondanceFile the tab-separated correspondence file (old name, new name)
	 * @param afrlex the tab-separated file to patch
	 * @param afrlexfixed the file to write the patched content to (overwritten)
	 * @param encoding the charset name used to read both inputs and to write the output
	 */
	public static void patch(File correspondanceFile, File afrlex, File afrlexfixed, String encoding)
	{
		String separator = "\t"

		Map<String, String> correspondances = new HashMap<String, String>();
		correspondanceFile.getText(encoding).splitEachLine(separator) { fields ->
			// blank or single-column lines carry no usable mapping
			if (fields.size() >= 2) {
				correspondances.put(fields[0], fields[1]);
			}
		}
		println(correspondances);

		// read the whole input before the output file is opened (and truncated)
		String lexicon = afrlex.getText(encoding)

		// withWriter encodes with the requested charset and always closes the
		// writer, even when the closure throws
		afrlexfixed.withWriter(encoding) { writer ->
			lexicon.splitEachLine(separator) { fields ->
				writer.write(fields[0]);
				for (int i = 1; i < fields.size(); i = i + 2) {
					String name = fields[i]
					// keep the original name when no correspondence is known
					String patched = correspondances.containsKey(name) ? correspondances.get(name) : name
					writer.write("\t" + patched)
					// the last pair may be incomplete: only write the value if present
					if (i + 1 < fields.size()) {
						writer.write("\t" + fields[i + 1])
					}
				}
				writer.write("\n")
			}
		}
	}

	/**
	 * Runs {@link #patch} on fixed file names resolved against the current
	 * working directory: reads 'afrlex.txt' and 'TTnca_2_CTX9.txt' as
	 * ISO-8859-1 and writes 'afrlex-patched.txt'.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args)
	{
		File afrlex = new File('afrlex.txt');
		File afrlexfixed = new File('afrlex-patched.txt');
		File correspondanceFile = new File('TTnca_2_CTX9.txt');
		String encoding = "ISO-8859-1"

		PatchAfrlex.patch(correspondanceFile, afrlex, afrlexfixed, encoding);
	}
}