root / tmp / org.txm.dictionary.rcp / src / org / txm / macro / frolex / DMFSite2CLDMFTSVMacro.groovy @ 470
History | View | Annotate | Download (2.8 kB)
1 |
package org.txm.macro.frolex
|
---|---|
2 |
|
3 |
import java.io.File |
4 |
import java.io.IOException |
5 |
import java.io.PrintWriter |
6 |
import java.nio.charset.Charset |
7 |
import java.util.ArrayList |
8 |
import java.util.HashMap |
9 |
|
10 |
import javax.xml.stream.XMLStreamException |
11 |
|
12 |
import org.txm.utils.CsvReader |
13 |
import org.txm.utils.io.IOUtils |
14 |
|
15 |
import org.kohsuke.args4j.* |
16 |
import groovy.transform.Field |
17 |
import org.txm.rcpapplication.swt.widget.parameters.* |
18 |
|
19 |
// PARAMETERS
|
20 |
// Macro body: reads "dmf_site.tsv" from the working directory, groups its rows
// by lemma and category, and writes the result to "cldmf-site.tsv" in the same
// directory.
// Returns true on success; returns null when the parameters dialog is cancelled
// or when the input file is missing.
@Field @Option(name="workingDirectory", usage="workingDirectory containing all files needed", widget="Folder", required=true, def="dir")
File workingDirectory = null
if (!ParametersDialog.open(this)) {
	// NOTE(review): the message names CLBFMSITELEX while this script writes cldmf-site.tsv;
	// possibly copied from a sibling macro -- confirm before changing the text.
	System.out.println("Aborting CLBFMSITELEX creation.")
	return null
}

File dmfsiteTSVFile = new File(workingDirectory, "dmf_site.tsv")
if (!dmfsiteTSVFile.exists()) {
	println "Aborting: file is missing: $dmfsiteTSVFile"
	return
}
File tsvFile = new File(workingDirectory, "cldmf-site.tsv")

//START

// One entry per "<lemma>_<category>" key.
// Entry2 is declared outside this script.
HashMap<String, Entry2> lemmas = new HashMap<String, Entry2>()

CsvReader reader = new CsvReader(dmfsiteTSVFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8"))
try {
	reader.readHeaders()
	while (reader.readRecord()) {
		String lemma = reader.get("dmf_lemma")
		String category = reader.get("category")
		// Normalize the raw source label to one of the dictionary codes.
		String corresp = convertCorresp(reader.get("source"))
		String corresp_lemma = reader.get("source_lemma")
		String key = lemma+"_"+category

		// Create the entry the first time this lemma/category pair is seen.
		Entry2 current = lemmas.get(key)
		if (current == null) {
			current = new Entry2(lemma, category)
			lemmas.put(key, current)
		}
		current.lemma_source = "DMF"

		// Only codes already present as keys of the entry's corresps map are stored;
		// anything else (e.g. "UNDEF") is reported and skipped.
		if (current.corresps.containsKey(corresp)) {
			current.corresps.put(corresp, corresp_lemma)
		} else {
			System.out.println("Error unknown lemma="+lemma+" with corresp="+corresp+" and corresp_lemma="+corresp_lemma)
		}
	}
} finally {
	// Release the input file even when reading or conversion fails.
	reader.close()
}

PrintWriter writer = IOUtils.getWriter(tsvFile)
try {
	// Header row: the correspondence columns are the keys of a fresh Entry2's corresps map.
	writer.print("lemma\tcategory")
	Entry2 tmp = new Entry2()
	for (String corresp : tmp.corresps.keySet()) {
		writer.print("\t"+corresp)
	}
	writer.print("\tlemma_source")
	writer.println("")

	// Emit entries in sorted key order.
	ArrayList<String> keys = new ArrayList<String>(lemmas.keySet())
	keys.sort()
	for (String key : keys) {
		// Prints the entry's string form.
		// NOTE(review): presumably Entry2.toString() yields a full TSV row with its line break -- confirm.
		writer.print(lemmas.get(key))
	}
} finally {
	// Flush and close the output file even when writing fails.
	writer.close()
}
return true
77 |
|
78 |
// END
|
79 |
|
80 |
/**
 * Maps a raw source label to a normalized dictionary code.
 *
 * The rules are tried in order and the first match wins.
 *
 * @param corresp the raw source label; must not be null
 * @return one of "AND", "DEAF", "DECT", "FEW", "GDF", "GDC", "TLF", "HUG", "TL",
 *         or "UNDEF" when no rule matches
 */
String convertCorresp(String corresp) {
	if (corresp.equals("AND")) return "AND"
	// "DEAF", optionally preceded by a literal asterisk, followed by anything
	if (corresp.matches("\\*?DEAF.*")) return "DEAF"
	if (corresp.equals("DÉCT")) return "DECT"
	if (corresp.startsWith("FEW")) return "FEW"
	// exactly "GD" is GDF; labels starting with "GDC" are GDC
	if (corresp.equals("GD")) return "GDF"
	if (corresp.startsWith("GDC")) return "GDC"
	// the label "gs" is mapped to TLF as well
	if (corresp.equals("gs")) return "TLF"
	if (corresp.equals("HUG")) return "HUG"
	if (corresp.startsWith("T-L")) return "TL"
	if (corresp.startsWith("TLF")) return "TLF"
	return "UNDEF"
}
105 |
|