Statistics
| Revision:

root / tmp / org.txm.dictionary.rcp / src / org / txm / macro / frolex / DMFSite2CLDMFTSVMacro.groovy @ 470

History | View | Annotate | Download (2.8 kB)

1
package org.txm.macro.frolex
2

    
3
import java.io.File
4
import java.io.IOException
5
import java.io.PrintWriter
6
import java.nio.charset.Charset
7
import java.util.ArrayList
8
import java.util.HashMap
9

    
10
import javax.xml.stream.XMLStreamException
11

    
12
import org.txm.utils.CsvReader
13
import org.txm.utils.io.IOUtils
14

    
15
import org.kohsuke.args4j.*
16
import groovy.transform.Field
17
import org.txm.rcpapplication.swt.widget.parameters.*
18

    
19
// PARAMETERS
20
// Macro parameter: the folder that holds the input file "dmf_site.tsv".
// The output file "cldmf-site.tsv" is written to the same folder.
@Field @Option(name="workingDirectory", usage="workingDirectory containing all files needed", widget="Folder", required=true, def="dir")
File workingDirectory = null

// Ask the user for the parameter values; stop when the dialog does not validate.
if (!ParametersDialog.open(this)) {
        System.out.println("Aborting CLBFMSITELEX creation.")
        return null
}

// The input table must exist before anything is written.
File dmfsiteTSVFile = new File(workingDirectory, "dmf_site.tsv")
if (!dmfsiteTSVFile.exists()) {
        println "Aborting: file is missing: $dmfsiteTSVFile"
        return
}
File tsvFile = new File(workingDirectory, "cldmf-site.tsv")

//START

// Entries indexed by "<dmf_lemma>_<category>".
// NOTE(review): Entry2 is declared outside this view; this script only relies on
// its (lemma, category) and no-arg constructors, its "corresps" map and its
// "lemma_source" property.
HashMap<String, Entry2> lemmas = new HashMap<String, Entry2>()

// Read the tab-separated input as UTF-8, one record per (lemma, category, source).
CsvReader reader = new CsvReader(dmfsiteTSVFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8"))
try {
        reader.readHeaders()
        while (reader.readRecord()) {
                String lemma = reader.get("dmf_lemma")
                String category = reader.get("category")
                String corresp = convertCorresp(reader.get("source"))
                String corresp_lemma = reader.get("source_lemma")
                String key = lemma+"_"+category

                // Create the entry the first time this lemma/category pair is seen.
                if (!lemmas.containsKey(key)) {
                        lemmas.put(key, new Entry2(lemma, category))
                }
                Entry2 current = lemmas.get(key)
                current.lemma_source = "DMF"

                // Only correspondence codes already present in the entry's map are
                // stored; any other code is reported and the record is skipped.
                if (current.corresps.keySet().contains(corresp)) {
                        current.corresps.put(corresp, corresp_lemma)
                } else {
                        System.out.println("Error unknown lemma="+lemma+" with corresp="+corresp+" and corresp_lemma="+corresp_lemma)
                }
        }
} finally {
        // Release the input file even when a record fails to be processed.
        reader.close()
}

PrintWriter writer = IOUtils.getWriter(tsvFile)
try {
        // Header line: fixed columns, then one column per correspondence code of a
        // blank Entry2, then the lemma source.
        writer.print("lemma\tcategory")
        Entry2 tmp = new Entry2()
        for (String corresp : tmp.corresps.keySet()) {
                writer.print("\t"+corresp)
        }
        writer.print("\tlemma_source")
        writer.println("")

        // One row per entry, in sorted key order.
        // NOTE(review): rows are printed without a line terminator, so
        // Entry2.toString() presumably ends with one - confirm.
        ArrayList<String> keys = new ArrayList<String>(lemmas.keySet())
        keys.sort()
        for (String key : keys) {
                writer.print(lemmas.get(key))
        }
} finally {
        // Flush and release the output file even when writing fails part-way.
        writer.close()
}
return true
77

    
78
// END
79

    
80
/**
 * Normalizes the raw "source" label of a record into the dictionary code used
 * as a correspondence key.
 *
 * Every rule below is mutually exclusive with the others, so the order of the
 * checks does not influence the result.
 *
 * @param corresp the raw source label
 * @return "AND", "DEAF", "DECT", "FEW", "GDF", "GDC", "HUG", "TL" or "TLF"
 *         depending on the matching rule, "UNDEF" when no rule matches
 */
String convertCorresp(String corresp) {
        if (corresp.equals("AND")) return "AND"

        // "DEAF", optionally preceded by a star and followed by anything.
        if (corresp.matches("\\*?DEAF.*")) return "DEAF"

        // The accented label is mapped to its unaccented code.
        if (corresp.equals("DÉCT")) return "DECT"

        if (corresp.startsWith("FEW")) return "FEW"

        // "GD" alone is GDF; labels starting with "GDC" are GDC.
        if (corresp.equals("GD")) return "GDF"
        if (corresp.startsWith("GDC")) return "GDC"

        if (corresp.equals("HUG")) return "HUG"
        if (corresp.startsWith("T-L")) return "TL"

        // Both the "gs" label and anything starting with "TLF" are mapped to TLF.
        if (corresp.equals("gs") || corresp.startsWith("TLF")) return "TLF"

        // No rule applies.
        return "UNDEF"
}
105