Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / transcriber / BuildTemplates.groovy @ 187

History | View | Annotate | Download (2.8 kB)

1
package org.txm.importer.transcriber
2
import java.nio.charset.Charset
3
import org.txm.utils.CsvReader
4

    
5
/*
 * Builds one empty Transcriber (.trs) template per record of a CSV file.
 *
 * The CSV file must have a header line declaring the columns "int", "enq" and
 * "ext". Each cell holds one or more speaker ids separated by spkSeparator.
 * For every record:
 *  - every id found in the three columns is declared once in <Speakers>,
 *  - the first "int" id names the output file (<id>.trs) and the audio file (<id>.mp3),
 *  - the first "enq" id is the speaker of the initial <Turn>.
 *
 * The script stops with an assertion error if a required column is missing or
 * if a record has no usable "int" or "enq" id.
 */

/// START PARAMETERS ///
File dir = new File("C:/") // output and input directory
File csvfile = new File(dir, "test.csv") // 3 cols: int enq ext
String spkSeparator = "\\|" // regular expression splitting the ids of one csv cell
char colSeparator = '\t' // csv column separator
String csvencoding = "ISO-8859-1" // csv encoding

//transcriptions header
String date = "111128" // date of transcription edition
String scribes= "studentN" // scribe name
String encoding = "ISO-8859-1" // transcription encoding

/// END PARAMETERS ///

// Transcription skeleton; every @NAME placeholder is substituted per record.
String canvas="""<?xml version="1.0" encoding="@ENCODING"?>
<!DOCTYPE Trans SYSTEM "trans-14.dtd">
<Trans scribe="@SCRIBE" audio_filename="@AUDIOFILE" version="1" version_date="@DATE">
<Speakers>
@SPEAKERS</Speakers>
<Episode>
<Section type="report" startTime="0" endTime="2.0">
<Turn startTime="0" endTime="1.0" speaker="@FIRSTSPK">
<Sync time="0"/>

</Turn>
</Section></Episode></Trans>
""";
// Declaration of one speaker; @ID is used both as id and as name.
String spkCanvas="<Speaker id=\"@ID\" name=\"@ID\" check=\"no\" dialect=\"native\" accent=\"\" scope=\"local\"/>"

// Escapes the characters that would break a double-quoted XML attribute value.
// '&' is replaced first so the entities produced by the other replacements stay intact.
def xmlAttr = { String value ->
    value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
}

// Splits a csv cell into its speaker ids, dropping the empty strings produced by
// an empty cell or by leading/consecutive separators.
def splitIds = { String cell ->
    (cell ?: "").split(spkSeparator).toList().findAll { it.length() > 0 }
}

//load csv
CsvReader csvreader = new CsvReader(csvfile.getAbsolutePath(), colSeparator, Charset.forName(csvencoding))
try {
    //check header
    csvreader.readHeaders();
    def titles = (csvreader.getHeaders() ?: []) as List
    for (String required : ["int", "enq", "ext"])
        assert titles.contains(required) : "missing column '" + required + "' in " + csvfile

    // one transcription per csv line
    while (csvreader.readRecord()) {
        String ints = csvreader.get("int")
        String enqs = csvreader.get("enq")
        String exts = csvreader.get("ext")

        def intsS = splitIds(ints)
        def enqsS = splitIds(enqs)
        def extsS = splitIds(exts)
        // the first "int" id names the files and the first "enq" id opens the first turn,
        // so both lists need at least one non-empty id
        assert !intsS.isEmpty() : "no speaker id in column 'int': '" + ints + "'"
        assert !enqsS.isEmpty() : "no speaker id in column 'enq': '" + enqs + "'"

        // build speaker declarations: int, then enq, then ext; an id listed in
        // several columns is declared only once (first occurrence wins)
        StringBuilder speakers = new StringBuilder()
        for (String id : (intsS + enqsS + extsS).unique())
            speakers.append(spkCanvas.replace("@ID", xmlAttr(id))).append("\n")

        // some infos
        String transfile = intsS[0]
        String firstSpeaker = enqsS[0]
        // resolve the audio file against dir so the path separator is always present
        String audiofile = new File(dir, transfile+".mp3").getAbsolutePath()

        // fill infos
        String content = canvas
        content = content.replace("@ENCODING", encoding)
        content = content.replace("@SCRIBE", xmlAttr(scribes))
        content = content.replace("@AUDIOFILE", xmlAttr(audiofile))
        content = content.replace("@DATE", xmlAttr(date))
        content = content.replace("@SPEAKERS", speakers.toString())
        content = content.replace("@FIRSTSPK", xmlAttr(firstSpeaker))

        //write transcription
        File outfile = new File(dir, transfile+".trs")
        println "create: "+outfile
        outfile.withWriter(encoding) { out ->
            out.println content
        }
    }
} finally {
    // release the csv file handle even when an assertion or an I/O error stops the run
    csvreader.close()
}