Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / Doc2Transcription.groovy @ 187

History | View | Annotate | Download (2.6 kB)

1
package org.txm.importer
2

    
3
import javax.xml.stream.*
4
import java.net.URL
5

    
6
/**
 * Converts a plain-text transcription into a small XML document.
 *
 * The input file is read line by line. Every empty line toggles between a
 * "question" block and a "response" block; every non-empty line is written as
 * character data followed by an empty &lt;br/&gt; element. The result is wrapped
 * in a root &lt;text&gt; element whose id is the input file name up to its first dot.
 *
 * The constructor opens both files; {@link #process()} consumes and closes them,
 * so an instance is meant to be processed once.
 */
class Doc2Transcription {

    /** Character encoding used to decode the input text file. */
    String encoding
    /** Plain-text input file. */
    File txtfile
    /** XML output file. */
    File outfile
    /** Buffered reader over {@link #txtfile}. */
    Reader reader
    XMLOutputFactory factory
    /** Raw stream behind {@link #writer}; closed separately because the StAX writer does not close it. */
    FileOutputStream output
    XMLStreamWriter writer

    /** Line counter: starts at 1 and is incremented after each input line is handled. */
    int noline = 1
    /** true when the block to open next is a question, false for a response. */
    boolean question = false
    /** Number used for the id of the next question element ("q" + n). */
    int idquestion = 1
    /** Number used for the id of the next response element ("r" + n). */
    int idresponse = 1
    /** true while a question/response element is open in the writer. */
    boolean opened = false

    /**
     * Opens the input file for reading and the output file for writing.
     *
     * @param txtfile  the text file to read
     * @param outfile  the XML file to create (written as UTF-8)
     * @param encoding the charset name used to decode {@code txtfile}
     */
    public Doc2Transcription(File txtfile, File outfile, String encoding)
    {
        this.txtfile = txtfile
        this.outfile = outfile
        this.encoding = encoding

        InputStream input = new FileInputStream(txtfile)
        try {
            // A BufferedReader is required here: the readLine() that Groovy adds to
            // plain Readers cannot treat "\r\n" as a single line terminator when the
            // reader does not support mark(), which would produce spurious empty
            // lines (and thus spurious question/response switches) on CRLF files.
            reader = new BufferedReader(new InputStreamReader(input, encoding))
            factory = XMLOutputFactory.newInstance()
            output = new FileOutputStream(outfile)
            writer = factory.createXMLStreamWriter(output, "UTF-8") // create a new file
        } catch (Exception e) {
            // do not leak file handles when construction fails half-way
            input.close()
            if (output != null)
                output.close()
            throw e
        }
    }

    /**
     * Reads the whole input file and writes the XML document.
     * The reader, the XML writer and the output stream are always closed when
     * this method returns, whether it succeeds or throws.
     *
     * @return true when the document has been written, false when the reader or
     *         the writer is not available
     */
    public boolean process()
    {
        if (reader == null || writer == null)
        {
            println "I/O error"
            return false
        }

        try {
            // build text id: file name up to (not including) its first dot
            String filename = txtfile.getName()
            int idx = filename.indexOf(".")
            if (idx > 0)
                filename = filename.substring(0, idx)

            // write start of document
            writer.writeStartDocument("UTF-8", "1.0")
            writer.writeStartElement("text") // root <text> element
            writer.writeAttribute("id", filename) // and set its id

            // write first block
            writeStartQuestionResponse()

            String line = reader.readLine()
            while (line != null)
            {
                if (line.length() == 0) // blank line: switch between question and response
                {
                    question = !question
                    writer.writeEmptyElement("br")
                    writeStartQuestionResponse()
                }
                else
                {
                    writer.writeCharacters(line + " \n")
                    writer.writeEmptyElement("br")
                }

                noline++
                line = reader.readLine()
            }

            // close question/response if needed, then the root element
            if (opened)
            {
                writer.writeEndElement() // question/response
                opened = false
            }
            writer.writeEndElement() // text
            writer.writeEndDocument()
            writer.flush()
            return true
        } finally {
            // release every resource even if one of the close() calls fails
            try {
                reader.close()
            } finally {
                try {
                    writer.close()
                } finally {
                    output.close()
                }
            }
        }
    }

    /**
     * Closes the currently open question/response element, if any, and opens a
     * new one: a &lt;question&gt; when {@link #question} is true, a &lt;response&gt;
     * otherwise. The matching id counter is incremented.
     */
    public void writeStartQuestionResponse()
    {
        if (opened)
            writer.writeEndElement()
        opened = true

        if (question)
        {
            writer.writeStartElement("question") // open a <question> element
            writer.writeAttribute("id", "q" + idquestion++) // and set its id
        }
        else
        {
            writer.writeStartElement("response") // open a <response> element
            writer.writeAttribute("id", "r" + idresponse++) // and set its id
        }
    }

    /**
     * Chooses the kind of the first block; call it before {@link #process()}.
     *
     * @param state true to start with a question, false to start with a response
     */
    public void setInitialeState(boolean state)
    {
        question = state
    }

    /**
     * Manual test entry point.
     * Optional arguments: input file, output file, and input encoding. When fewer
     * than two arguments are given, the original developer test paths are used.
     */
    static main(args) {
        boolean hasPaths = args != null && args.size() >= 2
        File infile = new File(hasPaths ? args[0].toString() : "/home/matt/xml/txttranscription/test.txt")
        File outfile = new File(hasPaths ? args[1].toString() : "/home/matt/xml/txttranscription/test.xml")
        String encoding = (hasPaths && args.size() >= 3) ? args[2].toString() : "UTF-8"

        def builder = new Doc2Transcription(infile, outfile, encoding)
        if (builder.process())
            println "ok"
        else
            println "fail"
    }
}