Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / XMLText2TXTCSV.groovy @ 187

History | View | Annotate | Download (1.1 kB)

1
package org.txm.importer
2

    
3
// Converts every XML file found in `dir` into a plain text file in `outdir`
// (the <text ...> wrapper element is stripped), collects the attributes of each
// <text id="..." loc="..." annee="..."> opening tag into `metadata.csv`, and
// finally renames the produced "Voeux_*" files to "<4 characters>.txt".
File dir = new File("/home/mdecorde/xml/voeux/split_xml")
File outdir = new File("/home/mdecorde/xml/voeux/split_txtcsv")

println "1) xml -> txt + write metadata.csv"

// Validate the input BEFORE wiping the previous output: listFiles() returns
// null when the directory is missing or unreadable.
File[] listed = dir.listFiles()
if (listed == null) {
    throw new FileNotFoundException("Cannot list input directory: " + dir)
}
// Only regular files can be read as text; sort them for a stable CSV row order.
def files = listed.findAll { it.isFile() }.sort()

outdir.deleteDir()
outdir.mkdirs()
File metadatafile = new File(outdir, "metadata.csv")
StringBuilder csvRows = new StringBuilder()

for (File f : files) {
    File outfile = new File(outdir, f.getName() + ".txt")
    String text = f.getText("UTF-8")

    // [^>]* stops at the end of the opening tag. A greedy ".*" would run up to
    // the last '>' of the line and swallow any content sharing that line, and
    // would miss a tag whose attributes are wrapped over several lines.
    String texttag = text.find("<text id[^>]*>")

    text = text.replaceAll("<text[^>]*>", "").replace("</text>", "")
    outfile.withWriter("UTF-8") { writer -> writer.write(text) }

    if (texttag == null) {
        // No metadata available for this file: keep the text, skip the CSV row.
        println "Warning: no <text id=...> tag found in " + f.getName() + ", no metadata row written"
        continue
    }

    // <text id="a" loc="b" annee="c">  ->  "a","b","c"
    csvRows.append(texttag.replace("<text id=", "").replace(" loc=", ",").replace(" annee=", ",").replace("\">", "\""))
    csvRows.append("\n")
}

println "2) write metadata.csv"
metadatafile.withWriter("UTF-8") { csvwriter ->
    csvwriter.write("\"id\",\"loc\",\"annee\"\n")
    csvwriter.write(csvRows.toString())
}

println "3) rename Voeux_*"
outdir.eachFileMatch(~/Voeux_.*/) { file ->
    String name = file.getName()
    // Characters 6..9 (right after "Voeux_") become the new base name.
    if (name.length() < 10) {
        println "Warning: cannot rename " + name + ", name is too short"
        return
    }
    File target = new File(outdir, name.substring(6, 10) + ".txt")
    // renameTo() reports failure through its return value only.
    if (!file.renameTo(target)) {
        println "Warning: could not rename " + name + " to " + target.getName()
    }
}