Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / XMLText2TXTCSV.groovy @ 2473

History | View | Annotate | Download (1.2 kB)

1
package org.txm.scripts.importer
2

    
3
import org.txm.metadatas.Metadatas
4
import org.txm.utils.io.IOUtils
5

    
6
// Input directory holding the XML files to convert, and the directory that
// receives the generated .txt files and the metadata CSV.
File dir = new File("/home/mdecorde/xml/voeux/split_xml")
File outdir = new File("/home/mdecorde/xml/voeux/split_txtcsv")
println "1) xml -> txt + write metadata.csv"

// Start every run from an empty output directory.
outdir.deleteDir()
// mkdirs() also creates missing parent directories. Fail right away with a
// clear message instead of letting a later file write fail obscurely.
if (!outdir.mkdirs() && !outdir.isDirectory()) {
    throw new IOException("Could not create output directory: " + outdir)
}

// NOTE(review): outdir is empty at this point; confirm that
// Metadatas.findMetadataFile returns a usable File (not null) in that case.
File metadatafile = Metadatas.findMetadataFile(outdir)
// Accumulates the metadata rows (one line per converted text) written out in step 2.
String csvString = ""
13

    
14
// List the input files through IOUtils.HIDDENFILE_FILTER
// (presumably drops hidden files; verify against IOUtils).
def files = dir.listFiles(IOUtils.HIDDENFILE_FILTER)
if (files == null) {
    // File.listFiles returns null when the path is not a directory or cannot be read.
    throw new IOException("Cannot list input directory: " + dir)
}
files.sort()

// [^>]* stops at the first '>' so only the tag itself is matched. The previous
// greedy ".*" ran to the last '>' on the line and swallowed any content that
// shared a line with the tag. (Assumes attribute values contain no '>'.)
def textIdTag = ~/<text id[^>]*>/
def anyTextTag = ~/<text[^>]*>/

// Rows are collected in a builder to avoid repeated String concatenation in the loop.
StringBuilder rows = new StringBuilder()

for (File f : files) {
    String text = f.getText("UTF-8")

    // Opening tag carrying the metadata, e.g. <text id="..." loc="..." annee="...">
    String texttag = text.find(textIdTag)
    if (texttag == null) {
        // Without the tag there is no metadata row to build; skip the file so the
        // .txt outputs and the metadata rows stay in sync.
        println "WARNING: no <text id=...> tag found in " + f.getName() + ", file skipped"
        continue
    }
    // NOTE(review): a disabled variant used to append ".txt" to the id attribute
    // here; it is left out, as in the original active code.

    // Strip the opening and closing text tags and write the remainder as <name>.txt
    String body = text.replaceAll(anyTextTag, "").replace("</text>", "")
    File outfile = new File(outdir, f.getName() + ".txt")
    outfile.withWriter("UTF-8") { writer -> writer.write(body) }

    // Turn  <text id="a" loc="b" annee="c">  into the CSV row  "a","b","c"
    rows.append(texttag.replace("<text id=", "")
            .replace(" loc=", ",")
            .replace(" annee=", ",")
            .replace("\">", "\""))
        .append("\n")
}

csvString += rows.toString()
27

    
28
println "2) write metadata.csv"
// Emit the CSV header line first, then the rows accumulated in csvString.
metadatafile.withWriter("UTF-8") { out ->
    String header = "\"id\",\"loc\",\"annee\"\n"
    out.write(header)
    out.write(csvString)
}
32

    
33
println "3) rename Voeux_*"
// Rename every entry of outdir named Voeux_XXXX... to XXXX.txt, where XXXX is the
// four characters following the "Voeux_" prefix (name positions 6 to 9).
// ".{4,}" requires at least four characters after the prefix, so substring(6, 10)
// can no longer go out of range on a short name.
outdir.eachFileMatch(~/Voeux_.{4,}/) { file ->
    File target = new File(outdir, file.getName().substring(6, 10) + ".txt")
    // File.renameTo reports failure only through its return value; surface it.
    if (!file.renameTo(target)) {
        println "WARNING: could not rename " + file.getName() + " to " + target.getName()
    }
}