Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / importer / XMLText2TXTCSV.groovy @ 966

History | View | Annotate | Download (1.2 kB)

1
package org.txm.importer
2

    
3
import org.txm.metadatas.Metadatas
4

    
5
// Script: converts every XML file of `dir` into a plain-text file in `outdir`
// (the <text ...> / </text> wrapper tags are stripped) and collects the
// id / loc / annee attributes of each <text> tag into a CSV metadata file.
// Finally, output files named "Voeux_*" are renamed after characters 6..9 of
// their name.
File dir = new File("/home/mdecorde/xml/voeux/split_xml")
File outdir = new File("/home/mdecorde/xml/voeux/split_txtcsv")

println "1) xml -> txt + write metadata.csv"
// Start every run from an empty output directory; mkdirs() also creates
// missing parent directories, which mkdir() would silently fail on.
outdir.deleteDir()
outdir.mkdirs()
// NOTE(review): called right after outdir was emptied — confirm that
// findMetadataFile returns a usable path when no metadata file exists yet.
File metadatafile = Metadatas.findMetadataFile(outdir)
StringBuilder csvRows = new StringBuilder()

// listFiles() returns null when the directory is missing or unreadable.
File[] listed = dir.listFiles()
if (listed == null) {
        throw new FileNotFoundException("Cannot list source directory: " + dir)
}
// Only regular files can be read as text; process them in natural File order.
def files = listed.findAll { it.isFile() }.sort()
for (File f : files)
{
        String text = f.getText("UTF-8")
        // Reluctant match: stop at the first '>' so that only the opening tag
        // itself (not the rest of the line) ends up in the CSV row.
        String texttag = text.find(/<text id.*?>/)
        if (texttag == null) {
                println "Warning: no <text id=...> tag found in " + f + ", file skipped"
                continue
        }
        // \b keeps tags that merely start with "text" (e.g. <textClass>) intact;
        // the reluctant quantifier keeps the content following the tag.
        text = text.replaceAll(/<text\b.*?>/, "").replace("</text>", "")
        File outfile = new File(outdir, f.getName() + ".txt")
        outfile.withWriter("UTF-8") { writer -> writer.write(text) }
        // Turn `<text id="a" loc="b" annee="c">` into `"a","b","c"`.
        csvRows.append(texttag.replace("<text id=", "").replace(" loc=", ",").replace(" annee=", ",").replace("\">", "\""))
        csvRows.append("\n")
}

println "2) write metadata.csv"
metadatafile.withWriter("UTF-8") { csvwriter ->
        csvwriter.write("\"id\",\"loc\",\"annee\"\n")
        csvwriter.write(csvRows.toString())
}

println "3) rename Voeux_*"
outdir.eachFileMatch(~/Voeux_.*/) { file ->
        String name = file.getName()
        // substring(6, 10) needs at least 10 characters: the 6-character
        // "Voeux_" prefix plus the 4 characters kept as the new base name.
        if (name.length() < 10) {
                println "Warning: name too short to rename: " + file
                return
        }
        File target = new File(outdir, name.substring(6, 10) + ".txt")
        // renameTo reports failure through its return value only.
        if (!file.renameTo(target)) {
                println "Warning: could not rename " + file + " to " + target
        }
}