Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / importer / XMLText2TXTCSV.groovy @ 966

History | View | Annotate | Download (1.2 kB)

1 881 mdecorde
package org.txm.importer
2 881 mdecorde
3 966 mdecorde
import org.txm.metadatas.Metadatas
4 966 mdecorde
5 881 mdecorde
// Directory whose files are read as input by the conversion loop.
File dir = new File("/home/mdecorde/xml/voeux/split_xml")
// Directory that receives the generated .txt files; it is wiped and recreated below.
File outdir = new File("/home/mdecorde/xml/voeux/split_txtcsv")
println "1) xml -> txt + write metadata.csv"

// Start from an empty output directory so results of a previous run are discarded.
outdir.deleteDir()
// mkdirs() also creates missing parent directories. Fail fast here with a clear
// message instead of failing later on the first file write.
if (!outdir.mkdirs() && !outdir.isDirectory()) {
    throw new IOException("Cannot create output directory: " + outdir)
}

// Metadata CSV location as resolved by Metadatas for the output directory.
File metadatafile = Metadatas.findMetadataFile(outdir)
// Accumulates the CSV rows that are later written to metadatafile.
String csvString = ""
12 881 mdecorde
13 881 mdecorde
// listFiles() returns null when dir does not exist or cannot be read;
// fall back to an empty array so the script does not die with an NPE.
def files = dir.listFiles() ?: new File[0]
// Sort in place so files are processed (and CSV rows emitted) in a stable order.
files.sort()
for (File f : files)
{
    // Sub-directories cannot be read as text; only regular files are converted.
    if (!f.isFile()) continue

    File outfile = new File(outdir, f.getName() + ".txt")
    String text = f.getText("UTF-8")

    // [^>]* stops at the end of the opening tag. The former greedy ".*" ran up to
    // the LAST '>' on the line and swallowed any markup following the tag.
    String texttag = text.find('<text id[^>]*>')
    if (texttag == null) {
        // Without the tag there is no metadata row to build; report and move on.
        println "Skipping " + f + ": no <text id=...> tag found"
        continue
    }

    // Strip only the <text ...> opening tag and its closing tag, keeping the content.
    // The (\s[^>]*)? group prevents matching other elements whose name merely
    // starts with "text".
    text = text.replaceAll('<text(\\s[^>]*)?>', "").replace("</text>", "")
    outfile.withWriter("UTF-8") { writer -> writer.write(text) }

    // Turn <text id="a" loc="b" annee="c"> into the CSV row "a","b","c".
    // This relies on the attributes appearing in exactly that order.
    csvString += texttag.replace("<text id=", "").replace(" loc=", ",").replace(" annee=", ",").replace("\">", "\"") + "\n"
}
27 881 mdecorde
28 881 mdecorde
println "2) write metadata.csv"
// Write the column header first, then the rows accumulated in csvString.
metadatafile.withWriter("UTF-8") { out ->
    String header = "\"id\",\"loc\",\"annee\"\n"
    out.write(header)
    out.write(csvString)
}
32 881 mdecorde
33 881 mdecorde
println "3) rename Voeux_*"
// Rename every "Voeux_XXXX..." entry of outdir to "XXXX.txt", where XXXX is the
// four characters that follow the "Voeux_" prefix.
outdir.eachFileMatch(~/Voeux_.*/) { file ->
    String name = file.getName()
    // substring(6, 10) needs at least ten characters; leave shorter names untouched
    // instead of aborting the whole step with an exception.
    if (name.length() < 10) {
        println "Skipping " + name + ": name too short to extract a 4-character key"
        return
    }
    File target = new File(outdir, name.substring(6, 10) + ".txt")
    // renameTo() reports failure only through its return value, so check it.
    if (!file.renameTo(target)) {
        println "Could not rename " + file + " to " + target
    }
}