Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / test / Quotes.groovy @ 966

History | View | Annotate | Download (1.4 kB)

1 479 mdecorde
package org.txm.test

// Test script: runs EncodeTEIQuotes over the <div> elements of a TEI source
// file, serializes the parsed document to a second file, then runs
// WordCounter on that output for the "s" and "q" elements.
println "Pre-balisage du discours direct"

List<String> containers = ["p", "ab"] // elements that contain <s> elements

File infile = new File(System.getProperty("user.home"), "TXM-SRC/quote-orig/perceval2.xml")
File outfile = new File(System.getProperty("user.home"), "TXM-SRC/quote-orig/perceval2-q.xml")

def doc = new XmlParser().parse(infile)
List<Node> nodesToInspect = doc.text.body.div
// nodesToInspect << doc.text.body.div // add more nodes to inspect here

/*************************/
// NOTE(review): presumably EncodeTEIQuotes rewrites the given nodes in place
// (the same `doc` is serialized below); "\"" looks like the quote character
// and "pon" an element/attribute name -- confirm against EncodeTEIQuotes.
new org.txm.importer.EncodeTEIQuotes(nodesToInspect, containers, "\"", "pon")

// Copy the document into the "outfile" File.
String encoding = "UTF-8"
// withWriter closes the whole writer chain even when printing throws, so the
// underlying file stream is no longer leaked on failure.
new PrintWriter(new OutputStreamWriter(new FileOutputStream(outfile), encoding), true).withWriter { pwriter ->
    // The XML declaration and stylesheet processing instruction are currently
    // disabled, so the output starts directly with the root element:
    //pwriter.write("<?xml version=\"1.0\" encoding=\""+encoding+"\"?>");
    //pwriter.write ("<?xml-stylesheet type=\"text/css\" href=\"tei-graal.css\"?>\n")
    XmlNodePrinter xmlwriter = new XmlNodePrinter(pwriter)
    xmlwriter.setPreserveWhitespace(false)
    xmlwriter.print(doc)
}
//println "write output file "+outfile

// Update counts.
if (outfile.exists()) {
    // Retrieves the id of the text that was concatenated to the ids of the
    // original <s> elements (e.g. s19_12 >> 19).
    String txtid = org.txm.importer.WordCounter.findTextId(infile, "s")
    new org.txm.importer.WordCounter(outfile, "s", txtid)
    new org.txm.importer.WordCounter(outfile, "q", txtid)
}