root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / prototypes / Quotes.groovy @ 1000
History | View | Annotate | Download (1.5 kB)
package org.txm.scripts.test
// NOTE(review): the file lives under .../scripts/prototypes/ but declares the
// package org.txm.scripts.test -- confirm which one is intended.

/*
 * Prototype script: pre-tagging of direct speech in a TEI XML file.
 *
 * Steps:
 *  1. parse the input XML file,
 *  2. hand the <div> nodes found under text/body to EncodeTEIQuotes,
 *  3. serialise the (modified) document to the output file as UTF-8,
 *  4. run WordCounter on the output file for the "s" and "q" elements.
 *
 * Input and output paths are hard-coded relative to the user's home directory.
 */

println "Pre-balisage du discours direct"

// Names of the elements that contain <s> elements.
List<String> containers = ["p", "ab"]

File infile = new File(System.getProperty("user.home"), "TXM-SRC/quote-orig/perceval2.xml")
File outfile = new File(System.getProperty("user.home"), "TXM-SRC/quote-orig/perceval2-q.xml")

def doc = new XmlParser().parse(infile)

// Nodes handed to the quote encoder: every <div> directly under text/body.
List<Node> nodesToInspect = doc.text.body.div

/*************************/

// All the work happens in the constructor; the instance itself is not kept.
// NOTE(review): presumably "\"" is the quote delimiter and "pon" the type of
// the punctuation elements to look for -- confirm against EncodeTEIQuotes.
new org.txm.importer.EncodeTEIQuotes(nodesToInspect, containers, "\"", "pon")

// Copy the document into the "outfile" file.
// withPrintWriter closes the underlying stream even when printing fails,
// so the file handle can no longer leak.
// This script deliberately writes neither an XML declaration nor an
// xml-stylesheet processing instruction before the document.
String encoding = "UTF-8"
outfile.withPrintWriter(encoding) { PrintWriter pwriter ->
    XmlNodePrinter xmlwriter = new XmlNodePrinter(pwriter)
    xmlwriter.setPreserveWhitespace(false)
    xmlwriter.print(doc)
}

// Update counts.
if (outfile.exists()) {
    // Retrieves the id of the text that was concatenated to the original
    // <s> ids (e.g. s19_12 >> 19).
    String txtid = org.txm.importer.WordCounter.findTextId(infile, "s")
    new org.txm.importer.WordCounter(outfile, "s", txtid)
    new org.txm.importer.WordCounter(outfile, "q", txtid)
}