Révision 2375
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/misc/RenameFilesMacro.groovy (revision 2375) | ||
|---|---|---|
| 1 | 1 |
package org.txm.macro.misc |
| 2 | 2 |
// STANDARD DECLARATIONS |
| 3 | 3 |
|
| 4 |
// README: this macro needs the jtidy JAR library |
|
| 5 |
|
|
| 6 | 4 |
import groovy.xml.QName |
| 7 | 5 |
|
| 8 | 6 |
import java.nio.charset.Charset |
| 9 | 7 |
import java.text.DecimalFormat |
| 10 | 8 |
import org.txm.utils.xml.DomUtils; |
| 11 | 9 |
import org.txm.importer.ValidateXml; |
| 12 |
import org.w3c.tidy.Tidy |
|
| 13 | 10 |
import groovy.util.XmlParser |
| 14 | 11 |
import org.kohsuke.args4j.* |
| 15 | 12 |
import groovy.transform.Field |
| ... | ... | |
| 38 | 35 |
File rejected = new File(rootDir, "duplicates"); |
| 39 | 36 |
File tmpDir = new File(rootDir, "tmp"); |
| 40 | 37 |
|
| 41 |
|
|
| 42 | 38 |
if (!srcDir.exists()) {
|
| 43 | 39 |
println "STOP, srcDir does not exists $srcDir" |
| 44 | 40 |
return; |
| ... | ... | |
| 77 | 73 |
if (node instanceof String) {
|
| 78 | 74 |
s += " "+node |
| 79 | 75 |
} else {
|
| 80 |
for(def c : node.children())
|
|
| 76 |
for (def c : node.children()) {
|
|
| 81 | 77 |
s += " "+getText(c) |
| 78 |
} |
|
| 82 | 79 |
} |
| 83 | 80 |
//println " "+s.replace("\n", " ").trim();
|
| 84 | 81 |
return " "+s.replace("\n", " ").trim();
|
| ... | ... | |
| 101 | 98 |
|
| 102 | 99 |
File xhtmlFile = new File(outDir, name+".xhtml") |
| 103 | 100 |
|
| 104 |
// Tidy tidy = new Tidy(); // obtain a new Tidy instance |
|
| 105 |
// tidy.setXHTML(true); // set desired config options using tidy setters |
|
| 106 |
// tidy.setInputEncoding("UTF-8")
|
|
| 107 |
// tidy.setOutputEncoding("UTF-8")
|
|
| 108 |
// tidy.setShowErrors(0) |
|
| 109 |
// tidy.setShowWarnings(false) |
|
| 110 | 101 |
xhtmlFile.withWriter("UTF-8") { out ->
|
| 111 | 102 |
def doc = org.jsoup.Jsoup.connect(tmpHTML.toURI().toURL().toString()); |
| 112 | 103 |
println "current charset: "+doc.charset() |
| ... | ... | |
| 246 | 237 |
|
| 247 | 238 |
// get document number |
| 248 | 239 |
ignoredText = ignoredText.replaceAll("\n", " ")
|
| 249 |
int iNo= ignoredText.indexOf(newPrefix); |
|
| 240 |
int iNo = ignoredText.indexOf(newPrefix);
|
|
| 250 | 241 |
//println ignoredText |
| 251 | 242 |
if (iNo >= 0) {
|
| 252 |
String no =ignoredText.substring(iNo+newPrefix.length()).trim() |
|
| 243 |
String no = ignoredText.substring(iNo+newPrefix.length()).trim()
|
|
| 253 | 244 |
text.attributes().put("idnews", no)
|
| 254 | 245 |
//sign += " "+no |
| 255 | 246 |
text.attributes().put("date", no.substring(5,9)+"-"+no.substring(9,11)+"-"+no.substring(11,13))
|
Formats disponibles : Unified diff