Révision 881
| tmp/org.txm.specificities.core/.classpath (revision 881) | ||
|---|---|---|
| 1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
| 2 | 2 |
<classpath> |
| 3 | 3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/> |
| 4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/> |
|
| 4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
| 5 |
<accessrules> |
|
| 6 |
<accessrule kind="accessible" pattern="**"/> |
|
| 7 |
</accessrules> |
|
| 8 |
</classpathentry> |
|
| 5 | 9 |
<classpathentry kind="src" path="src"/> |
| 6 | 10 |
<classpathentry kind="output" path="bin"/> |
| 7 | 11 |
</classpath> |
| tmp/org.txm.specificities.core/META-INF/MANIFEST.MF (revision 881) | ||
|---|---|---|
| 1 | 1 |
Manifest-Version: 1.0 |
| 2 |
Require-Bundle: org.txm.utils;bundle-version="1.0.0";visibility:=reexp |
|
| 3 |
ort,org.eclipse.osgi;bundle-version="3.10.2";visibility:=reexport,org |
|
| 4 |
.eclipse.core.runtime;bundle-version="3.10.0";visibility:=reexport,or |
|
| 5 |
g.txm.searchengine.cqp.core;bundle-version="1.1.0";visibility:=reexpo |
|
| 6 |
rt,org.txm.statsengine.r.core;visibility:=reexport,org.txm.lexicaltab |
|
| 7 |
le.core;bundle-version="1.0.0";visibility:=reexport,org.txm.statsengi |
|
| 8 |
ne.core;bundle-version="1.0.0";visibility:=reexport,org.txm.core;bund |
|
| 9 |
le-version="0.7.0";visibility:=reexport,org.txm.progression.core;bund |
|
| 10 |
le-version="1.0.0";visibility:=reexport,org.txm.chartsengine.core;bun |
|
| 11 |
dle-version="1.0.0";visibility:=reexport,org.txm.chartsengine.jfreech |
|
| 12 |
art.core;bundle-version="1.0.0";visibility:=reexport,org.txm.chartsen |
|
| 13 |
gine.r.core;bundle-version="1.0.0";visibility:=reexport |
|
| 2 |
Require-Bundle: org.txm.lexicaltable.core;bundle-version="1.0.0";visibility:=reexport, |
|
| 3 |
org.txm.progression.core;bundle-version="1.0.0";visibility:=reexport |
|
| 14 | 4 |
Export-Package: org.txm.functions.contrasts, |
| 15 | 5 |
org.txm.specificities.core.chartsengine.jfreechart, |
| 16 | 6 |
org.txm.specificities.core.chartsengine.r, |
| tmp/org.txm.specificities.feature/feature.xml (revision 881) | ||
|---|---|---|
| 17 | 17 |
</license> |
| 18 | 18 |
|
| 19 | 19 |
<requires> |
| 20 |
<import plugin="org.txm.utils" version="1.0.0" match="greaterOrEqual"/> |
|
| 21 |
<import plugin="org.eclipse.osgi" version="3.10.2" match="greaterOrEqual"/> |
|
| 22 |
<import plugin="org.eclipse.core.runtime" version="3.10.0" match="greaterOrEqual"/> |
|
| 23 |
<import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/> |
|
| 24 |
<import plugin="org.txm.statsengine.r.core"/> |
|
| 25 | 20 |
<import plugin="org.txm.lexicaltable.core" version="1.0.0" match="greaterOrEqual"/> |
| 26 |
<import plugin="org.txm.statsengine.core" version="1.0.0" match="greaterOrEqual"/> |
|
| 27 |
<import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/> |
|
| 28 | 21 |
<import plugin="org.txm.progression.core" version="1.0.0" match="greaterOrEqual"/> |
| 29 |
<import plugin="org.txm.chartsengine.core" version="1.0.0" match="greaterOrEqual"/> |
|
| 30 |
<import plugin="org.txm.chartsengine.jfreechart.core" version="1.0.0" match="greaterOrEqual"/> |
|
| 31 |
<import plugin="org.txm.chartsengine.r.core" version="1.0.0" match="greaterOrEqual"/> |
|
| 32 |
<import plugin="org.eclipse.ui" version="3.106.1" match="greaterOrEqual"/> |
|
| 33 |
<import plugin="org.txm.index.core" version="1.0.0" match="greaterOrEqual"/> |
|
| 34 |
<import plugin="org.txm.statsengine.r.core" version="1.0.0" match="greaterOrEqual"/> |
|
| 35 |
<import plugin="org.txm.statsengine.r.rcp"/> |
|
| 36 | 22 |
<import plugin="org.txm.chartsengine.rcp"/> |
| 37 |
<import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/> |
|
| 38 |
<import plugin="org.eclipse.core.expressions" version="3.4.600" match="greaterOrEqual"/> |
|
| 39 | 23 |
<import plugin="org.txm.lexicaltable.rcp"/> |
| 40 | 24 |
</requires> |
| 41 | 25 |
|
| tmp/org.txm.core/.settings/org.eclipse.jdt.groovy.core.prefs (revision 881) | ||
|---|---|---|
| 1 | 1 |
eclipse.preferences.version=1 |
| 2 |
groovy.compiler.level=23
|
|
| 2 |
groovy.compiler.level=-1
|
|
| 3 | 3 |
groovy.script.filters=scripts/**/*.groovy,y,src/main/resources/**/*.groovy,y,src/test/resources/**/*.groovy,y |
| tmp/org.txm.core/src/java/org/txm/importer/NiceToXML.groovy (revision 881) | ||
|---|---|---|
| 1 |
package org.txm.importer |
|
| 2 |
|
|
| 3 |
import javax.xml.stream.XMLStreamException |
|
| 4 |
import org.apache.tools.ant.types.resources.selectors.InstanceOf; |
|
| 5 |
import groovy.xml.* |
|
| 6 |
|
|
| 7 |
// Working directories, all resolved below one hard-coded corpus root.
def root = new File("/home/mdecorde/xml/temoignagesnice/corpus Matrice - fichiers xmlisés/")
File srcdir = new File(root, "orig")
File outdir = new File(root, "tmp")
File okdir = new File(root, "ok")
File ok2dir = new File(root, "ok2")
File temoignagedir = new File(root, "temoignages")

// Start from an empty "ok2" result directory.
ok2dir.deleteDir()
ok2dir.mkdir()

// Rename every body/chapter/title element to "head" in each *.xml file of "tmp",
// then print the resulting document into "ok2" under the same file name.
for (def xmlFile : outdir.listFiles()) {
	String fileName = xmlFile.getName()
	if (fileName.endsWith(".xml")) {
		def document = new XmlParser().parse(xmlFile)

		for (def titleNode : document.body.chapter.title) {
			println titleNode
			titleNode.name = "head"
		}

		new File(ok2dir, fileName).withWriter("UTF-8") { out ->
			new XmlNodePrinter(new PrintWriter(out)).print(document)
		}
	}
}
|
| 30 |
|
|
| 31 |
/* |
|
| 32 |
// ADD chapter@title |
|
| 33 |
for (def file : outdir.listFiles()) {
|
|
| 34 |
if (!file.getName().endsWith(".xml")) continue;
|
|
| 35 |
def doc = new XmlParser().parse(file); |
|
| 36 |
|
|
| 37 |
for (def chapter : doc.body.chapter) {
|
|
| 38 |
for (def title : chapter.title) {
|
|
| 39 |
chapter.@title = title.text() |
|
| 40 |
break; |
|
| 41 |
} |
|
| 42 |
} |
|
| 43 |
|
|
| 44 |
new File(ok2dir, file.getName()).withWriter("UTF-8") { writer ->
|
|
| 45 |
new XmlNodePrinter(new PrintWriter(writer)).print(doc) |
|
| 46 |
} |
|
| 47 |
} |
|
| 48 |
*/ |
|
| 49 |
//FIX figure and caption inclusions |
|
| 50 |
/* |
|
| 51 |
for (def file : outdir.listFiles()) {
|
|
| 52 |
if (!file.getName().endsWith(".xml")) continue;
|
|
| 53 |
def doc = new XmlParser().parse(file); |
|
| 54 |
for (def note : doc.body."**".figure) {
|
|
| 55 |
note.name = "note" |
|
| 56 |
//println "fig : $note" |
|
| 57 |
//if ("Image :" == note.text()) {
|
|
| 58 |
def children = note.parent().children() |
|
| 59 |
int i = children.indexOf(note) |
|
| 60 |
//println i + " < "+children.size() |
|
| 61 |
def nextChild = children[i+1] |
|
| 62 |
if (nextChild != null && nextChild.name().toString() == "caption") {
|
|
| 63 |
println nextChild |
|
| 64 |
|
|
| 65 |
note.value = "Images : "+note.text()// + " "+nextChild.text() |
|
| 66 |
//println note |
|
| 67 |
|
|
| 68 |
children.remove(i+1) |
|
| 69 |
note.append(nextChild) |
|
| 70 |
} |
|
| 71 |
//} |
|
| 72 |
} |
|
| 73 |
|
|
| 74 |
new File(ok2dir, file.getName()).withWriter("UTF-8") { writer ->
|
|
| 75 |
new XmlNodePrinter(new PrintWriter(writer)).print(doc) |
|
| 76 |
} |
|
| 77 |
} |
|
| 78 |
*/ |
|
| 79 |
/* |
|
| 80 |
// DOCBOOK -> DOCBOOK TEXT ONLY |
|
| 81 |
for (def file : outdir.listFiles()) {
|
|
| 82 |
def doc = new XmlParser().parse(file); |
|
| 83 |
|
|
| 84 |
def body = null |
|
| 85 |
def bookinfo = null |
|
| 86 |
def preface = null |
|
| 87 |
for (def e : doc.body) body = e |
|
| 88 |
|
|
| 89 |
for (def e : doc.bookinfo) {
|
|
| 90 |
doc.remove(e) |
|
| 91 |
} |
|
| 92 |
for (def e : doc.preface) {
|
|
| 93 |
doc.remove(e) |
|
| 94 |
} |
|
| 95 |
for (def e : doc.appendix) {
|
|
| 96 |
doc.remove(e) |
|
| 97 |
} |
|
| 98 |
for (def e : doc.chapter) {
|
|
| 99 |
doc.remove(e) |
|
| 100 |
} |
|
| 101 |
|
|
| 102 |
if (body == null) {
|
|
| 103 |
println "error text: "+file |
|
| 104 |
continue |
|
| 105 |
} |
|
| 106 |
|
|
| 107 |
new File(ok2dir, file.getName()).withWriter("UTF-8") { writer ->
|
|
| 108 |
new XmlNodePrinter(new PrintWriter(writer)).print(doc) |
|
| 109 |
} |
|
| 110 |
// writer.print XmlUtil.serialize(new StreamingMarkupBuilder().bind {
|
|
| 111 |
// mkp.yield body |
|
| 112 |
// }) |
|
| 113 |
} |
|
| 114 |
*/ |
|
| 115 |
//DOCBOOK to TEI |
|
| 116 |
/* |
|
| 117 |
for (def file : outdir.listFiles()) {
|
|
| 118 |
def doc = new XmlParser().parse(file); |
|
| 119 |
def body = null |
|
| 120 |
//def bookinfo = null |
|
| 121 |
for (def e : doc.body) body = e |
|
| 122 |
//for (def e : doc.bookinfo) bookinfo = e |
|
| 123 |
//println body.getClass() |
|
| 124 |
if (body == null) {
|
|
| 125 |
println "error text: "+file |
|
| 126 |
continue |
|
| 127 |
} |
|
| 128 |
// bookinfo.name = "teiHeader" |
|
| 129 |
body.name = "text" |
|
| 130 |
def teins = new groovy.xml.Namespace("http://www.tei-c.org/ns/1.0",'tei')
|
|
| 131 |
// for (def node : body."**") {
|
|
| 132 |
// if (node instanceof String) continue |
|
| 133 |
// def name = node.name() |
|
| 134 |
// if (name instanceof String) |
|
| 135 |
// node.name = teins.get(name) |
|
| 136 |
// else |
|
| 137 |
// node.name = teins.get(name.getLocalPart()) |
|
| 138 |
// } |
|
| 139 |
for (def figure : body."**".figure) {
|
|
| 140 |
figure.name = "note" |
|
| 141 |
figure.value = "Image : " + figure.caption.text() |
|
| 142 |
} |
|
| 143 |
for (def chapter : body."**".chapter) {
|
|
| 144 |
chapter.name = "div" |
|
| 145 |
chapter.@type = "chapter" |
|
| 146 |
} |
|
| 147 |
for (def caption : body."**".title) {
|
|
| 148 |
caption.name = "head" |
|
| 149 |
} |
|
| 150 |
for (def para : body."**".para) {
|
|
| 151 |
para.name = "p" |
|
| 152 |
} |
|
| 153 |
def newdoc = new Node(null, "TEI"); |
|
| 154 |
newdoc.@xmlns="http://www.tei-c.org/ns/1.0"; |
|
| 155 |
newdoc.append(new Node(null, "teiHeader")) |
|
| 156 |
newdoc.append(body) |
|
| 157 |
new File(okdir, file.getName()).withWriter("UTF-8") { writer ->
|
|
| 158 |
new XmlNodePrinter(new PrintWriter(writer)).print(newdoc) |
|
| 159 |
} |
|
| 160 |
// writer.print XmlUtil.serialize(new StreamingMarkupBuilder().bind {
|
|
| 161 |
// mkp.yield body |
|
| 162 |
// }) |
|
| 163 |
} |
|
| 164 |
*/ |
|
| 165 |
|
|
| 166 |
// remove TEI |
|
| 167 |
/*outdir.deleteDir() |
|
| 168 |
outdir.mkdir() |
|
| 169 |
def errors = [] |
|
| 170 |
for (def file : srcdir.listFiles()) {
|
|
| 171 |
if (file.isDirectory()) continue; |
|
| 172 |
//new EncodingConverter(file, "Windows-1252", "UTF-8") |
|
| 173 |
File outfile = new File(outdir, file.getName()); |
|
| 174 |
outfile.withWriter("UTF-8") { writer ->
|
|
| 175 |
file.eachLine("UTF-8") { line ->
|
|
| 176 |
if (line.trim() == "<TEI>") {
|
|
| 177 |
} else if (line.trim() == "</TEI>") {
|
|
| 178 |
writer.println("</book>")
|
|
| 179 |
} else if (line.trim() == "<book lang=\"fr\"/>") {
|
|
| 180 |
writer.println("<book lang=\"fr\">")
|
|
| 181 |
} else {
|
|
| 182 |
writer.println(line) |
|
| 183 |
} |
|
| 184 |
} |
|
| 185 |
} |
|
| 186 |
try {
|
|
| 187 |
ValidateXml.testAndThrow(outfile); |
|
| 188 |
} catch (XMLStreamException e) {
|
|
| 189 |
println file.getName() + " : "+ e.getMessage() |
|
| 190 |
errors << file |
|
| 191 |
if (e.getMessage().contains('Message: The element type "TEI" must be terminated by the matching end-tag "</TEI>"')) {
|
|
| 192 |
println "Delete line : "+e.location.lineNumber |
|
| 193 |
} |
|
| 194 |
println "" |
|
| 195 |
} |
|
| 196 |
} |
|
| 197 |
*/ |
|
| 198 |
println "done" |
|
| 199 |
//if (errors.size() > 0) |
|
| 200 |
// println ""+errors.size()+" errors : $errors" |
|
| 201 |
//String content = file.getText("Windows-1252")
|
|
| 202 |
//println content |
|
| tmp/org.txm.core/src/java/org/txm/importer/WExtractWithMode.groovy (revision 881) | ||
|---|---|---|
| 1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
| 2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
| 3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
| 4 |
// Sophia Antipolis, University of Paris 3. |
|
| 5 |
// |
|
| 6 |
// The TXM platform is free software: you can redistribute it |
|
| 7 |
// and/or modify it under the terms of the GNU General Public |
|
| 8 |
// License as published by the Free Software Foundation, |
|
| 9 |
// either version 2 of the License, or (at your option) any |
|
| 10 |
// later version. |
|
| 11 |
// |
|
| 12 |
// The TXM platform is distributed in the hope that it will be |
|
| 13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
| 14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
| 15 |
// PURPOSE. See the GNU General Public License for more |
|
| 16 |
// details. |
|
| 17 |
// |
|
| 18 |
// You should have received a copy of the GNU General |
|
| 19 |
// Public License along with the TXM platform. If not, see |
|
| 20 |
// http://www.gnu.org/licenses. |
|
| 21 |
// |
|
| 22 |
// |
|
| 23 |
// |
|
| 24 |
// $LastChangedDate: 2011-10-19 17:50:26 +0200 (mer., 19 oct. 2011) $ |
|
| 25 |
// $LastChangedRevision: 2038 $ |
|
| 26 |
// $LastChangedBy: alavrentev $ |
|
| 27 |
// |
|
| 28 |
package org.txm.importer |
|
| 29 |
|
|
| 30 |
import javax.xml.parsers.DocumentBuilder; |
|
| 31 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
| 32 |
import javax.xml.parsers.ParserConfigurationException; |
|
| 33 |
import javax.xml.transform.OutputKeys; |
|
| 34 |
import javax.xml.transform.Result; |
|
| 35 |
import javax.xml.transform.Source; |
|
| 36 |
import javax.xml.transform.Transformer; |
|
| 37 |
import javax.xml.transform.TransformerFactory; |
|
| 38 |
import javax.xml.transform.dom.DOMSource; |
|
| 39 |
import javax.xml.transform.stream.StreamResult; |
|
| 40 |
|
|
| 41 |
import org.w3c.dom.Document; |
|
| 42 |
import org.w3c.dom.Element; |
|
| 43 |
import org.w3c.dom.NodeList; |
|
| 44 |
import org.xml.sax.SAXException; |
|
| 45 |
|
|
| 46 |
import javax.xml.stream.*; |
|
| 47 |
import java.io.File; |
|
| 48 |
import java.net.URL; |
|
| 49 |
|
|
| 50 |
// TODO: Auto-generated Javadoc |
|
| 51 |
/**
 * Extracts a sample of the w elements of a TEI file:
 * the document is copied and only the w elements whose position falls in the
 * requested ranges are kept.
 * Not finished.
 *
 * @author mdecorde
 */
class WExtractWithMode
{

	/**
	 * Copies infile to outfile, keeping only a selection of its w elements.
	 *
	 * The w elements are numbered from 1 in document order. Depending on the mode,
	 * up to three position ranges (bounds included) are kept:
	 * <ul>
	 * <li>beginning: 0 to part (all modes but 1m and 1z)</li>
	 * <li>middle: count/2 - part/2 to count/2 + part/2 (modes 3 and 1m)</li>
	 * <li>end: count - part to count - 1 (all modes but 1a and 1m)</li>
	 * </ul>
	 * where count is the number of w elements of infile and part is max/3 (mode 3),
	 * max/2 (mode 2) or max (modes 1a, 1m and 1z).
	 * Every element other than w is always copied. The selection only starts once a
	 * "text" element has been opened; before that, everything is copied.
	 *
	 * Nothing is written when modemax is malformed, when the mode is unknown or when
	 * the file contains fewer than max words; a message is printed instead.
	 *
	 * @param infile the XML file to read
	 * @param outfile the XML file to write
	 * @param modemax the mode and the number of words to keep, written "mode/max",
	 * mode being one of 1a, 1m, 1z, 2 or 3
	 * @return always null
	 */
	public process(File infile, File outfile, String modemax)
	{
		println "Process "+infile.getName()+", keep $modemax words"
		int count = this.countW(infile);

		int max = 0
		String mode = ""

		try {
			String[] fields = modemax.split("/")
			mode = fields[0]
			max = Integer.parseInt(fields[1])
		} catch (Exception e) {
			// a missing or non numeric max used to be silently read as 0
			println "malformed mode/max value '$modemax': "+e
			return
		}

		if (count < max)
		{
			println "can't extract $max words, the file "+infile.getName()+" contains only $count words"
			return;
		}

		// Groovy: "/" on two ints yields a BigDecimal, truncated by the assignment to an int
		int part = 0;
		switch (mode)
		{
			case "3":
				part = max/3
				break;
			case "2":
				part = max/2
				break;
			case "1a":
			case "1m":
			case "1z":
				part = max
				break;
			default:
				println "mode must be 1a, 1m, 1z, 2 or 3"
				return
		}

		int from1 = 0
		int to1 = (mode != "1m" && mode != "1z") ? part : 0

		int from2 = 0
		int to2 = 0
		if (mode == "3" || mode == "1m")
		{
			from2 = (count/2) - (part/2);
			to2 = (count/2) + (part/2);
		}

		// NOTE(review): positions start at 1, so this range never contains the last word (position count) - confirm intent
		int from3 = 0
		int to3 = 0
		if (mode != "1a" && mode != "1m")
		{
			from3 = count - part;
			to3 = count - 1;
		}

		// true when the word position n falls in one of the three ranges
		def keep = { int n ->
			(n >= from1 && n <= to1) ||
			(n >= from2 && n <= to2) ||
			(n >= from3 && n <= to3)
		}

		println " count : "+count
		println " get from "+from1+" to "+to1
		println " get from "+from2+" to "+to2
		println " get from "+from3+" to "+to3

		// NOTE(review): isW is never set back to false, so after the first w element
		// all character data is filtered like the words - confirm intent
		boolean isW = false;
		boolean isText = false;
		int wcount = 0;
		String localname;

		InputStream inputData = infile.toURI().toURL().openStream();
		FileOutputStream output = null
		XMLStreamReader parser = null
		XMLStreamWriter writer = null
		try {
			parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);
			output = new FileOutputStream(outfile)
			writer = XMLOutputFactory.newInstance().createXMLStreamWriter(output, "UTF-8");

			writer.writeStartDocument("utf-8", "1.0");

			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
			{
				// decision for the current event, from the number of words seen so far
				boolean printW = !isText || keep(wcount);

				switch (event)
				{
					case XMLStreamConstants.START_ELEMENT:
						localname = parser.getLocalName();

						if (localname == "text")
							isText = true;

						if (localname == "w")
						{
							isW = true;
							wcount++;
							// the counter just moved: decide again for this word
							printW = !isText || keep(wcount);
						}

						if (localname != "w" || printW)
						{
							writer.writeStartElement(qualifiedName(parser.getPrefix(), localname));

							// namespace declarations are not reported as attributes by the reader: they are written by hand
							if (localname == "teiHeader")
							{
								writer.writeAttribute("xmlns:me", "http://www.menota.org/ns/1.0");
								writer.writeAttribute("xmlns:bfm", "http://bfm.ens-lsh.fr/ns/1.0");
							}

							if (localname == "TEI")
							{
								writer.writeAttribute("xmlns","http://www.tei-c.org/ns/1.0");
							}

							copyAttributes(parser, writer);
						}
						break;

					case XMLStreamConstants.END_ELEMENT:
						localname = parser.getLocalName()

						if (localname != "w" || printW)
						{
							writer.writeEndElement();
							writer.writeCharacters("\n");
						}
						break;

					case XMLStreamConstants.CHARACTERS:
						if (!isW || printW)
							writer.writeCharacters(parser.getText().trim());
						break;
				}
			}
			writer.flush();
		} finally {
			// release everything, even when the parsing or the writing failed
			try {
				if (writer != null) writer.close();
			} finally {
				try {
					if (parser != null) parser.close();
				} finally {
					try {
						if (output != null) output.close();
					} finally {
						inputData.close();
					}
				}
			}
		}
	}

	/**
	 * Builds the name written for an element or an attribute.
	 *
	 * @param prefix the namespace prefix, may be null or empty
	 * @param localname the local name
	 * @return "prefix:localname" when there is a prefix, localname otherwise
	 */
	private static String qualifiedName(String prefix, String localname)
	{
		return (prefix != null && prefix.length() > 0) ? prefix+":"+localname : localname;
	}

	/**
	 * Writes all the attributes of the element the parser is positioned on.
	 *
	 * @param parser the reader, positioned on a start element
	 * @param writer the writer, right after the matching writeStartElement
	 */
	private static void copyAttributes(XMLStreamReader parser, XMLStreamWriter writer)
	{
		for (int i = 0 ; i < parser.getAttributeCount() ; i++)
		{
			String name = qualifiedName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i));
			writer.writeAttribute(name, parser.getAttributeValue(i));
		}
	}

	/**
	 * Counts the elements named w in a file.
	 *
	 * @param infile the XML file to read
	 * @return the number of w start elements found
	 */
	public int countW(File infile)
	{
		InputStream inputData = infile.toURI().toURL().openStream();
		XMLStreamReader parser = null
		try {
			parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);

			int count = 0;
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
			{
				if (event == XMLStreamConstants.START_ELEMENT && parser.getLocalName() == "w")
					count++;
			}
			return count;
		} finally {
			try {
				if (parser != null) parser.close();
			} finally {
				inputData.close();
			}
		}
	}

	/**
	 * Processes the files of the "xml/extract" directory of the user home that are
	 * listed in the "maxfilemode" file of that directory. The results are written in
	 * its "results" sub-directory, under the same file names.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args)
	{
		String userDir = System.getProperty("user.home");

		File directory = new File(userDir+"/xml/extract/");
		File outdir = new File(userDir+"/xml/extract/","results");
		outdir.mkdir();

		File maxfilemode = new File(userDir+"/xml/extract/maxfilemode");
		/*
		 * maxfilemode format, one file per line, 3 tab separated columns (other lines are ignored):
		 *
		 * filename1.xml	3	45000
		 * filename2.xml	1a	15000
		 * filename3.xml	1m	15000
		 * filename4.xml	1z	15000
		 * filename5.xml	2	22500
		 */
		if (!maxfilemode.isFile())
		{
			println "file not found: "+maxfilemode
			return;
		}

		// file name -> "mode/max", the form expected by process()
		Map<String, String> maxperfile = new HashMap<String, String>();
		maxfilemode.eachLine { line ->
			String[] split = line.split("\t");
			if (split.length == 3)
				maxperfile.put(split[0], split[1]+"/"+split[2]);
		}
		println maxperfile;

		for (File infile : directory.listFiles())
		{
			String modemax = maxperfile.get(infile.getName());
			if (modemax != null)
			{
				File outfile = new File(outdir, infile.getName());
				new WExtractWithMode().process(infile, outfile, modemax)
			}
		}
	}
}
|
| tmp/org.txm.core/src/java/org/txm/importer/HTML2XHTML.groovy (revision 881) | ||
|---|---|---|
| 1 |
package org.txm.importer |
|
| 2 |
|
|
| 3 |
import org.txm.utils.CharsetDetector; |
|
| 4 |
import org.txm.importer.ValidateXml; |
|
| 5 |
|
|
| 6 |
// One-off conversion of an HTML file into an XML file (hard-coded paths):
// the text is patched with regular expressions, written next to the source, then validated.
File infile = new File("/home/mdecorde/Bureau/matrice/témoignages/CONVERSIONS/jod/odt.html")
File outfile = new File("/home/mdecorde/Bureau/matrice/témoignages/CONVERSIONS/jod/odt.xml")

String encoding = new CharsetDetector(infile).getEncoding();
println "Encoding: $encoding"
String text = infile.getText(encoding);

//lower case tags
text = text.replaceAll(/(<[^!][^>]*>)/,
		{ full, word ->
			//fix attributes TRUC=sdf234 : quote the unquoted values of upper case attributes
			word = word.replaceAll("([A-Z]+=)([^\" >]+)([ >])",'$1"$2"$3' )
			word.toLowerCase() // brute force: the attribute values are lower-cased too
		} )

//lower case <.> tags
text = text.replaceAll(/(<.>)/,
		{ full, word ->
			word.toLowerCase()
		} )

//resolve entities: &nbsp; is not declared in XML, replace it with a plain space
text = text.replaceAll(/&nbsp;/," ")

//close tags
text = text.replaceAll(/<br>/,"<br/>")
text = text.replaceAll(/<meta([^>]*)>/,'<meta$1/>')
text = text.replaceAll(/<img([^>]*)>/,'<img$1/>')

//remove doctype declaration
text = text.replace('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">', '')

//write&Validate
outfile.withWriter(encoding) { writer -> writer.write(text) }
if (!ValidateXml.test(outfile)) {
	println "FILE: $outfile"
}
|
| tmp/org.txm.core/src/java/org/txm/importer/XMLText2TXTCSV.groovy (revision 881) | ||
|---|---|---|
| 1 |
package org.txm.importer |
|
| 2 |
|
|
| 3 |
// One-off conversion (hard-coded paths): each XML file of "split_xml" is written as a
// .txt file without its <text> tags, and the attributes of its <text id=...> tag become
// one line of metadata.csv.
File dir = new File("/home/mdecorde/xml/voeux/split_xml")
File outdir = new File("/home/mdecorde/xml/voeux/split_txtcsv")
println "1) xml -> txt + write metadata.csv"
outdir.deleteDir()
outdir.mkdir()
File metadatafile = new File(outdir, "metadata.csv")
StringBuilder csvString = new StringBuilder()

def files = dir.listFiles()
if (files == null) {
	// listFiles() returns null when dir is missing or is not a directory
	println "cannot list the files of $dir"
	files = new File[0]
}
files.sort()
for (File f : files)
{
	File outfile = new File(outdir, f.getName()+".txt");
	String text = f.getText("UTF-8");
	String texttag = text.find("<text id.*>")
	text = text.replaceAll("<text.*>", "").replace("</text>", "");
	outfile.withWriter("UTF-8"){writer -> writer.write(text) }

	if (texttag == null) {
		// find() returns null when nothing matches: the text is kept, without metadata
		println "no <text id...> tag in $f: no metadata line written"
		continue
	}
	// <text id="..." loc="..." annee="..."> -> "...","...","..."
	csvString.append(texttag.replace("<text id=","").replace(" loc=", ",").replace(" annee=", ",").replace("\">", "\"")).append("\n")
}

println "2) write metadata.csv"
metadatafile.withWriter("UTF-8"){csvwriter ->
	csvwriter.write("\"id\",\"loc\",\"annee\"\n");
	csvwriter.write(csvString.toString())}

println "3) rename Voeux_*"
// the new name is made of the characters 6 to 9 of the old one
outdir.eachFileMatch(~/Voeux_.*/) {file-> file.renameTo(new File(outdir, file.getName().substring(6, 10)+".txt")) }
|
|
| tmp/org.txm.core/src/java/org/txm/importer/xmltxm/BuildTTSrc.groovy (revision 881) | ||
|---|---|---|
| 1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
| 2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
| 3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
| 4 |
// Sophia Antipolis, University of Paris 3. |
|
| 5 |
// |
|
| 6 |
// The TXM platform is free software: you can redistribute it |
|
| 7 |
// and/or modify it under the terms of the GNU General Public |
|
| 8 |
// License as published by the Free Software Foundation, |
|
| 9 |
// either version 2 of the License, or (at your option) any |
|
| 10 |
// later version. |
|
| 11 |
// |
|
| 12 |
// The TXM platform is distributed in the hope that it will be |
|
| 13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
| 14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
| 15 |
// PURPOSE. See the GNU General Public License for more |
|
| 16 |
// details. |
|
| 17 |
// |
|
| 18 |
// You should have received a copy of the GNU General |
|
| 19 |
// Public License along with the TXM platform. If not, see |
|
| 20 |
// http://www.gnu.org/licenses. |
|
| 21 |
// |
|
| 22 |
// |
|
| 23 |
// |
|
| 24 |
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $ |
|
| 25 |
// $LastChangedRevision: 3400 $ |
|
| 26 |
// $LastChangedBy: mdecorde $ |
|
| 27 |
// |
|
| 28 |
package org.txm.importer.xmltxm |
|
| 29 |
|
|
| 30 |
import java.text.DateFormat; |
|
| 31 |
import java.util.Date; |
|
| 32 |
import java.util.ArrayList; |
|
| 33 |
import javax.xml.stream.*; |
|
| 34 |
import java.net.URL; |
|
| 35 |
|
|
| 36 |
import org.txm.Toolbox; |
|
| 37 |
import org.txm.importer.filters.*; |
|
| 38 |
|
|
| 39 |
// TODO: Auto-generated Javadoc |
|
| 40 |
/** |
|
| 41 |
* The Class BuildTTSrc. |
|
| 42 |
* |
|
| 43 |
* @author mdecorde |
|
| 44 |
* build the TT source for tigerSearch |
|
| 45 |
*/ |
|
| 46 |
|
|
| 47 |
public class BuildTTSrc {
|
|
| 48 |
|
|
| 49 |
/** The url. */ |
|
| 50 |
private def url; |
|
| 51 |
|
|
| 52 |
/** The input data. */ |
|
| 53 |
private def inputData; |
|
| 54 |
|
|
| 55 |
/** The factory. */ |
|
| 56 |
private def factory; |
|
| 57 |
|
|
| 58 |
/** The parser. */ |
|
| 59 |
private XMLStreamReader parser; |
|
| 60 |
|
|
| 61 |
/** The output. */ |
|
| 62 |
private BufferedWriter output; |
|
| 63 |
|
|
| 64 |
/**
 * Instantiates a new builds the tt src: opens the url and creates the StAX reader
 * used to read it.
 * uses XML-TXM V2
 *
 * When the stream cannot be opened or the reader cannot be created, the error is
 * printed on the standard output and the parser field is left unset.
 *
 * @param url the url of the file to process
 */
public BuildTTSrc(URL url) {
	this.url = url;
	try {
		inputData = url.openStream();
		factory = XMLInputFactory.newInstance();
		parser = factory.createXMLStreamReader(inputData);
	} catch (XMLStreamException ex) {
		System.out.println(ex);
	} catch (IOException ex) {
		// say which resource failed and why
		System.out.println("IOException while parsing "+url+": "+ex);
	}
}
|
| 83 |
|
|
| 84 |
/**
 * Opens the given file for writing as UTF-8 text and keeps the writer in the
 * output field. On failure the localized message of the exception is printed.
 *
 * @param outfile the file to write
 * @return true if the writer was created, false otherwise
 */
private boolean createOutput(File outfile) {
	boolean opened = false;
	try {
		FileOutputStream rawStream = new FileOutputStream(outfile);
		output = new BufferedWriter(new OutputStreamWriter(rawStream, "UTF-8"));
		opened = true;
	} catch (Exception failure) {
		System.out.println(failure.getLocalizedMessage());
	}
	return opened;
}
|
| 101 |
|
|
| 102 |
/** |
|
| 103 |
* Process. |
|
| 104 |
* |
|
| 105 |
* @param outfile the outfile |
|
| 106 |
* @param formtype, if multiple form, use this param to choose the correct one, if null takes the first form found |
|
| 107 |
* @return true, if successful |
|
| 108 |
*/ |
|
| 109 |
public boolean process(File outfile, String formtype) {
|
|
| 110 |
if (!createOutput(outfile)) |
|
| 111 |
return false; |
|
| 112 |
|
|
| 113 |
boolean flagform = false; // to catch the content of the form tag |
|
| 114 |
boolean firstform = false; // to know if its the first form of the w element |
|
| 115 |
String form = ""; // the content of the form tag |
|
| 116 |
String lastopenlocalname = ""; |
|
| 117 |
String localname = ""; |
|
| 118 |
StringBuffer buffer = new StringBuffer(); |
|
| 119 |
try {
|
|
| 120 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
|
|
| 121 |
switch (event) {
|
|
| 122 |
case XMLStreamConstants.START_ELEMENT: |
|
| 123 |
localname = parser.getLocalName(); |
|
| 124 |
switch (localname) {
|
|
| 125 |
case "w": |
|
| 126 |
//firstform = true; |
|
| 127 |
break; |
|
| 128 |
case "form": |
|
| 129 |
// if (firstform) {
|
|
| 130 |
// if (formtype != null) {
|
|
| 131 |
// if(parser.getAttributeCount() > 0 |
|
| 132 |
// && parser.getAttributeValue(0).equals(formtype)) // only one attribute in form, type |
|
| 133 |
// flagform = true; |
|
| 134 |
// } |
|
| 135 |
// else |
|
| 136 |
flagform = true; |
|
| 137 |
form = ""; |
|
| 138 |
firstform = false; |
|
| 139 |
//} |
|
| 140 |
break; |
|
| 141 |
case "s": // TreeTagger can use s tags |
|
| 142 |
buffer.append("<s>\n");
|
|
| 143 |
break; |
|
| 144 |
} |
|
| 145 |
break; |
|
| 146 |
case XMLStreamConstants.END_ELEMENT: |
|
| 147 |
localname = parser.getLocalName(); |
|
| 148 |
switch (localname) {
|
|
| 149 |
case "form": |
|
| 150 |
flagform = false; |
|
| 151 |
form = form.trim() |
|
| 152 |
if (form.length() == 0) buffer.append("__EMPTY__\n");
|
|
| 153 |
else buffer.append(form.replace("\n", "").replace("<", "<")+ "\n");
|
|
| 154 |
//buffer.append(form+ "\n"); // its a txt file no need to use entities |
|
| 155 |
break; |
|
| 156 |
|
|
| 157 |
case "s": |
|
| 158 |
buffer.append("</s>\n");
|
|
| 159 |
break; |
|
| 160 |
} |
|
| 161 |
break; |
|
| 162 |
|
|
| 163 |
case XMLStreamConstants.CHARACTERS: |
|
| 164 |
if (flagform) {
|
|
| 165 |
if (parser.getText().length() > 0) |
|
| 166 |
form += parser.getText(); |
|
| 167 |
} |
|
| 168 |
break; |
|
| 169 |
} |
|
| 170 |
} |
|
| 171 |
|
|
| 172 |
String str = buffer.toString() |
|
| 173 |
if ("false".equals(Toolbox.getPreference(Toolbox.TREETAGGER_APOSTROPHE))) {
|
|
| 174 |
str = str.replace("’", "'").replace("‘", "'");
|
|
| 175 |
} |
|
| 176 |
output.write(str) |
|
| 177 |
output.close(); |
|
| 178 |
parser.close(); |
|
| 179 |
inputData.close(); |
|
| 180 |
} catch (Exception ex) {
|
|
| 181 |
System.out.println(ex); |
|
| 182 |
return false; |
|
| 183 |
} |
|
| 184 |
|
|
| 185 |
return true; |
|
| 186 |
} |
|
| 187 |
|
|
| 188 |
/** |
|
| 189 |
* The main method. |
|
| 190 |
* |
|
| 191 |
* @param args the arguments |
|
| 192 |
*/ |
|
| 193 |
public static void main(String[] args) {
|
|
| 194 |
|
|
| 195 |
String rootDir = "~/xml/rgaqcj/"; |
|
| 196 |
// new File(rootDir+"/identity/").mkdir(); |
|
| 197 |
|
|
| 198 |
ArrayList<String> milestones = new ArrayList<String>();// the tags who |
|
| 199 |
// you want them |
|
| 200 |
// to stay |
|
| 201 |
// milestones |
|
| 202 |
milestones.add("tagUsage");
|
|
| 203 |
milestones.add("pb");
|
|
| 204 |
milestones.add("lb");
|
|
| 205 |
milestones.add("catRef");
|
|
| 206 |
|
|
| 207 |
File srcfile = new File(rootDir + "anainline/", "roland-p5.xml"); |
|
| 208 |
File resultfile = new File(rootDir + "ttsrc/", "roland-p5.tt"); |
|
| 209 |
println("build ttsrc file : " + srcfile + " to : " + resultfile);
|
|
| 210 |
|
|
| 211 |
def builder = new BuildTTSrc(srcfile.toURL(), milestones); |
|
| 212 |
builder.process(resultfile); |
|
| 213 |
|
|
| 214 |
return; |
|
| 215 |
} |
|
| 216 |
|
|
| 217 |
} |
|
| tmp/org.txm.core/src/java/org/txm/importer/xmltxm/Xml2Ana.groovy (revision 881) | ||
|---|---|---|
| 1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
| 2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
| 3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
| 4 |
// Sophia Antipolis, University of Paris 3. |
|
| 5 |
// |
|
| 6 |
// The TXM platform is free software: you can redistribute it |
|
| 7 |
// and/or modify it under the terms of the GNU General Public |
|
| 8 |
// License as published by the Free Software Foundation, |
|
| 9 |
// either version 2 of the License, or (at your option) any |
|
| 10 |
// later version. |
|
| 11 |
// |
|
| 12 |
// The TXM platform is distributed in the hope that it will be |
|
| 13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
| 14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
| 15 |
// PURPOSE. See the GNU General Public License for more |
|
| 16 |
// details. |
|
| 17 |
// |
|
| 18 |
// You should have received a copy of the GNU General |
|
| 19 |
// Public License along with the TXM platform. If not, see |
|
| 20 |
// http://www.gnu.org/licenses. |
|
| 21 |
// |
|
| 22 |
// |
|
| 23 |
// |
|
| 24 |
// $LastChangedDate: 2017-04-19 16:23:38 +0200 (mer. 19 avril 2017) $ |
|
| 25 |
// $LastChangedRevision: 3430 $ |
|
| 26 |
// $LastChangedBy: mdecorde $ |
|
| 27 |
// |
|
| 28 |
package org.txm.importer.xmltxm |
|
| 29 |
|
|
| 30 |
import org.txm.importer.HasElement; |
|
| 31 |
import org.txm.importer.StaxIdentityParser; |
|
| 32 |
|
|
| 33 |
import java.text.DateFormat; |
|
| 34 |
import java.util.ArrayList; |
|
| 35 |
import java.util.Date; |
|
| 36 |
import java.util.HashMap; |
|
| 37 |
import java.util.Locale; |
|
| 38 |
|
|
| 39 |
import javax.xml.stream.*; |
|
| 40 |
|
|
| 41 |
import java.net.URL; |
|
| 42 |
|
|
| 43 |
import org.txm.importer.filters.*; |
|
| 44 |
import org.txm.utils.AsciiUtils; |
|
| 45 |
|
|
| 46 |
/** |
|
| 47 |
* The Class Xml2Ana. |
|
| 48 |
* |
|
| 49 |
* @author mdecorde |
|
| 50 |
* transform : pre xml-tei file >> xml-tei-txm file |
|
| 51 |
* The pre xml-tei file must contain a minimal teiHeader with classDecl, encodingDesc and titleStmt |
|
| 52 |
* |
|
| 53 |
* You must specify the correspondence between word attributes and ana types & respStmt IDs |
|
| 54 |
* then the attributes of w tags will be transformed into interp tag |
|
| 55 |
*/ |
|
| 56 |
public class Xml2Ana extends StaxIdentityParser |
|
| 57 |
{
|
|
| 58 |
/** The dir. */ |
|
| 59 |
private def dir; |
|
| 60 |
|
|
| 61 |
/** The convert all attributes. */ |
|
| 62 |
private boolean convertAllAttributes = false; |
|
| 63 |
|
|
| 64 |
/** The corresp type. */ |
|
| 65 |
HashMap<String,String> correspType; |
|
| 66 |
|
|
| 67 |
/** The corresp ref. */ |
|
| 68 |
HashMap<String,String> correspRef; |
|
| 69 |
|
|
| 70 |
/** The check tags. */ |
|
| 71 |
HashMap<String,Boolean> checkTags = new HashMap<String,Boolean>(); |
|
| 72 |
|
|
| 73 |
/** The resp id. */ |
|
| 74 |
def respId = []; |
|
| 75 |
|
|
| 76 |
/** The applications. */ |
|
| 77 |
HashMap<String,File> applications; |
|
| 78 |
|
|
| 79 |
/** The taxonomies. */ |
|
| 80 |
HashMap<String,String[]> taxonomies; |
|
| 81 |
|
|
| 82 |
/** The resps. */ |
|
| 83 |
HashMap<String,String[]> resps; |
|
| 84 |
|
|
| 85 |
/** The items. */ |
|
| 86 |
HashMap<String,HashMap<String,String>> items; |
|
| 87 |
|
|
| 88 |
/** The XML headeradded. */ |
|
| 89 |
boolean XMLHeaderadded = false; |
|
| 90 |
String textname; |
|
| 91 |
String wtag = "w"; |
|
| 92 |
|
|
| 93 |
public static final String TEXT = "text" |
|
| 94 |
public static final String ID = "id" |
|
| 95 |
|
|
| 96 |
/** |
|
| 97 |
* Instantiates a new xml2 ana. |
|
| 98 |
* |
|
| 99 |
* @param file the XML file to transform |

| 100 |
* (its name without the extension is used as the text name) |
|
| 101 |
*/ |
|
| 102 |
public Xml2Ana(File file) {
	super(file.toURI().toURL());

	// the text name is the file name without its last extension
	String filename = file.getName();
	int dot = filename.lastIndexOf(".");
	textname = (dot > 0) ? filename.substring(0, dot) : filename;

	// elements whose presence is recorded while the file is read
	for (String tag : ["respStmt", "titleStmt", "appInfo"]) {
		checkTags.put(tag, false);
	}

	// a first pass over the file tells whether a "text" element is present
	hasText = new HasElement(file, TEXT).process();
}
|
| 117 |
|
|
| 118 |
/** |
|
| 119 |
* Sets whether the remaining attributes of a word are converted into txm:ana elements. |
|
| 120 |
* |
|
| 121 |
* @param value the value |
|
| 122 |
* @return the java.lang. object |
|
| 123 |
*/ |
|
| 124 |
public setConvertAllAtrtibutes(boolean value) {
	// untyped method: the assigned value is also the returned value
	return this.convertAllAttributes = value;
}
|
| 127 |
|
|
| 128 |
/** |
|
| 129 |
* Sets the word tag. |

| 130 |
* |

| 131 |
* @param wtag the local name of the elements processed as words |
|
| 132 |
* @return the java.lang. object |
|
| 133 |
*/ |
|
| 134 |
public setWordTag(String wtag) {
	// untyped method: the assigned value is also the returned value
	return this.wtag = wtag;
}
|
| 137 |
|
|
| 138 |
int idcount = 0; |
|
| 139 |
boolean flagWord = false; |
|
| 140 |
int firstElement = 0; |
|
| 141 |
boolean teiElementAdded = false; |
|
| 142 |
boolean teiHeaderElementAdded = false; |
|
| 143 |
boolean hasText = false; |
|
| 144 |
boolean textElementAdded = false; |
|
| 145 |
def anabalises = []; |
|
| 146 |
/**
 * Handles a start tag.
 *
 * Word elements (local name equal to wtag) are rewritten: mapped attributes
 * become pending txm:ana entries and a txm:form element is opened.
 * The "TEI" element is copied and completed with the TEI and TXM namespace
 * declarations it lacks. For any other element the missing TEI, teiHeader
 * and text elements are added where needed before the element is copied.
 *
 * NOTE(review): "localname", "writer" and "parser" are not declared in this
 * class; presumably the parent parser provides them and sets "localname" to
 * the name of the current element - confirm.
 */
protected void processStartElement()
{
	firstElement++;

	String elementName = parser.getLocalName();
	if (this.checkTags.containsKey(elementName)) {
		this.checkTags.put(elementName, true);
	}

	switch (elementName) {
		case wtag:
			startWordElement();
			break;

		case "TEI":
			super.processStartElement();
			declareMissingNamespaces();
			break;

		default:
			addMissingStructure();
			super.processStartElement();
			// a copied "text" element without an id receives the text name as id
			if (TEXT.equals(localname) && !parser.getAttributeValue(null, ID)) {
				writer.writeAttribute(ID, textname);
			}
	}
}

/** Opens the "text" element that the source lacks; after() closes it. */
private void openAddedTextElement()
{
	writer.writeStartElement(TEXT);
	writer.writeAttribute(ID, textname);
	textElementAdded = true;
	hasText = true;
}

/** Writes the start of a word: its attributes, the pending txm:ana entries and the opening txm:form. */
private void startWordElement()
{
	if (!hasText) {
		openAddedTextElement();
	}
	idcount++; // increment word counter
	anabalises.clear();

	writer.writeStartElement(parser.getLocalName()); // write w

	for (int i = 0 ; i < parser.getNamespaceCount() ; i++) { // write namespaces
		writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
	}

	for (int i = 0 ; i < parser.getAttributeCount() ; i++) { // transform attributes
		String type = parser.getAttributeLocalName(i);
		String value = parser.getAttributeValue(i);
		// no correspondences set means that no attribute is a txm:ana
		if (correspType != null && correspType.containsKey(type)) {
			String corresptype = correspType.get(type);
			String ref = correspRef.get(type);
			anabalises.add(["#"+ref, "#"+corresptype, value]);
		} else if (type == ID) { // keep id attribute
			String wordid = value;
			// "wXYZ" becomes "w_XYZ"; ids that do not start with "w" are kept
			if (wordid.startsWith("w") && !wordid.startsWith("w_")) {
				wordid = "w_"+wordid.substring(1);
			}
			wordid = AsciiUtils.buildAttributeId(wordid); // remove characters not compatible with the id attribute value
			writer.writeAttribute(type, wordid);
		} else if (convertAllAttributes) { // attribute of the original <w> turned into a txm:ana
			anabalises.add(["none","#"+type, value]);
		} else { // attribute of the original <w> kept as is
			writer.writeAttribute(type, value);
		}
	}

	flagWord = true; // start to capture the form
	writer.writeStartElement(TXMNS, "form");
}

/** Declares on the just-written TEI element the TEI and TXM namespaces that the source did not declare. */
private void declareMissingNamespaces()
{
	boolean hasTeiNS = false;
	boolean hasTXMNs = false;
	for (int i = 0 ; i < parser.getNamespaceCount() ; i++) {
		if (parser.getNamespaceURI(i) == TXMNS)
			hasTXMNs = true;
		else if (parser.getNamespaceURI(i) == TEINS)
			hasTeiNS = true;
	}
	if (!hasTeiNS) {
		writer.writeDefaultNamespace(TEINS);
	}
	if (!hasTXMNs) {
		writer.writeNamespace(TXM, TXMNS);
	}
}

/** Adds the TEI, teiHeader and text elements that are missing before the current element is copied. */
private void addMissingStructure()
{
	if (TEXT.equals(localname)) {
		hasText = true;
	}

	if (firstElement == 1) { // test if first element is TEI
		if (localname != "TEI") { // "TEI" is missing
			teiElementAdded = true;
			addTEIElement();
		} else if (!hasText) {
			openAddedTextElement();
		}
	}

	if (firstElement == 2 && !teiElementAdded) {
		if (localname != "teiHeader") { // teiHeader is missing
			writeTeiHeader();
			hasTeiHeader = true;
			teiHeaderElementAdded = true;
		}
	} else if (!hasText && (teiElementAdded || teiHeaderElementAdded)) {
		openAddedTextElement();
	}
}
|
| 262 |
|
|
| 263 |
/**
 * Closes the elements that this parser opened on its own, then lets the
 * parent finish (close writer, parser, etc).
 */
protected void after()
{
	// one end tag per added element: "text" first, then "TEI"
	for (boolean added : [textElementAdded, teiElementAdded]) {
		if (added) {
			writer.writeEndElement();
		}
	}
	super.after();
}
|
| 273 |
|
|
| 274 |
/**
 * Opens a "TEI" root element with its namespace declarations and writes a
 * generated teiHeader into it. The element is left open.
 */
protected void addTEIElement()
{
	writer.writeStartElement("TEI");

	// namespace declarations: TEI as default namespace, then the TXM and TEI prefixes
	writer.writeDefaultNamespace(TEINS);
	writer.writeNamespace(TXM, TXMNS);
	writer.writeNamespace(TEI, TEINS);

	this.writeTeiHeader();
}
|
| 282 |
|
|
| 283 |
/**
 * Writes character data: trimmed while a word is being captured, delegated
 * to the parent otherwise.
 */
protected void processCharacters()
{
	if (!flagWord) {
		super.processCharacters();
		return;
	}

	// keep form in 1 line
	String text = parser.getText();
	writer.writeCharacters(text.trim());
}
|
| 291 |
|
|
| 292 |
boolean hasClassDecl = false; |
|
| 293 |
boolean hasFileDesc = false; |
|
| 294 |
boolean hasEncodingDesc = false; |
|
| 295 |
boolean hasTeiHeader = false; |
|
| 296 |
boolean hasTEI = false; |
|
| 297 |
public static String ANA = "ana" |
|
| 298 |
public static String RESP = "resp" |
|
| 299 |
public static String TYPE = "type" |
|
| 300 |
/**
 * Handles an end tag: completes words with their txm:ana elements and
 * completes the header elements with the TXM content, then lets the parent
 * write the end tag itself.
 */
protected void processEndElement()
{
	String closing = parser.getLocalName();
	switch (closing) {
		case wtag:
			writer.writeEndElement(); // txm:form
			// <txm:ana resp=ref type=corresptype>value</txm:ana>
			for (def values : anabalises) {
				def (resp, type, value) = values;
				writer.writeStartElement(TXMNS, ANA);
				writer.writeAttribute(RESP, resp);
				writer.writeAttribute(TYPE, type);
				writer.writeCharacters(value);
				writer.writeEndElement(); // txm:ana
			}
			flagWord = false;
			break;

		case "fileDesc":
			hasFileDesc = true;
			this.writeTXMResps();
			break;

		case "classDecl":
			hasClassDecl = true;
			this.writeTXMTaxonomies();
			break;

		case "encodingDesc":
			hasEncodingDesc = true;
			this.writeContentOfEncodingDesc();
			break;

		case "teiHeader":
			hasTeiHeader = true;
			if (!hasEncodingDesc) { // the header ends without an encodingDesc: add one
				writer.writeStartElement("encodingDesc");
				this.writeContentOfEncodingDesc();
				writer.writeEndElement();
			}
			break;

		case "TEI":
			hasTEI = true;
			if (!hasTeiHeader) { // no header was read or written so far
				this.writeTeiHeader();
			}
			break;
	}

	super.processEndElement();
}
|
| 350 |
|
|
| 351 |
/**
 * Writes a complete generated teiHeader: a fileDesc holding the TXM
 * responsibilities and empty titleStmt/title, publicationStmt and sourceDesc
 * elements, followed by an encodingDesc with the TXM content.
 */
protected void writeTeiHeader()
{
	writer.writeStartElement("teiHeader");

	// fileDesc
	writer.writeStartElement("fileDesc");
	this.writeTXMResps();
	writer.writeStartElement("titleStmt");
	writer.writeStartElement("title");
	writer.writeEndElement(); // title
	writer.writeEndElement(); // titleStmt
	for (String emptyElement : ["publicationStmt", "sourceDesc"]) {
		writer.writeStartElement(emptyElement);
		writer.writeEndElement();
	}
	writer.writeEndElement(); // fileDesc

	// encodingDesc
	writer.writeStartElement("encodingDesc");
	this.writeContentOfEncodingDesc();
	writer.writeEndElement(); // encodingDesc

	writer.writeEndElement(); // teiHeader
}
|
| 370 |
|
|
| 371 |
/**
 * Writes the TXM content of an encodingDesc: an appInfo element with the
 * applications and, unless a classDecl was already seen, a classDecl element
 * with the taxonomies.
 */
protected void writeContentOfEncodingDesc()
{
	writer.writeStartElement("appInfo");
	this.writeTXMApps();
	writer.writeEndElement(); // appInfo

	if (hasClassDecl) {
		return; // the taxonomies were already written when the classDecl ended
	}

	writer.writeStartElement("classDecl");
	this.writeTXMTaxonomies();
	writer.writeEndElement(); // classDecl
}
|
| 382 |
|
|
| 383 |
/** |
|
| 384 |
* Check resp. |
|
| 385 |
* |
|
| 386 |
* @return the string |
|
| 387 |
*/ |
|
| 388 |
public String checkResp()
{
	// checkTags maps each watched tag to true once processStartElement() has seen it;
	// only those tags belong under the "found tags" heading
	StringBuilder rez = new StringBuilder("found tags : \n");
	for (Map.Entry<String, Boolean> tag : checkTags.entrySet()) {
		if (tag.getValue()) {
			rez.append("\t").append(tag.getKey()).append("\n");
		}
	}
	return rez.toString();
}
|
| 395 |
|
|
| 396 |
/** |
|
| 397 |
* Sets the correspondances. |
|
| 398 |
* |
|
| 399 |
* @param correspRef the corresp ref |
|
| 400 |
* @param correspType the corresp type |
|
| 401 |
*/ |
|
| 402 |
public void setCorrespondances(correspRef, correspType)
{
	// both maps are keyed by the name of a word attribute
	this.correspType = correspType;
	this.correspRef = correspRef;
}
|
| 407 |
|
|
| 408 |
/** |
|
| 409 |
* Sets the header infos. |
|
| 410 |
* |
|
| 411 |
* @param respId the resp id |
|
| 412 |
* @param resps the resps |
|
| 413 |
* @param applications the applications |
|
| 414 |
* @param taxonomies the taxonomies |
|
| 415 |
* @param items the items |
|
| 416 |
*/ |
|
| 417 |
public void setHeaderInfos(respId,resps, applications, taxonomies, items)
{
	// responsibility ids and the data looked up with them
	this.respId = respId;
	this.resps = resps;
	this.applications = applications;
	this.taxonomies = taxonomies;

	// taxonomy descriptions, looked up by taxonomy name
	this.items = items;
}
|
| 425 |
|
|
| 426 |
/** |
|
| 427 |
* Write txm resps. |
|
| 428 |
*/ |
|
| 429 |
public void writeTXMResps()
{
	for (String ref in respId) {
		// infos: [0] text of resp, [1] person name, [2] "when" of the date, [3] text of the date
		String[] infos = resps.get(ref);

		writer.writeStartElement("respStmt");

		// <resp id=ref>infos[0]<date when=infos[2]>infos[3]</date></resp>
		writer.writeStartElement(RESP);
		writer.writeAttribute(ID, ref);
		writer.writeCharacters(infos[0]);
		writer.writeStartElement("date");
		writer.writeAttribute("when", infos[2]);
		writer.writeCharacters(infos[3]);
		writer.writeEndElement(); // date
		writer.writeEndElement(); // resp

		// <name type="person">infos[1]</name>
		writer.writeStartElement("name");
		writer.writeAttribute(TYPE, "person");
		writer.writeCharacters(infos[1]);
		writer.writeEndElement(); // name

		writer.writeEndElement(); // respStmt
	}
}
|
| 449 |
|
|
| 450 |
/** |
|
| 451 |
* Write txm apps. |
|
| 452 |
*/ |
|
| 453 |
public void writeTXMApps()
{
	for (String ref : respId) {
		// per responsibility: [0] application ident, [1] version, [2] report file or null
		List<String> list= applications.get(ref);
		String ident = list.get(0);
		String version = list.get(1);
		File report = list.get(2);

		writer.writeStartElement(TXMNS, "application");
		writer.writeAttribute("ident", ident);
		writer.writeAttribute("version", version);
		writer.writeAttribute(RESP, ref);

		//get txm:commandLine from GeneratedReport
		if (report != null) {
			// NOTE(review): presumably the empty text and the flush make the writer finish the
			// start tag before the report is copied to "output", which is not declared in this
			// class - confirm against the parent parser
			writer.writeCharacters("");writer.flush();
			Reader reader = new FileReader(report);
			try {
				String line = reader.readLine();
				while (line != null) {
					if (line.length() != 0) // empty lines of the report are dropped
						output.write(line+"\n");
					line = reader.readLine();
				}
			} finally {
				// the report must not stay open when reading or writing fails
				reader.close();
			}
		}

		// one <list><ref type="tagset" target=item/></list> per taxonomy of the responsibility
		writer.writeStartElement("ab");
		writer.writeAttribute(TYPE, "annotation");
		for (String item : taxonomies.get(ref)) {
			writer.writeStartElement("list");
			writer.writeEmptyElement("ref");
			writer.writeAttribute(TYPE, "tagset");
			writer.writeAttribute("target", item);
			writer.writeEndElement(); // list
		}
		writer.writeEndElement(); // ab
		writer.writeEndElement(); // txm:application
	}
}
|
| 492 |
|
|
| 493 |
/** |
|
| 494 |
* Write txm taxonomies. |
|
| 495 |
*/ |
|
| 496 |
public void writeTXMTaxonomies()
{
	for (Map.Entry<String, HashMap<String, String>> taxonomy : items.entrySet()) {
		String tax = taxonomy.getKey();

		writer.writeStartElement("taxonomy");
		writer.writeAttribute(ID, tax);

		// <bibl type="tagset"><title>tax</title> then one <ref/> per entry of the taxonomy
		writer.writeStartElement("bibl");
		writer.writeAttribute(TYPE, "tagset");
		writer.writeStartElement("title");
		writer.writeCharacters(tax);
		writer.writeEndElement(); // title

		for (Map.Entry<String, String> reference : taxonomy.getValue().entrySet()) {
			writer.writeEmptyElement("ref");
			writer.writeAttribute(TYPE, reference.getKey());
			writer.writeAttribute("target", reference.getValue());
		}

		writer.writeEndElement(); // bibl
		writer.writeEndElement(); // taxonomy
	}
}
|
| 517 |
|
|
| 518 |
/** |
|
| 519 |
* The main method. |
|
| 520 |
* |
|
| 521 |
* @param args the arguments |
|
Formats disponibles : Unified diff