Révision 1948
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/r/ExecRMacro.groovy (revision 1948) | ||
---|---|---|
20 | 20 |
((Index)sel).asRMatrix() |
21 | 21 |
symbol = "t("+((Index)sel).getSymbol()+'$data)' |
22 | 22 |
prop = ((Index)sel).getProperties().toString() |
23 |
} |
|
24 |
|
|
25 |
if (symbol == null) { |
|
23 |
} else { |
|
26 | 24 |
println "Selection is not an Index. Aborting." |
27 | 25 |
return |
28 | 26 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/commands/AdvancedPartitionMacro.groovy (revision 1948) | ||
---|---|---|
5 | 5 |
package org.txm.macro.commands |
6 | 6 |
|
7 | 7 |
// STANDARD DECLARATIONS |
8 |
|
|
9 | 8 |
import org.txm.rcpapplication.swt.widget.parameters.* |
10 | 9 |
import org.txm.searchengine.cqp.corpus.* |
11 |
import org.txm.rcpapplication.views.CorporaView
|
|
10 |
import org.txm.rcp.views.corpora.CorporaView
|
|
12 | 11 |
|
13 | 12 |
def scriptName = this.class.getSimpleName() |
14 | 13 |
|
15 |
if (!(corpusViewSelection instanceof Corpus)) { |
|
14 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
16 | 15 |
println "** $scriptName: please select a Corpus in the Corpus view." |
17 | 16 |
return 0 |
18 | 17 |
} |
19 | 18 |
|
20 |
def corpus = corpusViewSelection
|
|
19 |
CQPCorpus corpus = corpusViewSelection
|
|
21 | 20 |
|
22 | 21 |
// PARAMETERS ................................... |
23 | 22 |
// - NAME: name of the partition to build |
... | ... | |
54 | 53 |
|
55 | 54 |
println "Building partition on $corpus (size = "+(corpus.getSize())+")." |
56 | 55 |
|
57 |
partition = corpus.createPartition(NAME, QUERIES, PARTNAMES) |
|
56 |
partition = new Partition(corpus) |
|
57 |
partition.setParameters(NAME, QUERIES, PARTNAMES) |
|
58 |
partition.compute() |
|
58 | 59 |
|
59 | 60 |
def list = Arrays.asList(partition.getPartSizes()) |
60 | 61 |
|
... | ... | |
72 | 73 |
println "** $scriptName: PARTNAMES.size() != QUERIES.size(), ("+PARTNAMES.size()+" != "+QUERIES.size()+")." |
73 | 74 |
return 0 |
74 | 75 |
} |
75 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/commands/CrossedPartitionBuilderMacro.groovy (revision 1948) | ||
---|---|---|
6 | 6 |
|
7 | 7 |
import org.txm.rcp.swt.widget.parameters.* |
8 | 8 |
import org.txm.searchengine.cqp.corpus.* |
9 |
import org.txm.utils.logger.Log; |
|
9 |
import org.txm.searchengine.cqp.corpus.Partition |
|
10 |
import org.txm.utils.logger.Log |
|
10 | 11 |
import org.txm.rcp.views.* |
11 | 12 |
|
12 | 13 |
//BEGINNING OF PARAMETERS |
13 |
def corpus = corpusViewSelection |
|
14 |
if (!(corpus instanceof Corpus)) { |
|
14 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
15 | 15 |
println "Error: this macro should be run with a Corpus selected" |
16 | 16 |
return |
17 | 17 |
} |
18 | 18 |
|
19 |
corpus = corpusViewSelection |
|
20 |
|
|
19 | 21 |
@Field @Option(name="structuralUnit", usage="the structural Unit to use", widget="String", required=true, def="text") |
20 | 22 |
String structuralUnit = "text" |
21 | 23 |
|
... | ... | |
46 | 48 |
* Create partition with advanced CQL queries and autoname the parts. |
47 | 49 |
* Can create partitions by defining a multi-level structural units hierarchy or by defining several properties values for one structural unit. |
48 | 50 |
* Can define some structural units to remove from the resulting parts subcorpus. |
49 |
*
|
|
51 |
* |
|
50 | 52 |
*/ |
51 | 53 |
|
52 | 54 |
// TODO: add this variables to the macro parameters |
... | ... | |
156 | 158 |
* Init the generator and process. |
157 | 159 |
* @param corpusName |
158 | 160 |
*/ |
159 |
public Partition createPartition(Corpus corpus, boolean debug, |
|
161 |
public Partition createPartition(CQPCorpus corpus, boolean debug,
|
|
160 | 162 |
String partitionName, String partNamePrefix, |
161 | 163 |
def structuralUnits, def structuralUnitProperties, |
162 | 164 |
def structuralUnitToIgnore, String expandTarget) { |
... | ... | |
213 | 215 |
|
214 | 216 |
// Creating the partition |
215 | 217 |
if (!DEBUG && queries.size() == partNames.size()) { |
216 |
return corpus.createPartition(PARTITION_NAME, queries, partNames); |
|
218 |
Partition partition = new Partition(corpus) |
|
219 |
partition.setParameters(PARTITION_NAME, queries, partNames); |
|
220 |
partition.compute() |
|
221 |
return partition |
|
217 | 222 |
} |
218 | 223 |
} else { |
219 | 224 |
println "Error: Structural units count or structural units properties count error."; |
... | ... | |
222 | 227 |
} |
223 | 228 |
|
224 | 229 |
/** |
225 |
* Recurse through structural units and structural units properties of corpus and create the queries and the part names.
|
|
230 |
* Recurse through structural units and structural units properties of corpus and create the queries and the part names. |
|
226 | 231 |
* @param corpus the corpus or subcorpus |
227 | 232 |
* @param index the index for recursion |
228 | 233 |
* @param tmpQuery the temporary query for creating subcorpus part |
229 | 234 |
* @param tmpPartName the temporary part name of the subcorpus part |
230 | 235 |
*/ |
231 |
protected void process(Corpus corpus, int index, String tmpQuery, String tmpPartName) { |
|
236 |
protected void process(CQPCorpus corpus, int index, String tmpQuery, String tmpPartName) {
|
|
232 | 237 |
// End of array |
233 | 238 |
if (index >= STRUCTURAL_UNITS.size()) { |
234 | 239 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/annotation/BuildWordPropTableMacro.groovy (revision 1948) | ||
---|---|---|
34 | 34 |
import org.txm.functions.concordances.* |
35 | 35 |
import org.txm.functions.concordances.comparators.* |
36 | 36 |
import org.txm.searchengine.cqp.ReferencePattern |
37 |
|
|
38 |
import java.util.List |
|
39 |
|
|
37 | 40 |
import org.kohsuke.args4j.* |
38 | 41 |
import groovy.transform.Field |
39 | 42 |
import org.txm.rcp.swt.widget.parameters.* |
40 | 43 |
|
41 |
def corpus = corpusViewSelection |
|
42 |
if (!(corpus instanceof Corpus)) { |
|
44 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
43 | 45 |
println "Error: you must select a corpus or a subcorpus to export properties" |
44 | 46 |
return false; |
45 | 47 |
} |
46 | 48 |
|
49 |
CQPCorpus corpus = corpusViewSelection |
|
50 |
|
|
51 |
|
|
47 | 52 |
@Field @Option(name="properties", usage="columns to inject separated by commas", widget="String", required=true, def="p1, p2, ... , pn") |
48 | 53 |
def properties = "pos" |
49 | 54 |
|
... | ... | |
134 | 139 |
//query = new Query(Query.fixQuery(query)) |
135 | 140 |
def viewprops = [word] |
136 | 141 |
viewprops.addAll(annots) |
137 |
def concordance = new Concordance(corpus, query, word, viewprops, referencePattern, referencePattern, leftcontextsize, rightcontextsize) |
|
138 |
|
|
142 |
Concordance concordance = new Concordance(corpus) |
|
143 |
concordance.setParameters(query, [word], [word], [word], viewprops, viewprops, viewprops, referencePattern, referencePattern, leftcontextsize, rightcontextsize) |
|
144 |
concordance.compute() |
|
139 | 145 |
//println "Conc done "+(System.currentTimeMillis()-start) |
140 | 146 |
|
141 | 147 |
def writer = tsvFile.newWriter("UTF-8"); |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/export/ExportTextsMetadataMacro.groovy (revision 1948) | ||
---|---|---|
1 | 1 |
// STANDARD DECLARATIONS |
2 | 2 |
package org.txm.macroproto.export |
3 | 3 |
|
4 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
5 |
import org.txm.searchengine.cqp.corpus.* |
|
4 | 6 |
import org.kohsuke.args4j.* |
5 | 7 |
import groovy.transform.Field |
6 | 8 |
import org.txm.rcp.swt.widget.parameters.* |
7 |
import org.txm.searchengine.cqp.corpus.* |
|
8 | 9 |
import org.txm.Toolbox |
9 | 10 |
|
10 |
if (!(corpusViewSelection instanceof Corpus)) { |
|
11 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
11 | 12 |
println "Please select a corpus first" |
12 | 13 |
return |
13 | 14 |
} |
... | ... | |
23 | 24 |
if (!ParametersDialog.open(this)) return |
24 | 25 |
|
25 | 26 |
// BEGINNING |
26 |
def Corpus corpus = corpusViewSelection
|
|
27 |
CQPCorpus corpus = corpusViewSelection
|
|
27 | 28 |
def CQI = CQPSearchEngine.getCqiClient() |
28 | 29 |
def writer = tsvFile.newWriter("UTF-8") |
29 | 30 |
def internalTextProperties = ["project", "base", "path"] |
... | ... | |
40 | 41 |
properties[0] = properties[idi] |
41 | 42 |
properties[idi] = tmp |
42 | 43 |
} else if (idi == -1) { |
43 |
println sprintf("** Warning: incoherent metadata content found for %s corpus - no 'id' metadata found", corpus) |
|
44 |
println "** Aborting" |
|
45 |
return |
|
44 |
println sprintf("** Warning: incoherent metadata content found for %s corpus - no 'id' metadata found", corpus)
|
|
45 |
println "** Aborting"
|
|
46 |
return
|
|
46 | 47 |
} |
47 | 48 |
|
48 | 49 |
// getting values for all texts and all text metadata |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/export/ExportTextContentMacro.groovy (revision 1948) | ||
---|---|---|
1 | 1 |
// STANDARD DECLARATIONS |
2 | 2 |
package org.txm.macro.export |
3 | 3 |
|
4 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
4 | 5 |
import org.txm.searchengine.cqp.corpus.* |
5 |
import org.txm.searchengine.cqp.corpus.query.Query |
|
6 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery
|
|
6 | 7 |
import org.txm.Toolbox |
7 | 8 |
import org.txm.utils.i18n.LangFormater; |
8 | 9 |
import org.apache.commons.lang.StringUtils; |
... | ... | |
12 | 13 |
|
13 | 14 |
println "corpora selection: "+corpusViewSelection |
14 | 15 |
|
15 |
if (!(corpusViewSelection instanceof Corpus)) { |
|
16 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
16 | 17 |
println "Please select a corpus" |
17 | 18 |
return; |
18 | 19 |
} |
... | ... | |
31 | 32 |
|
32 | 33 |
if (!exportDir.exists()) exportDir.mkdirs() |
33 | 34 |
|
34 |
Corpus corpus = corpusViewSelection |
|
35 |
CQPCorpus corpus = corpusViewSelection
|
|
35 | 36 |
CQI = CQPSearchEngine.getCqiClient(); |
36 | 37 |
|
37 | 38 |
def lineSeparatorStructure = corpus.getStructuralUnit(lineSeparatorStructureName) |
... | ... | |
41 | 42 |
return; |
42 | 43 |
} |
43 | 44 |
|
44 |
def breaks_pos = Arrays.asList(corpus.query(new Query("[]</"+lineSeparatorStructureName+">"),"test", false).getEnds()) |
|
45 |
def breaks_pos = Arrays.asList(corpus.query(new CQLQuery("[]</"+lineSeparatorStructureName+">"),"test", false).getEnds())
|
|
45 | 46 |
println breaks_pos |
46 | 47 |
|
47 | 48 |
println "Exporting $corpus text content to $exportDir" |
... | ... | |
73 | 74 |
if (breaks_pos.contains(p)) words[j] = words[j] +"\n" |
74 | 75 |
} |
75 | 76 |
writer.println LangFormater.format(StringUtils.join(words, " "), |
76 |
corpus.getAttribute("lang"));
|
|
77 |
corpus.getLanguage());
|
|
77 | 78 |
writer.close(); |
78 | 79 |
} |
79 | 80 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/conversion/EuroPressToXML2018Macro.groovy (revision 1948) | ||
---|---|---|
3 | 3 |
|
4 | 4 |
import groovy.xml.QName |
5 | 5 |
import java.text.DecimalFormat |
6 |
import org.txm.importer.DomUtils
|
|
6 |
import org.txm.utils.xml.DomUtils
|
|
7 | 7 |
import org.txm.importer.ValidateXml |
8 | 8 |
import org.w3c.tidy.Tidy |
9 | 9 |
import groovy.util.XmlParser |
... | ... | |
13 | 13 |
import org.jsoup.Jsoup |
14 | 14 |
import org.jsoup.nodes.Document.OutputSettings.Syntax |
15 | 15 |
|
16 |
// README |
|
17 |
// This macro needs the following libraries: jsoup-1.11.3.jar and jtidy |
|
18 |
|
|
16 | 19 |
// BEGINNING OF PARAMETERS |
17 | 20 |
|
18 | 21 |
@Field @Option(name="inputDir", usage="The directory containing the html files, to export from the Europress portal", widget="Folder", required=true, def="") |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/debug/RVersionMacro.groovy (revision 1948) | ||
---|---|---|
1 | 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
2 | 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
3 |
// |
|
3 | 4 |
// @author sheiden |
5 |
|
|
4 | 6 |
package org.txm.macro.debug |
7 |
|
|
8 |
import org.txm.statsengine.r.core.RWorkspace |
|
9 |
|
|
5 | 10 |
def r = RWorkspace.getRWorkspaceInstance() |
6 | 11 |
|
7 | 12 |
println r.eval("R.version.string").asString() |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/edition/AddAttributeValuesInXML.groovy (revision 1948) | ||
---|---|---|
1 |
package org.txm.macro.edition |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.util.ArrayList; |
|
5 |
import java.util.List; |
|
6 |
import org.txm.scripts.importer.* |
|
7 |
|
|
8 |
/**
 * Sets one attribute on every occurrence of a given element of an XML file.
 * The n-th value of the list is given to the n-th matching element; all other
 * elements are handled by the parent parser.
 * Warning: if the attribute already exists on a matching element, its value is replaced.
 */
class AddAttributeValuesInXML extends StaxIdentityParser {
	/** the XML file to read */
	File xmlFile;
	/** local name of the elements to update, and name of the attribute to set */
	String tag, attribute;
	/** the values to insert, one per matching element, in document order */
	List<String> values;
	// not used by this class; kept because it is part of the public properties
	HashMap<String, String> attributesMap;
	/** outcome of the last call to process(File) */
	boolean result;

	/**
	 * @param xmlFile the XML file to read
	 * @param tag the local name of the elements to update
	 * @param attribute the name of the attribute to add or replace
	 * @param values the attribute values, one per matching element
	 */
	public AddAttributeValuesInXML(File xmlFile, String tag, String attribute, List<String> values)
	{
		super(xmlFile.toURI().toURL());
		this.xmlFile = xmlFile;
		this.tag = tag;
		this.attribute = attribute;
		this.values = values;
	}

	/**
	 * Runs the parent parser on the file and writes the result in outfile.
	 *
	 * @param outfile the file to write
	 * @return true if the parent parser succeeded and exactly one value was
	 * available for each matching element
	 */
	public boolean process(File outfile) {
		this.result = false;
		boolean ret = super.process(outfile)
		// the previous code returned "result & ret" without ever setting result to true,
		// so the method could never report a success
		this.result = ret && (n == values.size())
		return this.result;
	}

	/** number of matching elements met so far */
	int n = 0;

	/**
	 * Rewrite the processStartElement() to update/add attributes
	 */
	public void processStartElement()
	{
		if (localname != tag) {
			super.processStartElement()
			return
		}

		String prefix = parser.getPrefix();
		//TODO: uncomment for TXM 0.7.6
		//			if (INCLUDE == localname && XI == prefix) {
		//				processingXInclude();
		//				return;
		//			}

		// XMLStreamReader.getPrefix() may return null when the element has no prefix
		if (prefix != null && prefix.length() > 0)
			writer.writeStartElement(Nscontext.getNamespaceURI(prefix), localname)
		else
			writer.writeStartElement(localname);

		for (int i = 0 ; i < parser.getNamespaceCount() ; i++) {
			writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
		}

		// get attributes; a LinkedHashMap keeps them in their original order
		// NOTE: attributes are copied by local name only, their namespace prefix is not written back
		Map<String, String> attributes = new LinkedHashMap<String, String>();
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
			attributes[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i);
		}

		// add/update the value
		if (n < values.size()) {
			attributes[attribute] = values[n];
		} else {
			println "ERROR: not enough values to insert for file $xmlFile, at XML parser location: l="+parser.getLocation().getLineNumber()+",c="+parser.getLocation().getColumnNumber()+")."
		}
		n++

		// write attributes
		for (def entry : attributes.entrySet()) {
			writer.writeAttribute(entry.getKey(), entry.getValue())
		}
	}

	/**
	 * Reports a mismatch between the number of matching elements and the number of values.
	 */
	@Override
	public void after() {
		super.after();

		if (n != values.size()) {
			println "ERROR: number of $tag ("+n+") missmatch the number of values to insert: "+values.size()
		}
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/edition/BuildXTZEditions.groovy (revision 1948) | ||
---|---|---|
1 |
package org.txm.macro.edition |
|
2 |
|
|
3 |
import org.txm.scripts.importer.* |
|
4 |
import org.xml.sax.Attributes |
|
5 |
import org.txm.importer.scripts.filters.* |
|
6 |
import java.util.ArrayList |
|
7 |
import javax.xml.parsers.* |
|
8 |
import javax.xml.stream.* |
|
9 |
import java.net.URL |
|
10 |
import org.xml.sax.InputSource |
|
11 |
import org.xml.sax.helpers.DefaultHandler |
|
12 |
|
|
13 |
/**
 * Splits a text into HTML page files: a new page file is started at each "pb"
 * element and the id of the first word ("w" element) of each page is recorded.
 */
class BuildXTZEditions {

	private def url
	private def inputData
	private def factory
	private XMLStreamReader parser
	OutputStreamWriter writer
	/** writer of the page currently being built, null before the first page */
	StaxStackWriter pagedWriter = null

	File editionDir
	File xmlFile
	/** the page file currently being written */
	File htmlFile
	/** the [page number, first word id] pairs recorded so far */
	def pages = []
	def txtname, corpusname
	// stored by the constructor but not read by this class
	int wordsPerPage = 500
	/** true until a "w" element is met in the current page */
	boolean firstWord
	boolean cutBefore = true;

	/**
	 * Opens the XML file for reading.
	 * The constructor was previously declared under the name "BuildFacsEditions", which
	 * made it an ordinary method, and it never stored the corpus name.
	 *
	 * @param xmlFile the XML file to paginate
	 * @param editionDir the directory receiving the HTML pages
	 * @param corpusname the corpus name, used in the page title and the CSS file name
	 * @param txtname the text name, used as prefix of the page file names
	 * @param wordsPerPage the number of words per page
	 */
	public BuildXTZEditions(File xmlFile, File editionDir, String corpusname, String txtname, int wordsPerPage) {
		inputData = xmlFile.toURI().toURL().openStream()
		factory = XMLInputFactory.newInstance()
		parser = factory.createXMLStreamReader(inputData)

		this.xmlFile = xmlFile
		this.editionDir = editionDir
		this.corpusname = corpusname
		this.txtname = txtname
		this.wordsPerPage = wordsPerPage
	}

	/** number of the current page */
	int n = 0;

	/**
	 * Closes the current page if any, then creates the next page file
	 * "${txtname}_${n}.html" and writes its HTML header.
	 *
	 * @return true if the page was created, false if an exception occurred
	 */
	private boolean createNextOutput()
	{
		try {
			def tags = closeMultiWriter();
			// drop the leading entries of the tag stack up to and including "body",
			// the remaining tags are re-opened in the new page
			for (int i = 0 ; i < tags.size() ; i++) {
				String tag = tags[i]
				if ("body" != tag) {
					tags.remove(i--)
				} else {
					tags.remove(i--) // remove "body"
					break; // remove elements until "body" tag
				}
			}
			n++
			htmlFile = new File(editionDir, "${txtname}_${n}.html")
			firstWord = true

			pagedWriter = new StaxStackWriter(htmlFile, "UTF-8");

			pagedWriter.writeStartDocument("UTF-8", "1.0")
			pagedWriter.writeStartElement("html");
			pagedWriter.writeEmptyElement("meta", ["http-equiv":"Content-Type", "content":"text/html","charset":"UTF-8"]);
			pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"txm.css"]);
			pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"${corpusname}.css"]);
			pagedWriter.writeStartElement("head");
			pagedWriter.writeStartElement("title")
			pagedWriter.writeCharacters(corpusname+" Edition - Page "+n)
			pagedWriter.writeEndElement(); // </title>
			pagedWriter.writeEndElement() // </head>
			pagedWriter.writeStartElement("body") //<body>

			pagedWriter.writeStartElements(tags);
			return true;
		} catch (Exception ee) {
			System.out.println(ee);
			return false;
		}
	}

	/**
	 * Closes the current page writer.
	 * If the page contains no word, an empty "w_0" span is written and recorded for it.
	 *
	 * @return a copy of the tags that were open when the page was closed,
	 * or an empty list if no page was open
	 */
	private def closeMultiWriter()
	{
		if (pagedWriter != null) {
			def tags = pagedWriter.getTagStack().clone();

			if (firstWord) { // there were no words
				pagedWriter.writeCharacters("");
				pagedWriter.write("<span id=\"w_0\"/>");
				pages << ["$n", "w_0"]
			}
			pagedWriter.writeEndElements();
			pagedWriter.close();
			return tags;
		} else {
			return [];
		}
	}

	/**
	 * Writes an image wrapped in a div in the current page.
	 *
	 * @param src the value of the "src" attribute of the image
	 */
	private writeImg(String src) {
		pagedWriter.writeStartElement("div");
		pagedWriter.writeEmptyElement("img", ["src":src]);
		pagedWriter.writeEndElement(); // </div>
	}

	/**
	 * Reads the XML file and creates one page file per "pb" element.
	 * The parser and the input stream are closed even if the parsing fails.
	 *
	 * @return the list of [page number, first word id] pairs
	 */
	public def process() {
		String wordid = "w_0"
		String localname

		try {
			createNextOutput();
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
				if (event != XMLStreamConstants.START_ELEMENT) continue;

				localname = parser.getLocalName();
				switch (localname) {
					case "w":
						if (firstWord) { // remember the first word of the current page
							wordid = parser.getAttributeValue(null, "id");
							firstWord = false;
						}
						break;
					case "pb":
						if (cutBefore) {
							pages << ["$n", wordid]
							// WRITE PB
							createNextOutput()
						} else {
							createNextOutput()
							pages << ["$n", wordid]
							// WRITE PB
						}
						break;
				}
			}
			closeMultiWriter()
		} finally {
			if (parser != null) parser.close();
			if (inputData != null) inputData.close();
		}
		return pages
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/edition/RemoveTag.groovy (revision 1948) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.macro.edition; |
|
29 |
|
|
30 |
import javax.xml.parsers.* |
|
31 |
import javax.xml.transform.* |
|
32 |
import javax.xml.transform.dom.DOMSource |
|
33 |
import javax.xml.transform.stream.StreamResult |
|
34 |
import javax.xml.xpath.* |
|
35 |
|
|
36 |
import org.txm.metadatas.* |
|
37 |
import org.txm.utils.* |
|
38 |
import org.w3c.dom.Document |
|
39 |
import org.w3c.dom.Element |
|
40 |
|
|
41 |
/**
 * Removes the nodes of an XML document selected by an XPath expression,
 * then saves the document if an output file is given.
 * The work is done by the constructors.
 * @author mdecorde
 *
 */
public class RemoveTag {
	/** the file to write, nothing is written if null */
	File outfile
	/** the XPath expression selecting the nodes to remove */
	String xpath
	/** the document being processed, set to null once processed */
	Document doc

	/**
	 * Removes the selected nodes from an already parsed document.
	 *
	 * @param root the DOM document to modify
	 * @param outfile the file to write, nothing is written if null
	 * @param xpath the XPath expression selecting the nodes to remove
	 */
	public RemoveTag(def root, def outfile, def xpath)
	{
		this.doc = root
		this.outfile = outfile
		this.xpath = xpath

		process()
	}

	/**
	 * Parses the XML file, then removes the selected nodes.
	 *
	 * @param xmlfile the xmlfile
	 * @param outfile the outfile
	 * @param xpath the XPath
	 */
	public RemoveTag(File xmlfile, File outfile, String xpath)
	{
		this.outfile = outfile
		this.xpath = xpath

		def domFactory = DocumentBuilderFactory.newInstance();
		domFactory.setNamespaceAware(true); // never forget this!
		DocumentBuilder builder = domFactory.newDocumentBuilder();
		this.doc = builder.parse(xmlfile);

		process()
	}

	/**
	 * Evaluates the XPath on the document, detaches every selected node from its
	 * parent, then saves the document.
	 */
	private void process() {
		def expr = XPathFactory.newInstance().newXPath().compile(xpath);
		def nodes = expr.evaluate(doc, XPathConstants.NODESET);

		if (nodes != null) {
			for (def node : nodes) {
				//println "Remove node "+node
				// no cast to Element: the XPath may select text or comment nodes as well;
				// nodes without parent (e.g. attributes) are left untouched
				def parent = node.getParentNode();
				if (parent != null) {
					parent.removeChild(node);
				}
			}
		}
		save()
		doc = null
	}

	/**
	 * Save.
	 *
	 * @return true, if successful
	 */
	private boolean save()
	{
		if (outfile == null) return true;

		try {
			// Create the DOM source
			Source source = new DOMSource(doc);

			// Configure the transformer
			TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl();
			Transformer transformer = fabrique.newTransformer();
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

			// Create the output file and transform;
			// withWriter closes the writer even if the transformation fails
			outfile.withWriter("UTF-8") { writer ->
				Result resultat = new StreamResult(writer);
				transformer.transform(source, resultat);
			}
			return true;
		} catch (Exception e) {
			org.txm.utils.logger.Log.printStackTrace(e);
			return false;
		}
	}

	public static void main(String[] args) {
		RemoveTag rt = new RemoveTag(
				new File("/home/mdecorde/TXM/corpora/graal/import.xml"),
				new File("/home/mdecorde/TXM/corpora/graal/import-o.xml"),
				"//edition[@name='courante']"
				)
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/edition/AddFacsAttributeMacro.groovy (revision 1948) | ||
---|---|---|
1 |
package org.txm.macro.edition |
|
2 |
|
|
3 |
import org.kohsuke.args4j.* |
|
4 |
import groovy.transform.Field |
|
5 |
import org.txm.rcp.swt.widget.parameters.* |
|
6 |
|
|
7 |
// Macro body: for each sub-directory of imageDirectory, takes the source file of the
// same name (plus ".xml") in sourceDirectory and gives the image paths of the
// sub-directory, in sorted order, as values of $attribute to the successive $element
// elements. The results are written in outputDirectory.

// "args" is not defined when the macro is not called with arguments: use an empty map then
try { println "ARGS=$args";} catch(Exception e) {args = [:]}

// BEGINNING OF PARAMETERS
@Field @Option(name="sourceDirectory", usage="Directory containig XML-TXM or XML SRC files", widget="Folder", required=true, def="src")
def sourceDirectory

@Field @Option(name="imageDirectory", usage="directory containing the ordered images files in subdirectories", widget="Folder", required=true, def="img")
def imageDirectory

@Field @Option(name="outputDirectory", usage="Output directory", widget="Folder", required=true, def="out")
def outputDirectory

@Field @Option(name="element", usage="The element to upgrade", widget="String", required=true, def="pb")
def element

@Field @Option(name="attribute", usage="The attribute to add", widget="String", required=true, def="facs")
def attribute

@Field @Option(name="prefix", usage="The image path prefix, if empty then the absolute file path is used", widget="String", required=true, def="../img/")
def prefix

// Open the parameters input dialog box, unless the values are given by the caller
if (args.size() == 0) {
	if (!ParametersDialog.open(this)) return;
} else {
	monitor = args["monitor"];
	sourceDirectory = args["sourceDirectory"]
	imageDirectory = args["imageDirectory"]
	outputDirectory = args["outputDirectory"]
	element = args["element"]
	attribute = args["attribute"]
	prefix = args["prefix"]
}
// END OF PARAMETERS

println "Parameters: "
println " sourceDirectory: $sourceDirectory"
println " imageDirectory: $imageDirectory"
println " outputDirectory: $outputDirectory"
println " element: $element"
println " attribute: $attribute"

assert(sourceDirectory.exists())
assert(imageDirectory.exists())

// start from an empty output directory; mkdirs() also creates the missing parent directories
outputDirectory.deleteDir()
outputDirectory.mkdirs()

if (!outputDirectory.exists()) {
	println "Error: failed to create $outputDirectory"
	return;
}

// the candidate source files are only listed for information
def srcFiles = []
sourceDirectory.eachFile() { file ->
	if (!file.isDirectory() && !file.isHidden() && file.getName().endsWith(".xml") && file.getName() != "import.xml") srcFiles << file
}
println "srcFiles=$srcFiles"

for (def subdir : imageDirectory.listFiles()) {
	if (!subdir.isDirectory()) continue;

	String name = subdir.getName();
	File srcFile = new File(sourceDirectory, name+".xml")
	if (!srcFile.exists()) { println "Warning: missing source file: $srcFile"; continue }

	// keep the regular, visible files only BEFORE testing if there are images:
	// a directory containing only hidden files or sub-directories has no image
	def imgFiles = subdir.listFiles().sort().findAll { it.isFile() && !it.isHidden() }
	if (imgFiles.size() == 0) { println "Warning: no image in $subdir"; continue }

	// with a prefix the path is prefix + directory name + "/" + file name,
	// without prefix the absolute path of the image is used
	boolean usePrefix = prefix != null && prefix.length() > 0
	def imgPaths = []
	for (def img : imgFiles) {
		if (usePrefix)
			imgPaths << prefix+name+"/"+img.getName()
		else
			imgPaths << img.getAbsolutePath()
	}

	println "Processing '$name' directory with "+imgPaths.size()+" images."

	AddAttributeValuesInXML builder = new AddAttributeValuesInXML(srcFile, element, attribute, imgPaths);
	builder.process(new File(outputDirectory, srcFile.getName()));
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/edition/BuildFacsEditions.groovy (revision 1948) | ||
---|---|---|
1 |
package org.txm.macro.edition |
|
2 |
|
|
3 |
import org.txm.scripts.importer.* |
|
4 |
import org.xml.sax.Attributes |
|
5 |
import org.txm.importer.scripts.filters.* |
|
6 |
import java.util.ArrayList |
|
7 |
import javax.xml.parsers.* |
|
8 |
import javax.xml.stream.* |
|
9 |
import java.net.URL |
|
10 |
import org.xml.sax.InputSource |
|
11 |
import org.xml.sax.helpers.DefaultHandler |
|
12 |
|
|
13 |
class BuildFacsEditions { |
|
14 |
|
|
15 |
private def url |
|
16 |
private def inputData |
|
17 |
private def factory |
|
18 |
private XMLStreamReader parser |
|
19 |
OutputStreamWriter writer |
|
20 |
StaxStackWriter pagedWriter = null |
|
21 |
|
|
22 |
File editionDir |
|
23 |
File xmlFile |
|
24 |
File htmlFile |
|
25 |
def pages = [] |
|
26 |
def tag, attribute, txtname, corpusname |
|
27 |
boolean firstWord |
|
28 |
boolean cutBefore = true; |
|
29 |
boolean debug = false; |
|
30 |
|
|
31 |
public BuildFacsEditions(File xmlFile, File editionDir, String corpusname, String txtname, String tag, String attribute, boolean debug) { |
|
32 |
inputData = xmlFile.toURI().toURL().openStream() |
|
33 |
factory = XMLInputFactory.newInstance() |
|
34 |
parser = factory.createXMLStreamReader(inputData) |
|
35 |
|
|
36 |
this.xmlFile = xmlFile |
|
37 |
this.editionDir = editionDir |
|
38 |
this.tag = tag |
|
39 |
this.attribute = attribute |
|
40 |
this.txtname = txtname |
|
41 |
this.debug = debug |
|
42 |
} |
|
43 |
|
|
44 |
int n = 1; |
|
45 |
private boolean createNextOutput() |
|
46 |
{ |
|
47 |
try { |
|
48 |
def tags = closeMultiWriter(); |
|
49 |
for (int i = 0 ; i < tags.size() ; i++) { |
|
50 |
String tag = tags[i] |
|
51 |
if ("body" != tag) { |
|
52 |
tags.remove(i--) |
|
53 |
} else { |
|
54 |
tags.remove(i--) // remove "body" |
|
55 |
break; // remove elements until "body tag |
|
56 |
} |
|
57 |
} |
|
58 |
if (wordid != null) {//wordid = "w_0"; |
|
59 |
//println " add page $n $wordid, page=$pages" |
|
60 |
pages << ["$n", wordid] |
|
61 |
n++ |
|
62 |
} |
|
63 |
// Page suivante |
|
64 |
|
|
65 |
htmlFile = new File(editionDir, "${txtname}_${n}.html") |
|
66 |
firstWord = true |
|
67 |
//println "SET FIRST WORD=true" |
|
68 |
pagedWriter = new StaxStackWriter(htmlFile, "UTF-8"); |
|
69 |
if (debug) println "Create file $htmlFile" |
|
70 |
pagedWriter.writeStartDocument("UTF-8", "1.0") |
|
71 |
pagedWriter.writeStartElement("html"); |
|
72 |
pagedWriter.writeEmptyElement("meta", ["http-equiv":"Content-Type", "content":"text/html","charset":"UTF-8"]); |
|
73 |
pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"txm.css"]); |
|
74 |
pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"${corpusname}.css"]); |
|
75 |
pagedWriter.writeStartElement("head"); |
|
76 |
pagedWriter.writeStartElement("title") |
|
77 |
pagedWriter.writeCharacters(corpusname+" Edition - Page "+n) |
|
78 |
pagedWriter.writeEndElement(); // </title> |
|
79 |
pagedWriter.writeEndElement() // </head> |
|
80 |
pagedWriter.writeStartElement("body") //<body> |
|
81 |
|
|
82 |
pagedWriter.writeStartElements(tags); |
|
83 |
|
|
84 |
wordid = "w_0"; // default value if no word is found |
|
85 |
return true; |
|
86 |
} catch (Exception e) { |
|
87 |
System.out.println(e.getLocalizedMessage()); |
|
88 |
return false; |
|
89 |
} |
|
90 |
} |
|
91 |
|
|
92 |
/**
 * Finishes and closes the page currently being written.
 *
 * @return a copy of the writer's tag stack taken before the end elements are written,
 *         or an empty list when no page is open
 */
private def closeMultiWriter()
{
	if (pagedWriter == null) {
		return [];
	}

	// snapshot first: the stack is copied before writeEndElements() is called
	def openTags = pagedWriter.getTagStack().clone();

	if (firstWord) { // there was no words
		// NOTE(review): the empty text node presumably forces the pending start tag
		// to be closed before the raw markup is written - confirm against StaxStackWriter
		pagedWriter.writeCharacters("");
		pagedWriter.write("<span id=\"w_0\"/>");
	}
	pagedWriter.writeEndElements();
	pagedWriter.close();
	return openTags;
}
|
108 |
|
|
109 |
/**
 * Writes a "div" element containing a single full-width "img" element to the current page.
 *
 * @param src value of the "src" attribute of the image
 */
private writeImg(String src) {
	def imgAttributes = ["src":src, "width":"100%"]
	pagedWriter.writeStartElement("div");
	pagedWriter.writeEmptyElement("img", imgAttributes);
	pagedWriter.writeEndElement(); // </div>
}
|
114 |
|
|
115 |
String wordid = null; |
|
116 |
/**
 * Reads the XML stream and cuts the edition into pages.
 *
 * A first page is opened, then for every start element named like "tag" the value of
 * its "attribute" is written as an image and a new page is started (before or after
 * the image depending on "cutBefore"). The id of the first "w" element met in each
 * page is kept in "wordid".
 *
 * The parser and the input stream are closed even when parsing fails.
 *
 * @return the "pages" list, one [page number, word id] entry per page
 */
public def process() {

	String localname

	try {
		createNextOutput();

		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
			switch (event) {
				case XMLStreamConstants.START_ELEMENT:
					localname = parser.getLocalName();
					switch (localname) {
						case "text":
							// nothing to do; the case is kept so that a "text" element
							// is never handled by the "tag" case below
							break;
						case "w":
							if (firstWord) {
								wordid = parser.getAttributeValue(null, "id");
								firstWord = false;
							}
							break;
						case tag:
							if (debug) println "** TAG $tag $attribute : "+parser.getAttributeValue(null, "id");
							String imgPath = parser.getAttributeValue(null, attribute);
							if (imgPath == null) {
								// no image path: report it and do not cut the page here
								println "ERROR in $xmlFile no value found for $tag@$attribute at location "+parser.getLocation().getLineNumber()
							} else {
								if (cutBefore) {
									if (debug) println " cut before"
									createNextOutput()
									if (debug) println " write img $imgPath"
									writeImg(imgPath)
								} else {
									if (debug) println " write img $imgPath"
									writeImg(imgPath)
									if (debug) println " cut after"
									createNextOutput()
								}
							}
							break;
					}
					break;
			}
		}
	} finally {
		// release the input resources whether parsing succeeded or not
		if (parser != null) parser.close();
		if (inputData != null) inputData.close();
	}
	closeMultiWriter()
	pages << ["$n", wordid] // add the last page (no pb encountered)
	return pages
}
|
168 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/edition/FacsEditionBuilderMacro.groovy (revision 1948) | ||
---|---|---|
1 |
package org.txm.macro.edition |
|
2 |
|
|
3 |
// STANDARD DECLARATIONS |
|
4 |
|
|
5 |
import org.kohsuke.args4j.* |
|
6 |
import groovy.transform.Field |
|
7 |
import org.txm.rcp.swt.widget.parameters.* |
|
8 |
import org.txm.objects.* |
|
9 |
import org.txm.searchengine.cqp.corpus.*; |
|
10 |
import org.w3c.dom.* |
|
11 |
import org.txm.utils.xml.DomUtils |
|
12 |
import org.txm.Toolbox |
|
13 |
|
|
14 |
import org.w3c.dom.Document; |
|
15 |
import org.xml.sax.SAXException; |
|
16 |
import org.txm.rcp.commands.* |
|
17 |
import javax.xml.parsers.*; |
|
18 |
import javax.xml.transform.*; |
|
19 |
import javax.xml.transform.dom.DOMSource; |
|
20 |
import javax.xml.transform.stream.StreamResult; |
|
21 |
|
|
22 |
|
|
23 |
// If reading "args" raises an exception, fall back to an empty map
// NOTE(review): presumably "args" is only bound when another script calls this macro - confirm
try {
	println "ARGS=$args";
} catch(Exception e) {
	args = [:]
}

def corpus = null
if (args.size() != 0) {
	// called with arguments: only the monitor is read here
	monitor = args["monitor"];
} else {
	// no arguments: work on the corpus selected in the Corpora view
	if (!(corpusViewSelection instanceof MainCorpus)) {
		println "This marcro works with a MainCorpus selection. Aborting"
		return;
	}
	println "Working on $corpusViewSelection corpus"
	corpus = corpusViewSelection
}
|
36 |
|
|
37 |
// BEGINNING OF PARAMETERS |
|
38 |
@Field @Option(name="editionName", usage="the edition name to create", widget="String", required=true, def="facs") |
|
39 |
def editionName |
|
40 |
|
|
41 |
@Field @Option(name="tag", usage="The tag to upgrade", widget="String", required=true, def="pb") |
|
42 |
def tag |
|
43 |
|
|
44 |
@Field @Option(name="attribute", usage="The attribute to add", widget="String", required=true, def="facs") |
|
45 |
def attribute |
|
46 |
|
|
47 |
@Field @Option(name="debug", usage="Debug mode", widget="Boolean", required=false, def="true") |
|
48 |
def debug = false |
|
49 |
|
|
50 |
// END OF params |
|
51 |
// Resolve the parameters and the working locations.
// - with arguments: everything is read from the "args" map
// - without arguments: the parameters dialog is opened and the locations come from the "corpus" variable
if (args.size() != 0) {
	monitor = args["monitor"];
	attribute = args["attribute"]
	tag = args["tag"]
	editionName = args["editionName"]

	// the "corpus" variable does not exist, get informations from the "args" map
	binDirectory = args["binDirectory"]
	txmDirectory = args["txmDirectory"]
	params = args["params"]
	tokenizedDirectory = new File(binDirectory, "tokenized")
	HTMLDirectory = new File(binDirectory, "HTML")
	corpusName = params.getCorpusName()
	newEditionDirectory = new File(HTMLDirectory, corpusName.toUpperCase()+"/"+editionName)
} else {
	if (!ParametersDialog.open(this)) return;

	// the "corpus" variable exists
	def project = corpus.getProject()
	String corpusDirName = corpus.getID().toUpperCase()
	binDirectory = project.getProjectDirectory()
	txmDirectory = new File(binDirectory, "txm/"+corpusDirName)
	tokenizedDirectory = new File(binDirectory, "tokenized")
	HTMLDirectory = new File(binDirectory, "HTML")
	newEditionDirectory = new File(HTMLDirectory, corpusDirName+"/"+editionName)
	params = project.params
}
|
76 |
|
|
77 |
// Echo the parameters
println "Parameters:"
println " attribute = $attribute"
println " tag = $tag"
println " editionName = $editionName"

// The corpus 'HTML' directory must already exist
if (!HTMLDirectory.exists()) {
	println "ERROR: can't find this corpus 'HTML' directory: $HTMLDirectory. Aborting"
	return false;
}

// Source files: the XML-TXM directory, or the tokenized directory as a fallback
File workDirectory = txmDirectory
if (!workDirectory.exists()) {
	println "XML-TXM directory ($txmDirectory) not found. Using XML tokenized directory instead: "+tokenizedDirectory
	workDirectory = tokenizedDirectory
	if (!workDirectory.exists()) {
		println "XML tokenized directory not found: "+tokenizedDirectory
		println "Aborting."
		return false
	}
}

println "Working directory=$workDirectory"
|
99 |
|
|
100 |
// 0- Clean the previous version of the edition, if any: its declaration in the
//    corpus configuration and its HTML directory
if (newEditionDirectory.exists()) {
	println "** Old version of $editionName edition found."
	println " removing the 'edition' reference from the corpus configuration."
	// NOTE(review): the instance is not used afterwards; the removal presumably
	// happens in the RemoveTag constructor - confirm
	RemoveTag rt = new RemoveTag(
			params.root.getOwnerDocument(), // will be updated
			null, // don't create a new import.xml
			"//edition[@name='$editionName']"
			)
	println " delete $newEditionDirectory"
	newEditionDirectory.deleteDir()
}

// 2- Declare the edition in the corpus configuration (saved to import.xml later in the script)
println "** Updating corpus configuration..."
// for edition list
def corpusElem = params.getCorpusElement()
params.addEditionDefinition(corpusElem, editionName, "groovy", "FacsEditionBuilderMacro");
|
121 |
|
|
122 |
// 1- Create the HTML files and reference them in the corpus configuration
println "** Building new edition HTML files..."

println " Creating edition '$editionName' directory: '$newEditionDirectory'"
// mkdirs() also creates the missing parent directories (HTML/<CORPUS>);
// mkdir() would fail silently without them
newEditionDirectory.mkdirs()
if (!newEditionDirectory.isDirectory()) {
	println "ERROR: can't create the edition directory: $newEditionDirectory. Aborting"
	return false
}
for (def xmlFile : workDirectory.listFiles()) {
	if (xmlFile.isHidden() || xmlFile.isDirectory()) continue // ignore
	// text name = file name without its extension
	String txtname = xmlFile.getName()
	int idx = txtname.lastIndexOf(".")
	if (idx > 0) txtname = txtname.substring(0,idx)

	// create the pages of the text: BuildFacsEditions receives the "tag" and "attribute" parameters
	println " Building HTML pages of text=$txtname"
	BuildFacsEditions builder = new BuildFacsEditions(xmlFile, newEditionDirectory, params.name, txtname, tag, attribute, debug);
	def newPages = builder.process()
	if (newPages == null || newPages.size() == 0) {
		println "WARNING: no edition files created with $xmlFile"
	}

	println " Building edition references in corpus configuration"

	Element corpusElement = params.getCorpusElement();
	Element textsElem = params.getTextsElement(corpusElement);
	Element textElem = params.getTextElement(textsElem, txtname);
	if (textElem == null) { // just in case
		textElem = params.addText(textsElem, txtname, xmlFile)
	}
	Element editionElem = params.addEdition(textElem, editionName, newEditionDirectory.getAbsolutePath(), "html"); // add a new edition to the "text" node
	// one page declaration per [page number, word id] entry returned by the builder
	for (def pagedef : newPages) {
		params.addPage(editionElem, pagedef[0], pagedef[1]);
	}
}
|
157 |
|
|
158 |
//printDOM(params.root.getOwnerDocument()) |
|
159 |
|
|
160 |
// 3- Save the corpus configuration to the import.xml file of the binary directory
println " Saving corpus configuration..."
File importXml = new File(binDirectory, "import.xml");
DomUtils.save(params.root.getOwnerDocument(), importXml);

// 4- Reload the corpora, only when the macro was started without arguments
if (args.size() == 0) {
	Toolbox.restart();
	monitor.syncExec({ RestartTXM.reloadViews() } as Runnable);
}

// 5- Done
println "New edition created."
|
178 |
|
|
179 |
//printDOM(params.root.getOwnerDocument()) |
|
180 |
|
|
181 |
|
|
182 |
|
|
183 |
/**
 * Debug helper: serializes a DOM document to the standard output as indented UTF-8 XML.
 * Does nothing when "debug" is not set.
 *
 * @param doc the DOM node to print
 * @return nothing when debug is off, true when the transformation succeeded,
 *         false when an exception was raised (its stack trace is printed)
 */
def printDOM(def doc) {
	if (!debug) return;

	try {
		// identity transformer, created from the Saxon factory
		Transformer serializer = new net.sf.saxon.TransformerFactoryImpl().newTransformer();
		serializer.setOutputProperty(OutputKeys.METHOD, "xml"); //$NON-NLS-1$
		serializer.setOutputProperty(OutputKeys.INDENT, "yes"); //$NON-NLS-1$
		serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); //$NON-NLS-1$

		// DOM source -> standard output
		serializer.transform(new DOMSource(doc), new StreamResult(new PrintWriter(System.out)));
		return true;
	} catch (Exception e) {
		e.printStackTrace();
		return false;
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/edition/EditionUpdaterMacro.groovy (revision 1948) | ||
---|---|---|
1 |
package org.txm.macro.edition |
|
2 |
// STANDARD DECLARATIONS |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.txm.objects.* |
|
8 |
import org.txm.searchengine.cqp.corpus.*; |
|
9 |
import org.w3c.dom.* |
|
10 |
import org.txm.scripts.importer.* |
|
11 |
import org.txm.Toolbox |
|
12 |
import java.io.* |
|
13 |
|
|
14 |
import org.w3c.dom.Document; |
|
15 |
import org.xml.sax.SAXException; |
|
16 |
import org.txm.rcp.commands.* |
|
17 |
import javax.xml.parsers.*; |
|
18 |
import javax.xml.transform.*; |
|
19 |
import javax.xml.transform.dom.DOMSource; |
|
20 |
import javax.xml.transform.stream.StreamResult; |
|
21 |
import org.txm.utils.io.FileCopy |
|
22 |
|
|
23 |
// If reading "args" raises an exception, fall back to an empty map
// NOTE(review): presumably "args" is only bound when another script calls this macro - confirm
try {
	println "ARGS=$args";
} catch(Exception e) {
	args = [:]
}

def corpus = null
if (args.size() != 0) {
	// called with arguments: only the monitor is read here
	monitor = args["monitor"];
} else {
	// no arguments: work on the corpus selected in the Corpora view
	if (!(corpusViewSelection instanceof MainCorpus)) {
		println "This marcro works with a MainCorpus selection. Aborting"
		return;
	}
	println "Working on $corpusViewSelection corpus"
	corpus = corpusViewSelection
}
|
36 |
|
|
37 |
// BEGINNING OF params |
|
38 |
|
|
39 |
@Field @Option(name="xslEdition", usage="XSL to build the HTML edition - if empty uses 'txm-edition-xtz.xsl'", widget="File", required=false, def="txm-edition-xtz.xsl") |
|
40 |
def xslEdition |
|
41 |
|
|
42 |
@Field @Option(name="xslPages", usage="XSL to build the edition pages - if empty uses 'txm-edition-page-split.xsl'", widget="File", required=false, def="txm-edition-page-split.xsl") |
|
43 |
def xslPages |
|
44 |
|
|
45 |
@Field @Option(name="editionName", usage="The edition name to produce", widget="String", required=false, def="default") |
|
46 |
String editionName |
|
47 |
|
|
48 |
@Field @Option(name="useTokenizedDirectory", usage="Use the 'XML/w' of the 'tokenized' directory instead of the 'XML-TXM' files", widget="Boolean", required=false, def="false") |
|
49 |
def useTokenizedDirectory = false |
|
50 |
|
|
51 |
@Field @Option(name="debug", usage="Enable debug mode: temporary files are not deleted", widget="Boolean", required=false, def="false") |
|
52 |
def debug = false |
|
53 |
|
|
54 |
// END OF params |
|
55 |
// Resolve the parameters and the corpus informations.
// - with arguments: everything is read from the "args" map
// - without arguments: the parameters dialog is opened and the "corpus" variable is used
if (args.size() != 0) {
	monitor = args["monitor"];
	xslEdition = new File(args["xslEdition"])
	xslPages = new File(args["xslPages"])
	editionName = args["editionName"]
	useTokenizedDirectory = args["useTokenizedDirectory"]

	// the "corpus" variable does not exists
	defaultEditionName = "default"
	binDirectory = args["binDirectory"]
	txmDirectory = args["txmDirectory"]
	params = args["params"]
	corpusName = params.getCorpusName()
} else {
	if (!ParametersDialog.open(this)) return;

	// corpus informations
	def project = corpus.getProject()
	defaultEditionName = corpus.getDefaultEdition()
	corpusName = corpus.getID()
	binDirectory = project.getProjectDirectory()
	txmDirectory = new File(binDirectory, "txm/"+corpusName.toUpperCase())
	params = project.params
}

// no edition name given (null or empty): use the default edition name
if (!editionName) {
	editionName = defaultEditionName
}

println "Parameters:"
println " xslEdition = $xslEdition"
println " xslPages = $xslPages"
println " editionName = $editionName"
println " useTokenizedDirectory = $useTokenizedDirectory"
|
87 |
|
|
88 |
// Stylesheets: a missing value or the default file name resolves to the "xsl" directory of the TXM home
File TXMHOME = new File(Toolbox.getTxmHomePath())
File xslDirectory = new File(TXMHOME, "xsl")
if (xslEdition == null || "txm-edition-xtz.xsl".equals(xslEdition.getName())) {
	xslEdition = new File(xslDirectory, "txm-edition-xtz.xsl")
}
if (xslPages == null || "txm-edition-page-split.xsl".equals(xslPages.getName())) {
	xslPages = new File(xslDirectory, "txm-edition-page-split.xsl")
}

println "XSLs: ${xslEdition.getName()} & ${xslPages.getName()}"
println ""

// Source XML files: the 'tokenized' directory replaces the 'txm' one on request
if (useTokenizedDirectory) {
	println "Using the 'tokenized' directory instead of the 'txm' directory to get XML files"
	txmDirectory = new File(binDirectory, "tokenized")
}

// Output locations: HTML/<CORPUS>/<edition name>
File HTMLDirectory = new File(binDirectory, "HTML")
File HTMLCorpusDirectory = new File(HTMLDirectory, corpusName.toUpperCase())
File defaultEditionDirectory = new File(HTMLCorpusDirectory, editionName)

// CSS files: read from the "css" directory of the TXM home, copied later to the edition "css" directory
File cssDirectory = new File(TXMHOME, "css")
File cssDefaultEditionDirectory = new File(defaultEditionDirectory, "css")
File cssTXM = new File(cssDirectory, "txm.css")
File cssTEI = new File(cssDirectory, "tei.css")

// set to true further down when the edition directory does not exist yet
boolean newEdition = false;
|
112 |
|
|
113 |
// Pre-flight checks: every input must exist before anything is written

if (!txmDirectory.exists()) {
	println "ERROR: can't find this corpus 'txm' directory: $txmDirectory. Aborting"
	return false;
}
if (!defaultEditionDirectory.exists()) {
	println "This is a new edition"
	newEdition = true;
	// mkdirs() also creates HTML/<CORPUS> when it is missing; mkdir() would fail without it
	defaultEditionDirectory.mkdirs()
	if (!defaultEditionDirectory.exists()) {
		println "HTML directory could not be created: $defaultEditionDirectory. Aborting"
		return false
	}
}
if (!cssTXM.exists()) {
	println "WARNING: can't find the $cssTXM CSS file. Aborting"
	return false;
}
if (!cssTEI.exists()) {
	println "WARNING: can't find the $cssTEI CSS file. Aborting"
	return false;
}
if (!xslEdition.exists()) {
	println "Error: can't find $xslEdition XSL file"
	return false;
}
if (!xslPages.exists()) {
	println "Error: can't find $xslPages XSL file"
	return false;
}

// The edition directory exists at this point (found or created above):
// copy the two CSS files to its "css" sub-directory
cssDefaultEditionDirectory.mkdir()
FileCopy.copy(cssTXM, new File(cssDefaultEditionDirectory, cssTXM.getName()))
FileCopy.copy(cssTEI, new File(cssDefaultEditionDirectory, cssTEI.getName()))
|
147 |
|
|
148 |
// 1- Back up the files of an already existing edition directory
//    (sub-directories and hidden files are not copied)
if (!newEdition) {
	File backupDirectory = new File(binDirectory, "HTML-"+defaultEditionDirectory.getName()+"-back")
	backupDirectory.mkdir()
	println "Backup of $defaultEditionDirectory directory to $backupDirectory..."
	for (File original : defaultEditionDirectory.listFiles()) {
		if (!original.isDirectory() && !original.isHidden()) {
			File copy = new File(backupDirectory, original.getName())

			// one line per file in debug mode, one dot per file otherwise
			if (debug) {
				println " file $original >> $copy"
			} else {
				print "."
			}

			// stop at the first file that can't be copied
			if (!FileCopy.copy(original, copy)) {
				println "Error: failed to backup $original"
				return false;
			}
		}
	}
	println ""
}
|
169 |
|
|
170 |
// 2- Apply the edition XSL to every XML file of the source directory (import.xml excepted).
//    The HTML results are written to the HTML/<CORPUS> directory and collected in "htmlFiles".
println "Applying XSL 1: $xslEdition..."
ApplyXsl2 applier = new ApplyXsl2(xslEdition);
def htmlFiles = []
for (File f : txmDirectory.listFiles()) {
	String name = f.getName()

	// filter first: the name of a skipped entry may contain no "." (a sub-directory for instance)
	// and substring(0, -1) would throw a StringIndexOutOfBoundsException
	if (f.isDirectory() || f.isHidden() || !name.endsWith(".xml") || name.equals("import.xml")) continue

	String txtname = name.substring(0, name.lastIndexOf("."));
	File rez = new File(HTMLCorpusDirectory, txtname+".html")

	// one line per file in debug mode, one dot per file otherwise
	if (debug) println " file $f >> $rez"
	else print "."

	if (!applier.process(f, rez)) {
		println "Error: failed to process $f"
		return false
	} else {
		htmlFiles << rez
	}
}
println ""
|
193 |
|
|
194 |
// 3- Apply the pages XSL to every HTML file produced by the previous step
println "Applying XSL 2: $xslPages..."
ApplyXsl2 pageSplitter = new ApplyXsl2(xslPages);
pageSplitter.setParam("editionname", editionName)
pageSplitter.setParam("cssname", corpusName)
for (File htmlFile : htmlFiles) {
	String fileName = htmlFile.getName()
	String txtname = fileName.substring(0, fileName.lastIndexOf("."));
	File pagesFile = new File(defaultEditionDirectory, txtname+"-pages.html")

	// one line per file in debug mode, one dot per file otherwise
	if (debug) {
		println " file $htmlFile >> $pagesFile"
	} else {
		print "."
	}

	if (!pageSplitter.process(htmlFile, pagesFile)) {
		println "Error: failed to process $htmlFile"
		return false
	}
	// NOTE(review): the "-pages.html" result is deleted right away unless debugging; presumably
	// the page files themselves are written by the stylesheet - confirm
	if (!debug) pagesFile.delete()
}
println ""

// clean temp files: the intermediate HTML files are kept in debug mode only
if (!debug) {
	htmlFiles.each { it.delete() }
}
|
222 |
|
|
223 |
// 4- Register the edition in the corpus configuration when it is not the default one:
//    its pages are declared with the word ids of the default edition pages
if (editionName != defaultEditionName) {
	println "Update corpus configuration"

	// remove edition declaration if any
	RemoveTag rt = new RemoveTag(
			params.root.getOwnerDocument(), // will be updated
			null, // don't create a new import.xml
			"//edition[@name='$editionName']"
			)

	// declare the new edition
	params.addEditionDefinition(params.getCorpusElement(), editionName, "xsl", "XSLEditionBuilder");

	// NOTE(review): "corpus" is null when the macro is called with arguments - confirm this branch
	// is never reached in that case
	for (def text : corpus.getTexts()) {
		Element textElem = text.getSelfElement()
		def defaultEdition = text.getEdition(defaultEditionName)
		if (defaultEdition == null) {
			println "Error: no default edition with name="+defaultEditionName
			return false
		}
		Element editionElem = params.addEdition(textElem, editionName, defaultEditionDirectory.getAbsolutePath(), "html");
		// page names are 1-based
		def pages = defaultEdition.getPages()
		for (int idx = 0 ; idx < pages.size() ; idx++) {
			params.addPage(editionElem, "${idx + 1}", pages[idx].getWordId());
		}
	}

	// NOTE(review): no explicit DomUtils import is visible in this script - confirm it is
	// provided by one of the star imports
	File importXml = new File(binDirectory, "import.xml");
	DomUtils.save(params.root.getOwnerDocument(), importXml);
}
|
253 |
|
|
254 |
// 5- Reload the corpora, only when the user ran the macro (no arguments)
if (args.size() == 0) {
	Toolbox.restart();
	monitor.syncExec({ RestartTXM.reloadViews() } as Runnable);
}
println "New edition created."
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/misc/RenameFilesMacro.groovy (revision 1948) | ||
---|---|---|
1 |
package org.txm.macro.misc |
|
1 | 2 |
// STANDARD DECLARATIONS |
2 |
package org.txm.macro.misc |
|
3 | 3 |
|
4 |
// README this macros needs the jtidy JAR library |
|
5 |
|
|
6 |
import groovy.xml.QName |
|
7 |
|
|
8 |
import java.nio.charset.Charset |
|
9 |
import java.text.DecimalFormat |
|
10 |
import org.txm.utils.xml.DomUtils; |
|
11 |
import org.txm.importer.ValidateXml; |
|
12 |
import org.w3c.tidy.Tidy |
|
13 |
import groovy.util.XmlParser |
|
4 | 14 |
import org.kohsuke.args4j.* |
5 | 15 |
import groovy.transform.Field |
6 | 16 |
import org.txm.rcp.swt.widget.parameters.* |
7 | 17 |
|
8 | 18 |
// BEGINNING OF PARAMETERS |
19 |
@Field @Option(name="rootDir", usage="The directory contains the 'orig' directory which contains the html files", widget="Folder", required=false, def="/path") |
|
20 |
File rootDir = new File(""); |
|
9 | 21 |
|
10 |
@Field @Option(name="inputDirectory",usage="TXT directory", widget="Folder", required=false, def="txt")
|
|
11 |
File inputDirectory;
|
|
22 |
@Field @Option(name="encoding", usage="HTML encoding", widget="String", required=false, def="iso-8859-1")
|
|
23 |
String encoding = "iso-8859-1" // HTML files encoding
|
|
12 | 24 |
|
13 |
// **change this parameter** |
|
14 |
@Field @Option(name="extension",usage="Regexp de l'extension des fichiers à modifier", widget="String", required=true, def='\\.txt') |
|
15 |
def extension = "\\.trs" |
|
25 |
@Field @Option(name="debug", usage="show debug messages. Values = true, false", widget="String", required=false, def="false") |
|
26 |
def debug = "true" // set true to debug the script |
|
16 | 27 |
|
17 |
// **change this parameter** |
|
18 |
@Field @Option(name="find",usage="Expression régulière", widget="String", required=true, def='’') |
|
19 |
def find = "Bobine" |
|
20 |
|
|
21 |
// **change this parameter** |
|
22 |
@Field @Option(name="replaceWith",usage="Chaîne de remplacement", widget="String", required=false, def='\'') |
|
23 |
def replaceWith = "" |
|
24 |
|
|
25 |
|
|
26 | 28 |
// Open the parameters input dialog box |
27 | 29 |
if (!ParametersDialog.open(this)) return; |
28 | 30 |
|
31 |
debug = ("true" == debug) |
|
29 | 32 |
// END OF PARAMETERS |
30 | 33 |
|
31 |
println "In $inputDirectory..." |
|
32 |
inputDirectory.eachFileMatch(~/.*$extension/) { file -> // for each file matching extension |
|
33 |
println " renaming: "+file.getName() |
|
34 |
String name = file.getName() |
|
35 |
name = name.replaceAll(find, replaceWith) |
|
36 |
file.renameTo(new File(file.getParentFile(), name)) |
|
37 |
} |
Formats disponibles : Unified diff